diff --git "a/training_log_truc.txt" "b/training_log_truc.txt" new file mode 100644--- /dev/null +++ "b/training_log_truc.txt" @@ -0,0 +1,50491 @@ +W0816 17:15:33.878000 100061 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0816 17:15:33.878000 100061 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0816 17:15:33.878000 100061 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0816 17:15:33.878000 100061 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0816 17:15:34.265000 80904 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0816 17:15:34.265000 80904 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0816 17:15:34.265000 80904 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0816 17:15:34.265000 80904 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0816 17:15:34.438000 23402 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0816 17:15:34.438000 23402 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0816 17:15:34.438000 23402 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0816 17:15:34.438000 23402 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0816 17:15:34.466000 103063 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0816 17:15:34.466000 103063 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0816 17:15:34.466000 103063 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0816 17:15:34.466000 103063 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0816 17:15:35.673000 24145 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0816 17:15:35.673000 24145 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0816 17:15:35.673000 24145 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0816 17:15:35.673000 24145 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0816 17:15:35.674000 41336 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0816 17:15:35.674000 41336 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0816 17:15:35.674000 41336 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0816 17:15:35.674000 41336 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0816 17:15:35.740000 14506 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0816 17:15:35.740000 14506 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0816 17:15:35.740000 14506 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0816 17:15:35.740000 14506 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0816 17:15:35.854000 17105 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0816 17:15:35.854000 17105 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0816 17:15:35.854000 17105 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0816 17:15:35.854000 17105 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +[2025-08-16 17:15:43,997] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:43,997] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:43,997] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:44,011] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:44,011] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:44,027] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:44,027] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:44,027] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:44,041] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:44,072] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:44,073] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:44,073] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:44,080] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:44,080] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:44,080] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:44,080] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:45,072] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:45,088] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:45,117] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:45,118] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:45,119] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:45,185] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:45,270] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:45,292] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:45,292] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:45,301] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:45,308] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:45,317] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:45,362] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:45,363] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:45,386] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:45,394] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:46,319] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:46,321] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:46,321] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:46,323] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:46,324] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:46,325] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:46,327] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:46,327] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:46,328] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:46,328] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:46,328] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:46,329] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:46,329] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:46,762] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 17:15:46,765] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-16 17:15:46,875] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 17:15:46,875] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 17:15:46,876] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 17:15:46,877] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-16 17:15:46,883] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 17:15:46,883] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 17:15:46,883] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-16 17:15:46,902] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 17:15:46,905] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 17:15:46,906] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-16 17:15:46,907] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 17:15:46,908] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-16 17:15:46,910] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 17:15:46,911] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-16 17:15:47,599] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:47,599] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:47,600] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:47,641] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:47,646] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:47,647] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:47,647] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:47,658] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:47,684] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:47,692] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:47,770] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:47,780] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:47,885] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:47,886] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:47,895] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:47,897] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:48,006] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-16 17:15:48,103] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-16 17:15:48,184] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-16 17:15:48,192] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 17:15:48,194] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-16 17:15:48,219] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 17:15:48,222] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-16 17:15:48,249] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 17:15:48,250] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 17:15:48,250] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 17:15:48,251] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 17:15:48,253] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-16 17:15:48,271] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 17:15:48,273] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-16 17:15:48,782] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:48,785] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:48,785] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:48,785] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:48,785] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:48,785] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:48,785] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:48,785] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:48,780] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:48,781] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:48,781] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:48,781] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:48,791] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:48,791] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:48,792] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:48,792] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:50,160] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:50,174] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:50,175] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:50,176] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:50,179] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:50,179] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:50,180] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:50,180] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:50,722] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:50,722] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:50,745] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:50,746] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:50,758] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:50,758] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:50,758] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:50,758] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 17:15:51,725] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:51,726] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:51,726] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:51,726] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:51,726] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:51,726] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:51,726] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:51,726] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:51,744] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:51,744] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:51,744] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:51,744] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:51,744] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:51,744] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:51,745] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:51,745] [INFO] [comm.py:683:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl +[2025-08-16 17:15:51,747] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:52,174] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-16 17:15:52,228] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-16 17:15:52,305] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 17:15:52,307] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 17:15:52,308] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 17:15:52,309] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-16 17:15:52,312] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 17:15:52,313] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 17:15:52,313] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-16 17:15:52,365] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-16 17:15:52,372] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 17:15:52,375] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 17:15:52,376] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 17:15:52,377] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 17:15:52,377] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 17:15:52,377] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-16 17:15:53,965] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:53,965] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:53,965] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:53,965] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:53,965] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:53,968] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:53,970] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:53,970] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:54,113] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:54,113] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:54,114] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:54,114] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:54,114] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:54,114] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:54,114] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:54,114] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 17:15:54,423] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-16 17:15:54,515] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-16 17:15:54,540] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 17:15:54,540] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-16 17:15:54,546] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 17:15:54,546] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 17:15:54,547] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 17:15:54,548] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-16 17:15:54,558] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-16 17:15:54,688] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 17:15:54,689] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 17:15:54,689] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-16 17:15:54,695] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 17:15:54,696] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 17:15:54,697] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-16 17:15:54,701] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. + Loading checkpoint shards: 0%| | 0/2 [00:00 before Client(conf_path) +Rank 0: --> after Client(conf_path) +Rank 0: Loading datasets: /mnt/petrelfs/liuzhaoyang/workspace/GUIAgent/internvl_chat/data/internvl_meta/meta/meta_250816_1.json +Rank 0: Loading guienv +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl with all sampling strategy +Rank 0: Loaded 327972 samples from VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl +Rank 0: Loading omniact +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl with all sampling strategy +Rank 0: Loaded 6720 samples from VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl +Rank 0: Loading ricoig16k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16133 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl +Rank 0: Loading ricosca +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl with all sampling strategy +Rank 0: Loaded 173212 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl +Rank 0: Loading seeclick +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl with all sampling strategy +Rank 0: Loaded 271121 samples from VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl +Rank 0: Loading ui_refexp +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl with all sampling strategy +Rank 0: Loaded 15624 samples from VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl +Rank 0: Loading webui350k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 57389 samples from VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl +Rank 0: Loading widget_captioning +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl with all sampling strategy +Rank 0: Loaded 101426 samples from VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl +Rank 0: Loading aitw-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl +Rank 0: Loading aitw-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl +Rank 0: Loading aitw-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl +Rank 0: Loading amex-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl +Rank 0: Loading amex-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl +Rank 0: Loading amex-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl +Rank 0: Loading android_control +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 149428 samples from VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl +Rank 0: Loading coat +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl with all sampling strategy +Rank 0: Loaded 11833 samples from VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl +Rank 0: Loading guiact-web-multi-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl +Rank 0: Loading guiact-web-multi-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl +Rank 0: Loading guiact-web-multi-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl +Rank 0: Loading guiact-web-single +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl with all sampling strategy +Rank 0: Loaded 67396 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl +Rank 0: Loading guide +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl with all sampling strategy +Rank 0: Loaded 13544 samples from VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl +Rank 0: Loading gui-odyssey-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl +Rank 0: Loading gui-odyssey-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl +Rank 0: Loading gui-odyssey-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl +Rank 0: Loading mind2web-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l1_202507011.jsonl +Rank 0: Loading mind2web-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l2_202507011.jsonl +Rank 0: Loading mind2web-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l3_202507011.jsonl +Rank 0: Loading miniwob-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l1_202507011.jsonl +Rank 0: Loading miniwob-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l2_202507011.jsonl +Rank 0: Loading miniwob-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l3_202507011.jsonl +Rank 0: Loading aguvis_android_control-v2 +Rank 0: Skipping aguvis_android_control-v2 due to repeat_time=0 +Rank 0: Loading aguvis_coat-v2 +Rank 0: Skipping aguvis_coat-v2 due to repeat_time=0 +Rank 0: Loading aguvis_docvqa_grounding +Rank 0: Skipping aguvis_docvqa_grounding due to repeat_time=0 +Rank 0: Loading aguvis_guiact-web-multi +Rank 0: Skipping aguvis_guiact-web-multi due to repeat_time=0 +Rank 0: Loading aguvis_guiact-web-single-v2 +Rank 0: Skipping aguvis_guiact-web-single-v2 due to repeat_time=0 +Rank 0: Loading aguvis_guide_si_10k-v2 +Rank 0: Skipping aguvis_guide_si_10k-v2 due to repeat_time=0 +Rank 0: Loading aguvis_guienv +Rank 0: Skipping aguvis_guienv due to repeat_time=0 +Rank 0: Loading aguvis_mind2web_train_v1.0.1 +Rank 0: Skipping aguvis_mind2web_train_v1.0.1 due to repeat_time=0 +Rank 0: Loading aguvis_omniact +Rank 0: Skipping aguvis_omniact due to repeat_time=0 +Rank 0: Loading aguvis_osatlas_ui_tars_cleaned +Rank 0: Skipping aguvis_osatlas_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading aguvis_ricoig16k +Rank 0: Skipping aguvis_ricoig16k due to repeat_time=0 +Rank 0: Loading aguvis_ricosca +Rank 0: Skipping aguvis_ricosca due to repeat_time=0 +Rank 0: Loading aguvis_seeclick_mi_ui_tars_cleaned +Rank 0: Skipping aguvis_seeclick_mi_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading aguvis_seeclick_ui_tars_cleaned_fixed +Rank 0: Skipping aguvis_seeclick_ui_tars_cleaned_fixed due to repeat_time=0 +Rank 0: Loading aguvis_ui_refexp +Rank 0: Skipping aguvis_ui_refexp due to repeat_time=0 +Rank 0: Loading aguvis_webui350k +Rank 0: Skipping aguvis_webui350k due to repeat_time=0 +Rank 0: Loading aguvis_widget_captioning +Rank 0: Skipping aguvis_widget_captioning due to repeat_time=0 +Rank 0: Loading icon_caption_icon_v0222_description +Rank 0: Skipping icon_caption_icon_v0222_description due to repeat_time=0 +Rank 0: Loading icon_grounding_icon_v0222_grounding +Rank 0: Skipping icon_grounding_icon_v0222_grounding due to repeat_time=0 +Rank 0: Loading refusal_component_final_1.5m +Rank 0: Skipping refusal_component_final_1.5m due to repeat_time=0 +Rank 0: Loading refusal_component_library_snap_icon_data_grounding +Rank 0: Skipping refusal_component_library_snap_icon_data_grounding due to repeat_time=0 +Rank 0: Loading refusal_component_v1_130k +Rank 0: Skipping refusal_component_v1_130k due to repeat_time=0 +Rank 0: Loading refusal_guienv +Rank 0: Skipping refusal_guienv due to repeat_time=0 +Rank 0: Loading refusal_icon_v0222_grounding +Rank 0: Skipping refusal_icon_v0222_grounding due to repeat_time=0 +Rank 0: Loading refusal_osatlas_ui_tars_cleaned +Rank 0: Skipping refusal_osatlas_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading refusal_ricosca +Rank 0: Skipping refusal_ricosca due to repeat_time=0 +Rank 0: Loading refusal_seeclick_mi_ui_tars_cleaned +Rank 0: Skipping refusal_seeclick_mi_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading refusal_seeclick_ui_tars_cleaned_fixed +Rank 0: Skipping refusal_seeclick_ui_tars_cleaned_fixed due to repeat_time=0 +Rank 0: Loading refusal_training_data_icon_grounded_merged +Rank 0: Skipping refusal_training_data_icon_grounded_merged due to repeat_time=0 +Rank 0: Loading component_generated_component_final_1.5m_cleaned_split +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_final_1.5m_cleaned_split_20250713.jsonl with random:10% sampling strategy +Rank 0: Loaded 3987 samples from VC:s3://gui-agent/jedi/annotations_250713/component_final_1.5m_cleaned_split_20250713.jsonl +Rank 0: Loading component_generated_component_library_snap_icon_data_description +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_description_conversations_20250713.jsonl with random:50% sampling strategy +Rank 0: Loaded 11061 samples from VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_description_conversations_20250713.jsonl +Rank 0: Loading component_generated_component_library_snap_icon_data_grounding +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_grounding_conversations_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 4424 samples from VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_grounding_conversations_20250713.jsonl +Rank 0: Loading component_generated_component_v1_130k +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_v1_130k_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 26376 samples from VC:s3://gui-agent/jedi/annotations_250713/component_v1_130k_20250713.jsonl +Rank 0: Loading component_rule-based_doc_data_new +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/doc_data_new_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 3153 samples from VC:s3://gui-agent/jedi/annotations_250713/doc_data_new_20250713.jsonl +Rank 0: Loading component_rule-based_doc_scroll_data_new +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/doc_scroll_data_new_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 603 samples from VC:s3://gui-agent/jedi/annotations_250713/doc_scroll_data_new_20250713.jsonl +Rank 0: Loading component_rule-based_ethercalc_v1 +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/ethercalc_v1_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 2012 samples from VC:s3://gui-agent/jedi/annotations_250713/ethercalc_v1_20250713.jsonl +Rank 0: Loading component_rule-based_slide_v1_17k +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/slide_v1_17k_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 2363 samples from VC:s3://gui-agent/jedi/annotations_250713/slide_v1_17k_20250713.jsonl +Rank 0: Loading icon_caption_ios_app_data +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/ios_app_data_conversations-images_pure_color_background_20250713.jsonl with all sampling strategy +Rank 0: Loaded 49498 samples from VC:s3://gui-agent/jedi/annotations_250713/ios_app_data_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_caption_mac_app_data +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/mac_app_data_conversations-images_pure_color_background_20250713.jsonl with all sampling strategy +Rank 0: Loaded 18083 samples from VC:s3://gui-agent/jedi/annotations_250713/mac_app_data_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_caption_training_data_icon +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_pure_color_background_20250713.jsonl with random:50% sampling strategy +Rank 0: Loaded 75874 samples from VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_grounding_training_data_icon_grounded_merged +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_grounded_merged_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 5466 samples from VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_grounded_merged_20250713.jsonl +Rank 0: Loading layout_layout200k_training_data_qwen25 +Rank 0: Skipping layout_layout200k_training_data_qwen25 due to repeat_time=0 +Rank 0: Loading layout_layout200k_grounding_training_data_qwen25 +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/layout200k_grounding_training_data_qwen25_20250713.jsonl with random:10% sampling strategy +Rank 0: Loaded 158612 samples from VC:s3://gui-agent/jedi/annotations_250713/layout200k_grounding_training_data_qwen25_20250713.jsonl +Rank 0: Loading layout_layout400k_claude_training_data_qwen25_split +Rank 0: Skipping layout_layout400k_claude_training_data_qwen25_split due to repeat_time=0 +Rank 0: Loading layout_layout400k_claude_grounding_training_data_qwen25_split +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/layout400k_claude_grounding_training_data_qwen25_split_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 7540 samples from VC:s3://gui-agent/jedi/annotations_250713/layout400k_claude_grounding_training_data_qwen25_split_20250713.jsonl +Rank 0: Loading layout_os_layout_v1 +Rank 0: Skipping layout_os_layout_v1 due to repeat_time=0 +Rank 0: Loading layout_os_layout_v1_grounding +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/os_layout_v1_grounding_training_data_qwen25_split_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 7857 samples from VC:s3://gui-agent/jedi/annotations_250713/os_layout_v1_grounding_training_data_qwen25_split_20250713.jsonl +Rank 0: Loading mind2web_raw_image +Rank 0: Loading VC:s3://gui-agent/mind2web_train/navigation_20250705.jsonl with all sampling strategy +Rank 0: Loaded 5740 samples from VC:s3://gui-agent/mind2web_train/navigation_20250705.jsonl +Rank 0: Loading ws_android_navigation_20250328 +Rank 0: Skipping ws_android_navigation_20250328 due to repeat_time=0 +Rank 0: Loading ws_android_navigation_20250407 +Rank 0: Skipping ws_android_navigation_20250407 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_w_history_20250328 +Rank 0: Skipping ws_web_navigation_w_history_20250328 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_wo_history_20250328 +Rank 0: Skipping ws_web_navigation_wo_history_20250328 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_20250421 +Rank 0: Skipping ws_web_navigation_20250421 due to repeat_time=0 +Rank 0: Loading ws_ubuntu_navigation_20250328 +Rank 0: Skipping ws_ubuntu_navigation_20250328 due to repeat_time=0 +Rank 0: Loading ws_android_navigation_20250505 +Rank 0: Skipping ws_android_navigation_20250505 due to repeat_time=0 +Rank 0: Loading internal_android_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 48814 samples from VC:s3://gui-agent/data_20250612/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19042 samples from VC:s3://gui-agent/data_20250612/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8363 samples from VC:s3://gui-agent/data_20250612/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 26412 samples from VC:s3://gui-agent/data_20250612/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 57522 samples from VC:s3://gui-agent/data_20250612/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 1342 samples from VC:s3://gui-agent/data_20250624/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 15766 samples from VC:s3://gui-agent/data_20250624/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19280 samples from VC:s3://gui-agent/data_20250630/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 3560 samples from VC:s3://gui-agent/data_20250630/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 9258 samples from VC:s3://gui-agent/data_20250630/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 420 samples from VC:s3://gui-agent/data_20250707/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 8898 samples from VC:s3://gui-agent/data_20250707/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 21026 samples from VC:s3://gui-agent/data_20250707/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 4490 samples from VC:s3://gui-agent/data_20250707/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 22154 samples from VC:s3://gui-agent/data_20250714/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/web/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 11614 samples from VC:s3://gui-agent/data_20250714/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 16767 samples from VC:s3://gui-agent/data_20250714/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 48814 samples from VC:s3://gui-agent/data_20250612/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 11970 samples from VC:s3://gui-agent/data_20250612/android/planning_20250811.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19042 samples from VC:s3://gui-agent/data_20250612/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 4926 samples from VC:s3://gui-agent/data_20250612/mac/planning_20250811.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8363 samples from VC:s3://gui-agent/data_20250612/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3716 samples from VC:s3://gui-agent/data_20250612/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 26412 samples from VC:s3://gui-agent/data_20250612/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 4420 samples from VC:s3://gui-agent/data_20250612/windows/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 115044 samples from VC:s3://gui-agent/data_20250612/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2684 samples from VC:s3://gui-agent/data_20250624/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 15766 samples from VC:s3://gui-agent/data_20250624/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7402 samples from VC:s3://gui-agent/data_20250624/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19280 samples from VC:s3://gui-agent/data_20250630/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3746 samples from VC:s3://gui-agent/data_20250630/android/planning_20250811.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 3560 samples from VC:s3://gui-agent/data_20250630/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 2469 samples from VC:s3://gui-agent/data_20250630/mac/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 18516 samples from VC:s3://gui-agent/data_20250630/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 420 samples from VC:s3://gui-agent/data_20250707/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 243 samples from VC:s3://gui-agent/data_20250707/mac/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 8898 samples from VC:s3://gui-agent/data_20250707/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 1188 samples from VC:s3://gui-agent/data_20250707/windows/planning_20250811.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8979 samples from VC:s3://gui-agent/data_20250707/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 1599 samples from VC:s3://gui-agent/data_20250707/android/planning_20250811.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 21026 samples from VC:s3://gui-agent/data_20250707/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2657 samples from VC:s3://gui-agent/data_20250707/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 22154 samples from VC:s3://gui-agent/data_20250714/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2383 samples from VC:s3://gui-agent/data_20250714/windows/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/web/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 23229 samples from VC:s3://gui-agent/data_20250714/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 16767 samples from VC:s3://gui-agent/data_20250714/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2389 samples from VC:s3://gui-agent/data_20250714/ubuntu/planning_20250811.jsonl +Rank 0: Loading private_aig_share_0815_logo_oral_operation_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_oral_operation_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_oral_operation_d240924_v1.jsonl +Rank 0: Loading private_aig_share_0815_logo_region_caption_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_region_caption_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_region_caption_d240924_v1.jsonl +Rank 0: Loading private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2.jsonl with all sampling strategy +Rank 0: Loaded 20293 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2.jsonl +Rank 0: Loading private_ui_phone_comment_20240606_json_d20241023_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_comment_20240606_json_d20241023_v2.jsonl with all sampling strategy +Rank 0: Loaded 1055 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_comment_20240606_json_d20241023_v2.jsonl +Rank 0: Loading private_ui_internal_aig_json_d241126 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_json_d241126.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 6837 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_json_d241126.jsonl +Rank 0: Loading private_ui_internal_aig_xml_d241126 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_xml_d241126.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 6873 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_xml_d241126.jsonl +Rank 0: Loading OS_Altas_androidworld_grounding_d241120_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/OS_Altas_androidworld_grounding_d241120_v1.jsonl with all sampling strategy +Rank 0: Loaded 89860 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/OS_Altas_androidworld_grounding_d241120_v1.jsonl +Rank 0: Loading private_ui_aig_share_long_caption_20240604_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_long_caption_20240604_v1.jsonl with repeat:4 sampling strategy +Rank 0: Loaded 3156 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_long_caption_20240604_v1.jsonl +Rank 0: Loading aw_1218_grounding +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/grounding_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/grounding_new.jsonl +Rank 0: Loading aw_1218_regioncaption +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/regioncaption_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/regioncaption_new.jsonl +Rank 0: Loading aw_1218_oral_operation +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/oral_operation_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/oral_operation_new.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600.jsonl with all sampling strategy +Rank 0: Loaded 6600 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1.jsonl +Rank 0: Loading private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607.jsonl with all sampling strategy +Rank 0: Loaded 24620 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607.jsonl +Rank 0: Loading private_ui_phone_2403_long_caption_d20240604_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d20240604_v2.jsonl with all sampling strategy +Rank 0: Loaded 17196 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d20240604_v2.jsonl +Rank 0: Loading private_ui_phone_2403_long_caption_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 5998 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d240430_v1.jsonl +Rank 0: Loading private_ui_phone_2403_ocr_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ocr_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 31276 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ocr_d240430_v1.jsonl +Rank 0: Loading screen_qa_with_bbox_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_with_bbox_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 62401 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_with_bbox_d240430_v1.jsonl +Rank 0: Loading screenai_layout_20240604_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screenai_layout_20240604_v1.jsonl with all sampling strategy +Rank 0: Loaded 22076 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screenai_layout_20240604_v1.jsonl +Rank 0: Loading amex_grounding_d240813_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/amex_grounding_d240813_v1.jsonl with all sampling strategy +Rank 0: Loaded 102007 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/amex_grounding_d240813_v1.jsonl +Rank 0: Loading guicourse_guienv_text_grounding_1_d240815_v3 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_1_d240815_v3.jsonl with all sampling strategy +Rank 0: Loaded 63581 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_1_d240815_v3.jsonl +Rank 0: Loading guicourse_guienv_text_grounding_2_d240815_v3 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_2_d240815_v3.jsonl with all sampling strategy +Rank 0: Loaded 6852 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_2_d240815_v3.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1.jsonl +Rank 0: Loading screen_qa_short_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_short_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 27880 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_short_d240430_v1.jsonl +Rank 0: Loading private_aig_share_0815_logo_grounding_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_grounding_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_grounding_d240924_v1.jsonl +Rank 0: Loading private_schedual_extract_20240520_v2_r464_reprompt_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_schedual_extract_20240520_v2_r464_reprompt_d240607.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 928 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_schedual_extract_20240520_v2_r464_reprompt_d240607.jsonl +Rank 0: Loading private_ui2json_app_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_app_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2488 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_app_d20240822_v1.jsonl +Rank 0: Loading private_ui2json_os_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_os_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 1242 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_os_d20240822_v1.jsonl +Rank 0: Loading private_ui2json_web_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_web_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2360 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_web_d20240822_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607.jsonl with all sampling strategy +Rank 0: Loaded 3791 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607.jsonl +Rank 0: Loading private_ui_aig_share_2405_marker_recognition_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_marker_recognition_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5179 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_marker_recognition_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_ocr_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_ocr_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5090 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_ocr_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_operation_oral_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_operation_oral_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5070 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_operation_oral_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5248 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2408_region_caption_d240903_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2408_region_caption_d240903_v1.jsonl with all sampling strategy +Rank 0: Loaded 5854 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2408_region_caption_d240903_v1.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1.jsonl +Rank 0: Loading uground_web_direct_150k_description_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_direct_150k_description_filtered_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 74208 samples from VC:s3://gui/new_annotations/uground/web_direct_150k_description_filtered_202507011.jsonl +Rank 0: Loading uground_web_direct_258k_function_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_direct_258k_function_filtered_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 124132 samples from VC:s3://gui/new_annotations/uground/web_direct_258k_function_filtered_202507011.jsonl +Rank 0: Loading uground_web_hybrid_773k_max_25qa_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_hybrid_773k_max_25qa_filtered_new_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 621928 samples from VC:s3://gui/new_annotations/uground/web_hybrid_773k_max_25qa_filtered_new_202507011.jsonl +Rank 0: Loading altas_windows +Rank 0: Loading VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_windows_splited_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 537672 samples from VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_windows_splited_202507011.jsonl +Rank 0: Loading altas_linux +Rank 0: Loading VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_linux_splited_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 21578 samples from VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_linux_splited_202507011.jsonl +Rank 0: Loading atlas_macos +Rank 0: Loading VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_macos_splited_202507011.jsonl with random:20% sampling strategy +Rank 0: Loaded 3680 samples from VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_macos_splited_202507011.jsonl +Rank 0: Loading android_action_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/android/filter_action_grounding_20250405_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 5621 samples from VC:s3://gui/data_20250328/android/filter_action_grounding_20250405_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250328 +Rank 0: Loading VC:s3://gui-agent/data_20250328/windows/action_grounding_20250409_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 11980 samples from VC:s3://gui-agent/data_20250328/windows/action_grounding_20250409_202507011_20250722.jsonl +Rank 0: Loading web_action_grounding_20250328 +Rank 0: Loading VC:s3://gui-agent/data_20250328/web_25k/action_grounding_20250404_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 9459 samples from VC:s3://gui-agent/data_20250328/web_25k/action_grounding_20250404_202507011.jsonl +Rank 0: Loading ubuntu_action_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/ubuntu/action_grounding_20250407_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 328 samples from VC:s3://gui/data_20250310/ubuntu/action_grounding_20250407_202507011.jsonl +Rank 0: Loading ubuntu_action_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/ubuntu/action_grounding_20250407_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 54 samples from VC:s3://gui/data_20250317/ubuntu/action_grounding_20250407_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/action_grounding_20250421_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 480 samples from VC:s3://gui/data_20250317/windows/action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/crop_action_grounding_20250421_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 240 samples from VC:s3://gui/data_20250317/windows/crop_action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/action_grounding_20250421_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 944 samples from VC:s3://gui/data_20250310/windows/action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/crop_action_grounding_20250421_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 472 samples from VC:s3://gui/data_20250310/windows/crop_action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading mac_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/mac/action_grounding_20250410_202507011.jsonl with all sampling strategy +Rank 0: Loaded 1578 samples from VC:s3://gui-agent/data_20250407/mac/action_grounding_20250410_202507011.jsonl +Rank 0: Loading iphone_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/iphone/white/action_grounding_20250410_202507011.jsonl with all sampling strategy +Rank 0: Loaded 20394 samples from VC:s3://gui-agent/data_20250407/iphone/white/action_grounding_20250410_202507011.jsonl +Rank 0: Loading web_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/web/action_grounding_20250414_202507011.jsonl with random:20% sampling strategy +Rank 0: Loaded 14285 samples from VC:s3://gui-agent/data_20250407/web/action_grounding_20250414_202507011.jsonl +Rank 0: Loading android_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/android/action_grounding_20250410_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 3590 samples from VC:s3://gui-agent/data_20250407/android/action_grounding_20250410_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/action_grounding_20250416_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 21422 samples from VC:s3://gui-agent/data_20250407/windows/action_grounding_20250416_202507011_20250722.jsonl +Rank 0: Loading windows_human_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/human_action_grounding_20250416_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 100 samples from VC:s3://gui-agent/data_20250407/windows/human_action_grounding_20250416_202507011_20250722.jsonl +Rank 0: Loading windows_aug_cropping_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/sub_action_grounding_20250421_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 7675 samples from VC:s3://gui-agent/data_20250407/windows/sub_action_grounding_20250421_202507011.jsonl +Rank 0: Loading iphone_action_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/action_grounding_20250417_202507011.jsonl with all sampling strategy +Rank 0: Loaded 20116 samples from VC:s3://gui-agent/data_20250414/iphone/action_grounding_20250417_202507011.jsonl +Rank 0: Loading iphone_human_action_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/human_action_grounding_20250421_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2520 samples from VC:s3://gui-agent/data_20250414/iphone/human_action_grounding_20250421_202507011.jsonl +Rank 0: Loading mac_human_action_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/mac/human_action_grounding_20250418_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7814 samples from VC:s3://gui-agent/data_20250414/mac/human_action_grounding_20250418_202507011.jsonl +Rank 0: Loading android_action_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/Android/action_grounding_20250429_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 17838 samples from VC:s3://gui-agent/data_20250421/Android/action_grounding_20250429_202507011.jsonl +Rank 0: Loading android_action_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/Android/action_grounding_20250429_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 9008 samples from VC:s3://gui-agent/data_20250428/Android/action_grounding_20250429_202507011.jsonl +Rank 0: Loading web_canvas_action_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/web_canvas/action_grounding_20250429_202507011.jsonl with random:20% sampling strategy +Rank 0: Loaded 624 samples from VC:s3://gui-agent/data_20250428/web_canvas/action_grounding_20250429_202507011.jsonl +Rank 0: Loading web_action_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/web/action_grounding_20250505_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 100652 samples from VC:s3://gui-agent/data_20250421/web/action_grounding_20250505_202507011.jsonl +Rank 0: Loading ubuntu_action_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/ubuntu/action_grounding_20250505_202507011.jsonl with all sampling strategy +Rank 0: Loaded 28346 samples from VC:s3://gui-agent/data_20250428/ubuntu/action_grounding_20250505_202507011.jsonl +Rank 0: Loading android_action_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/android/action_grounding_20250506_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 4907 samples from VC:s3://gui-agent/data_20250505/android/action_grounding_20250506_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/action_grounding_20250508_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 2635 samples from VC:s3://gui-agent/data_20250505/windows/action_grounding_20250508_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/crop_action_grounding_20250508_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 5234 samples from VC:s3://gui-agent/data_20250505/windows/crop_action_grounding_20250508_202507011_20250722.jsonl +Rank 0: Loading ubuntu_action_grounding_20250508 +Rank 0: Loading VC:s3://gui-agent/data_20250508/ubuntu/action_grounding_20250509_202507011.jsonl with all sampling strategy +Rank 0: Loaded 3404 samples from VC:s3://gui-agent/data_20250508/ubuntu/action_grounding_20250509_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250510_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250510_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250510_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250510_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250526_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250526_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250526_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250526_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250526_3 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250527_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250527_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_3 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250527_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250527_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250529_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 17050 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250529_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250529_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 17050 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250529_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_1 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250510_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 2855 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250510_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_2 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250526_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 655 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250526_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_3 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250527_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 833 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250527_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_4 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250529_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 2643 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250529_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_1 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250510_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250510_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_2 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250526_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250526_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_3 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250527_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250527_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_4 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250529_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5286 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250529_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_5 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250510_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250510_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_6 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250526_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250526_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_7 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250527_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250527_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_8 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250529_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5286 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250529_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/action_grounding_20250619_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3130 samples from VC:s3://gui-agent/data_20250623/windows/action_grounding_20250619_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250619_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 3130 samples from VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250619_202507011_20250722.jsonl +Rank 0: Loading windows_hover_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/action_grounding_20250620_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 3333 samples from VC:s3://gui-agent/data_20250623/windows/action_grounding_20250620_202507011_20250722.jsonl +Rank 0: Loading windows_crop_hover_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250620_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 3333 samples from VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250620_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_1 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 833 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_2 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1666 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_3 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1666 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_pure_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_4 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 358 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_5 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 782 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_6 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 782 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_pure_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/action_grounding_20250627_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7946 samples from VC:s3://gui-agent/data_20250630/windows/action_grounding_20250627_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/crop_action_grounding_20250627_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 3973 samples from VC:s3://gui-agent/data_20250630/windows/crop_action_grounding_20250627_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_1 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 993 samples from VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1986 samples from VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1986 samples from VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_pure_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/action_grounding_20250630_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 1990 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/action_grounding_20250630_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_action_grounding_20250630_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1990 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_action_grounding_20250630_202507011_20250722.jsonl +Rank 0: Loading windows_human_action_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/action_grounding_20250703_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 5040 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/action_grounding_20250703_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_action_grounding_20250703_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 5040 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_action_grounding_20250703_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_4 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 630 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_5 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1260 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_6 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1260 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_pure_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_7 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 249 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_8 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 498 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_9 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 498 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_pure_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/action_grounding_20250708_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2538 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/action_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_action_grounding_20250708_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1269 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_action_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250707_1 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_action_grounding_20250708.jsonl with random:25% sampling strategy +Rank 0: Loaded 172 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_action_grounding_20250708.jsonl +Rank 0: Loading windows_aug_action_grounding_20250707_2 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_action_grounding_20250708.jsonl with random:25% sampling strategy +Rank 0: Loaded 634 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_action_grounding_20250708.jsonl +Rank 0: Loading windows_aug_action_grounding_20250707_3 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_action_grounding_20250708.jsonl with random:25% sampling strategy +Rank 0: Loaded 634 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_action_grounding_20250708.jsonl +Rank 0: Loading windows_human_action_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/action_grounding_20250717_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2832 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/action_grounding_20250717_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_action_grounding_20250717_20250722.jsonl with all sampling strategy +Rank 0: Loaded 2832 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_action_grounding_20250717_20250722.jsonl +Rank 0: Loading android_ocr_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/android/text_ocr_20250409.jsonl with all sampling strategy +Rank 0: Loaded 29878 samples from VC:s3://gui/data_20250328/android/text_ocr_20250409.jsonl +Rank 0: Loading mac_orc_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/mac/element_ocr_20250328.jsonl with all sampling strategy +Rank 0: Loaded 4393 samples from VC:s3://gui/data_20250328/mac/element_ocr_20250328.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/ubuntu/internvl_grounding_20250407.jsonl with random:80% sampling strategy +Rank 0: Loaded 254 samples from VC:s3://gui/data_20250310/ubuntu/internvl_grounding_20250407.jsonl +Rank 0: Loading windows_click_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/internvl_grounding_function_20250421.jsonl with random:80% sampling strategy +Rank 0: Loaded 1802 samples from VC:s3://gui/data_20250310/windows/internvl_grounding_function_20250421.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/ubuntu/internvl_grounding_20250407.jsonl with random:80% sampling strategy +Rank 0: Loaded 53 samples from VC:s3://gui/data_20250317/ubuntu/internvl_grounding_20250407.jsonl +Rank 0: Loading windows_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/internvl_grounding_20250421_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 6578 samples from VC:s3://gui/data_20250317/windows/internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/crop_internvl_grounding_20250421_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 3287 samples from VC:s3://gui/data_20250317/windows/crop_internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading windows_click_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/internvl_grounding_function_20250421_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 542 samples from VC:s3://gui/data_20250317/windows/internvl_grounding_function_20250421_20250722.jsonl +Rank 0: Loading windows_crop_click_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/crop_internvl_grounding_function_20250421_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 270 samples from VC:s3://gui/data_20250317/windows/crop_internvl_grounding_function_20250421_20250722.jsonl +Rank 0: Loading windows_internvl_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/internvl_grounding_20250421_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 10908 samples from VC:s3://gui/data_20250310/windows/internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/crop_internvl_grounding_20250421_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 5453 samples from VC:s3://gui/data_20250310/windows/crop_internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading android_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/android/internvl_grounding_20250409.jsonl with random:80% sampling strategy +Rank 0: Loaded 13950 samples from VC:s3://gui/data_20250328/android/internvl_grounding_20250409.jsonl +Rank 0: Loading windows_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui-agent/data_20250328/windows/internvl_grounding_20250425_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 12390 samples from VC:s3://gui-agent/data_20250328/windows/internvl_grounding_20250425_20250722.jsonl +Rank 0: Loading web_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/web_25k/internvl_grounding_20250409.jsonl with random:80% sampling strategy +Rank 0: Loaded 13402 samples from VC:s3://gui/data_20250328/web_25k/internvl_grounding_20250409.jsonl +Rank 0: Loading icon_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/icon_canva/icon_anno_20250328.jsonl with all sampling strategy +Rank 0: Loaded 81303 samples from VC:s3://gui/data_20250328/icon_canva/icon_anno_20250328.jsonl +Rank 0: Loading mac_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/mac/internvl_grounding_20250410.jsonl with all sampling strategy +Rank 0: Loaded 1251 samples from VC:s3://gui-agent/data_20250407/mac/internvl_grounding_20250410.jsonl +Rank 0: Loading iphone_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/iphone/white/internvl_grounding_20250410.jsonl with all sampling strategy +Rank 0: Loaded 27849 samples from VC:s3://gui-agent/data_20250407/iphone/white/internvl_grounding_20250410.jsonl +Rank 0: Loading web_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/web/internvl_grounding_20250414.jsonl with random:80% sampling strategy +Rank 0: Loaded 51607 samples from VC:s3://gui-agent/data_20250407/web/internvl_grounding_20250414.jsonl +Rank 0: Loading android_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/android/internvl_grounding_20250410.jsonl with random:80% sampling strategy +Rank 0: Loaded 6281 samples from VC:s3://gui-agent/data_20250407/android/internvl_grounding_20250410.jsonl +Rank 0: Loading windows_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/internvl_grounding_20250416_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 25961 samples from VC:s3://gui-agent/data_20250407/windows/internvl_grounding_20250416_20250722.jsonl +Rank 0: Loading windows_cropping_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/sub_internvl_grounding_20250421.jsonl with random:40% sampling strategy +Rank 0: Loaded 9763 samples from VC:s3://gui-agent/data_20250407/windows/sub_internvl_grounding_20250421.jsonl +Rank 0: Loading iphone_internvl_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/internvl_grounding_20250417.jsonl with all sampling strategy +Rank 0: Loaded 16896 samples from VC:s3://gui-agent/data_20250414/iphone/internvl_grounding_20250417.jsonl +Rank 0: Loading iphone_human_internvl_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/human_internvl_grounding_20250421.jsonl with all sampling strategy +Rank 0: Loaded 927 samples from VC:s3://gui-agent/data_20250414/iphone/human_internvl_grounding_20250421.jsonl +Rank 0: Loading mac_human_internvl_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/mac/human_internvl_grounding_20250418.jsonl with all sampling strategy +Rank 0: Loaded 3051 samples from VC:s3://gui-agent/data_20250414/mac/human_internvl_grounding_20250418.jsonl +Rank 0: Loading android_internvl_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/Android/internvl_grounding_20250429.jsonl with random:80% sampling strategy +Rank 0: Loaded 25217 samples from VC:s3://gui-agent/data_20250421/Android/internvl_grounding_20250429.jsonl +Rank 0: Loading android_internvl_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/Android/internvl_grounding_20250429.jsonl with random:80% sampling strategy +Rank 0: Loaded 12677 samples from VC:s3://gui-agent/data_20250428/Android/internvl_grounding_20250429.jsonl +Rank 0: Loading web_canvas_internvl_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/web_canvas/internvl_grounding_20250429.jsonl with random:40% sampling strategy +Rank 0: Loaded 1566 samples from VC:s3://gui-agent/data_20250428/web_canvas/internvl_grounding_20250429.jsonl +Rank 0: Loading web_internvl_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/web/internvl_grounding_20250505.jsonl with random:80% sampling strategy +Rank 0: Loaded 174170 samples from VC:s3://gui-agent/data_20250421/web/internvl_grounding_20250505.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/ubuntu/internvl_grounding_20250505.jsonl with random:80% sampling strategy +Rank 0: Loaded 24862 samples from VC:s3://gui-agent/data_20250428/ubuntu/internvl_grounding_20250505.jsonl +Rank 0: Loading android_internvl_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/android/internvl_grounding_20250506.jsonl with random:80% sampling strategy +Rank 0: Loaded 9142 samples from VC:s3://gui-agent/data_20250505/android/internvl_grounding_20250506.jsonl +Rank 0: Loading windows_internvl_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/internvl_grounding_20250508_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 4229 samples from VC:s3://gui-agent/data_20250505/windows/internvl_grounding_20250508_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/crop_internvl_grounding_20250508_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 4200 samples from VC:s3://gui-agent/data_20250505/windows/crop_internvl_grounding_20250508_20250722.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250508 +Rank 0: Loading VC:s3://gui-agent/data_20250508/ubuntu/internvl_grounding_20250509.jsonl with random:80% sampling strategy +Rank 0: Loaded 2868 samples from VC:s3://gui-agent/data_20250508/ubuntu/internvl_grounding_20250509.jsonl +Rank 0: Loading windows_internvl_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250510_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 9494 samples from VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250510_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250510_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 9494 samples from VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250510_20250722.jsonl +Rank 0: Loading windows_internvl_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250526_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 2270 samples from VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250526_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250526_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 2270 samples from VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250526_20250722.jsonl +Rank 0: Loading windows_internvl_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250529_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 28466 samples from VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250529_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250529_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 28466 samples from VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250529_20250722.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250619_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1620 samples from VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250619_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250619_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1620 samples from VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250619_20250722.jsonl +Rank 0: Loading windows_hover_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250620_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1714 samples from VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250620_20250722.jsonl +Rank 0: Loading windows_crop_hover_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250620_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 1714 samples from VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250620_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_1 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 1372 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_2 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2743 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_3 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2743 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_pure_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_4 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 590 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_5 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 1296 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_6 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 1296 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_pure_paste.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/internvl_grounding_20250627_20250722.jsonl with all sampling strategy +Rank 0: Loaded 4230 samples from VC:s3://gui-agent/data_20250630/windows/internvl_grounding_20250627_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/crop_internvl_grounding_20250627_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 4230 samples from VC:s3://gui-agent/data_20250630/windows/crop_internvl_grounding_20250627_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_1 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 1692 samples from VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 3384 samples from VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 3384 samples from VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_pure_paste.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/internvl_grounding_20250630_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1077 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/internvl_grounding_20250630_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_internvl_grounding_20250630_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 862 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_internvl_grounding_20250630_20250722.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/internvl_grounding_20250703_20250722.jsonl with all sampling strategy +Rank 0: Loaded 2676 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/internvl_grounding_20250703_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_internvl_grounding_20250703_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 2676 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_internvl_grounding_20250703_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_4 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 1070 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_5 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2141 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_6 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2141 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_pure_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_7 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 431 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_8 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 862 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_9 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 862 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_pure_paste.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/internvl_grounding_20250708_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1344 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/internvl_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_internvl_grounding_20250708_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1344 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_internvl_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250707_1 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_internvl_grounding_20250708.jsonl with random:40% sampling strategy +Rank 0: Loaded 292 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_internvl_grounding_20250708.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250707_2 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_internvl_grounding_20250708.jsonl with random:40% sampling strategy +Rank 0: Loaded 1075 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_internvl_grounding_20250708.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250707_3 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_internvl_grounding_20250708.jsonl with random:40% sampling strategy +Rank 0: Loaded 1075 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_internvl_grounding_20250708.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/internvl_grounding_20250717_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1509 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/internvl_grounding_20250717_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_internvl_grounding_20250717_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 1207 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_internvl_grounding_20250717_20250722.jsonl +Rank 0: Loading uibert_train_ground_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/uibert_train_ground_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 4646 samples from VC:s3://gui/new_annotations/gui_data_grounding/uibert_train_ground_d240430_v1.jsonl +Rank 0: Loading openapp_taperception_grounding_d240815_v2 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/openapp_taperception_grounding_d240815_v2.jsonl with all sampling strategy +Rank 0: Loaded 2500 samples from VC:s3://gui/new_annotations/gui_data_grounding/openapp_taperception_grounding_d240815_v2.jsonl +Rank 0: Loading openapp_widget_grounding_d240815_v2 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/openapp_widget_grounding_d240815_v2.jsonl with all sampling strategy +Rank 0: Loaded 14878 samples from VC:s3://gui/new_annotations/gui_data_grounding/openapp_widget_grounding_d240815_v2.jsonl +Rank 0: Loading openapp_mug_grounding_d240812 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/openapp_mug_grounding_d240812.jsonl with all sampling strategy +Rank 0: Loaded 26090 samples from VC:s3://gui/new_annotations/gui_data_grounding/openapp_mug_grounding_d240812.jsonl +Rank 0: Loading private_ui_phone_2403_ground_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/private_ui_phone_2403_ground_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 24798 samples from VC:s3://gui/new_annotations/gui_data_grounding/private_ui_phone_2403_ground_d240430_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_ground_d240521_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2405_ground_d240521_v1.jsonl with all sampling strategy +Rank 0: Loaded 5008 samples from VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2405_ground_d240521_v1.jsonl +Rank 0: Loading private_ui_aig_share_2406_ground_d240612_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2406_ground_d240612_v1.jsonl with all sampling strategy +Rank 0: Loaded 7903 samples from VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2406_ground_d240612_v1.jsonl +Rank 0: Loading windows_pc_agent_e_planning_cot +Rank 0: Loading VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 55564 samples from VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data.jsonl +Rank 0: Loading windows_pc_agent_e_navigation +Rank 0: Loading VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data_without_think.jsonl with all sampling strategy +Rank 0: Loaded 27782 samples from VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data_without_think.jsonl +Rank 0: Loading os_genesis_ac_training_data +Rank 0: Skipping os_genesis_ac_training_data due to repeat_time=0 +Rank 0: Loading os_genesis_aw_training_data +Rank 0: Skipping os_genesis_aw_training_data due to repeat_time=0 +Rank 0: Loading os_genesis_web_training +Rank 0: Skipping os_genesis_web_training due to repeat_time=0 +Rank 0: Loading gui_odyssey_plus_1 +Rank 0: Skipping gui_odyssey_plus_1 due to repeat_time=0 +Rank 0: Loading gui_odyssey_plus_2 +Rank 0: Skipping gui_odyssey_plus_2 due to repeat_time=0 +Rank 0: Loading gui_odyssey_plus_custom_3 +Rank 0: Skipping gui_odyssey_plus_custom_3 due to repeat_time=0 +Rank 0: Loading mm_gui_mid +Rank 0: Skipping mm_gui_mid due to repeat_time=0 +Rank 0: Loading text_gui_mid +Rank 0: Skipping text_gui_mid due to repeat_time=0 +Rank 0: Loading gui_mid_trajectory +Rank 0: Skipping gui_mid_trajectory due to repeat_time=0 +Rank 0: Loading ubuntu_rag +Rank 0: Loading VC:s3://gui-agent/cua_text_rag/ubuntu_rag.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7024 samples from VC:s3://gui-agent/cua_text_rag/ubuntu_rag.jsonl +Rank 0: Loading windows_rag +Rank 0: Loading VC:s3://gui-agent/cua_text_rag/windows_rag.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3144 samples from VC:s3://gui-agent/cua_text_rag/windows_rag.jsonl +Rank 0: Total training samples: 6240940 +Rank 0: Formatting inputs...Skip in lazy mode +W0816 17:23:41.221000 117429 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0816 17:23:41.221000 117429 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0816 17:23:41.221000 117429 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0816 17:23:41.221000 117429 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0816 17:23:41.269000 31339 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0816 17:23:41.269000 31339 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0816 17:23:41.269000 31339 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0816 17:23:41.269000 31339 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0816 17:23:41.672000 99520 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0816 17:23:41.672000 99520 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0816 17:23:41.672000 99520 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0816 17:23:41.672000 99520 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0816 17:23:41.826000 30659 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0816 17:23:41.826000 30659 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0816 17:23:41.826000 30659 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0816 17:23:41.826000 30659 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0816 17:23:41.884000 41323 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0816 17:23:41.884000 41323 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0816 17:23:41.884000 41323 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0816 17:23:41.884000 41323 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0816 17:23:42.136000 30372 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0816 17:23:42.136000 30372 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0816 17:23:42.136000 30372 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0816 17:23:42.136000 30372 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0816 17:23:42.143000 117667 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0816 17:23:42.143000 117667 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0816 17:23:42.143000 117667 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0816 17:23:42.143000 117667 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0816 17:23:44.430000 66621 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0816 17:23:44.430000 66621 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0816 17:23:44.430000 66621 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0816 17:23:44.430000 66621 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0816 21:01:58.721000 50715 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0816 21:01:58.721000 50715 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0816 21:01:58.721000 50715 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0816 21:01:58.721000 50715 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0816 21:01:58.728000 100739 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0816 21:01:58.728000 100739 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0816 21:01:58.728000 100739 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0816 21:01:58.728000 100739 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0816 21:02:02.849000 23464 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0816 21:02:02.849000 23464 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0816 21:02:02.849000 23464 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0816 21:02:02.849000 23464 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0816 21:02:03.950000 106705 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0816 21:02:03.950000 106705 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0816 21:02:03.950000 106705 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0816 21:02:03.950000 106705 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +[2025-08-16 21:03:02,107] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 21:03:02,107] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 21:03:02,114] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 21:03:02,115] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 21:03:02,143] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 21:03:02,145] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 21:03:02,149] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 21:03:02,149] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 21:03:02,152] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 21:03:02,152] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 21:03:02,152] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 21:03:02,164] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 21:03:02,160] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 21:03:02,160] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 21:03:02,171] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 21:03:02,156] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 21:03:02,156] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 21:03:02,156] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 21:03:02,157] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 21:03:02,167] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 21:03:02,168] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 21:03:02,168] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 21:03:02,173] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 21:03:02,173] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 21:03:02,173] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 21:03:02,175] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 21:03:02,177] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 21:03:02,177] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 21:03:02,177] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 21:03:02,176] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 21:03:02,176] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 21:03:02,173] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 21:03:02,179] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 21:03:02,180] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 21:03:02,180] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 21:03:02,180] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 21:03:02,182] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 21:03:02,184] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 21:03:02,184] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 21:03:02,184] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 21:03:02,190] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 21:03:02,190] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 21:03:02,190] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-16 21:03:15,916] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 21:03:15,923] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 21:03:15,923] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 21:03:15,917] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 21:03:15,917] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 21:03:15,917] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 21:03:15,918] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 21:03:15,918] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 21:03:15,926] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 21:03:15,926] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 21:03:15,927] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 21:03:15,927] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 21:03:15,927] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 21:03:15,927] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 21:03:15,927] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 21:03:15,932] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 21:03:15,932] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 21:03:15,934] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 21:03:15,934] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 21:03:15,934] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 21:03:15,935] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 21:03:15,937] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 21:03:15,937] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 21:03:15,935] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 21:03:15,936] [INFO] [comm.py:683:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl +[2025-08-16 21:03:15,936] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 21:03:15,961] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 21:03:15,962] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 21:03:15,963] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 21:03:15,964] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 21:03:15,964] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 21:03:15,964] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 21:03:15,964] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 21:03:15,965] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-16 21:03:16,392] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-16 21:03:16,414] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-16 21:03:16,421] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-16 21:03:16,440] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-16 21:03:16,443] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-16 21:03:16,475] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-16 21:03:16,497] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 21:03:16,500] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-16 21:03:16,505] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 21:03:16,508] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 21:03:16,510] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-16 21:03:16,510] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 21:03:16,510] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-16 21:03:16,524] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 21:03:16,525] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 21:03:16,529] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-16 21:03:16,531] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 21:03:16,531] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-16 21:03:16,533] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-16 21:03:16,540] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 21:03:16,543] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 21:03:16,543] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 21:03:16,544] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 21:03:16,545] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-16 21:03:16,548] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 21:03:16,548] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-16 21:03:16,550] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. + on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-16 21:03:16,559] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 21:03:16,560] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 21:03:16,561] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-16 21:03:16,568] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 21:03:16,566] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 21:03:16,573] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-16 21:03:16,578] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 21:03:16,568] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 21:03:16,568] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 21:03:16,577] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 21:03:16,581] [INFO] [config.py:733:__init__] Config mesh_deviYou are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU afteYou are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-16 21:03:16,583] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +r initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to mo[2025-08-16 21:03:16,584] [INFO] [config.py:733:__init__] Config mesh_deviYou are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +ith `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-16 21:03:16,587] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-16 21:03:16,587] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-16 21:03:16,592] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-16 21:03:16,595] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-16 21:03:16,856] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-16 21:03:17,007] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-16 21:03:17,019] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-16 21:03:17,043] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 21:03:17,043] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-16 21:03:17,062] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 21:03:17,065] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-16 21:03:17,068] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-16 21:03:27,737] [INFO] [partition_parameters.py:348:__exit__] finished initializing model - num_params = 825, num_elems = 4.07B + Loading checkpoint shards: 0%| | 0/2 [00:00 before Client(conf_path) +Rank 0: --> after Client(conf_path) +Rank 0: Loading datasets: /mnt/petrelfs/liuzhaoyang/workspace/GUIAgent/internvl_chat/data/internvl_meta/meta/meta_250816_1.json +Rank 0: Loading guienv +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl with all sampling strategy +Rank 0: Loaded 327972 samples from VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl +Rank 0: Loading omniact +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl with all sampling strategy +Rank 0: Loaded 6720 samples from VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl +Rank 0: Loading ricoig16k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16133 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl +Rank 0: Loading ricosca +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl with all sampling strategy +Rank 0: Loaded 173212 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl +Rank 0: Loading seeclick +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl with all sampling strategy +Rank 0: Loaded 271121 samples from VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl +Rank 0: Loading ui_refexp +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl with all sampling strategy +Rank 0: Loaded 15624 samples from VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl +Rank 0: Loading webui350k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 57389 samples from VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl +Rank 0: Loading widget_captioning +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl with all sampling strategy +Rank 0: Loaded 101426 samples from VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl +Rank 0: Loading aitw-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl +Rank 0: Loading aitw-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl +Rank 0: Loading aitw-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl +Rank 0: Loading amex-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl +Rank 0: Loading amex-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl +Rank 0: Loading amex-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl +Rank 0: Loading android_control +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 149428 samples from VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl +Rank 0: Loading coat +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl with all sampling strategy +Rank 0: Loaded 11833 samples from VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl +Rank 0: Loading guiact-web-multi-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl +Rank 0: Loading guiact-web-multi-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl +Rank 0: Loading guiact-web-multi-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl +Rank 0: Loading guiact-web-single +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl with all sampling strategy +Rank 0: Loaded 67396 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl +Rank 0: Loading guide +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl with all sampling strategy +Rank 0: Loaded 13544 samples from VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl +Rank 0: Loading gui-odyssey-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl +Rank 0: Loading gui-odyssey-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl +Rank 0: Loading gui-odyssey-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl +Rank 0: Loading mind2web-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l1_202507011.jsonl +Rank 0: Loading mind2web-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l2_202507011.jsonl +Rank 0: Loading mind2web-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l3_202507011.jsonl +Rank 0: Loading miniwob-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l1_202507011.jsonl +Rank 0: Loading miniwob-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l2_202507011.jsonl +Rank 0: Loading miniwob-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l3_202507011.jsonl +Rank 0: Loading aguvis_android_control-v2 +Rank 0: Skipping aguvis_android_control-v2 due to repeat_time=0 +Rank 0: Loading aguvis_coat-v2 +Rank 0: Skipping aguvis_coat-v2 due to repeat_time=0 +Rank 0: Loading aguvis_docvqa_grounding +Rank 0: Skipping aguvis_docvqa_grounding due to repeat_time=0 +Rank 0: Loading aguvis_guiact-web-multi +Rank 0: Skipping aguvis_guiact-web-multi due to repeat_time=0 +Rank 0: Loading aguvis_guiact-web-single-v2 +Rank 0: Skipping aguvis_guiact-web-single-v2 due to repeat_time=0 +Rank 0: Loading aguvis_guide_si_10k-v2 +Rank 0: Skipping aguvis_guide_si_10k-v2 due to repeat_time=0 +Rank 0: Loading aguvis_guienv +Rank 0: Skipping aguvis_guienv due to repeat_time=0 +Rank 0: Loading aguvis_mind2web_train_v1.0.1 +Rank 0: Skipping aguvis_mind2web_train_v1.0.1 due to repeat_time=0 +Rank 0: Loading aguvis_omniact +Rank 0: Skipping aguvis_omniact due to repeat_time=0 +Rank 0: Loading aguvis_osatlas_ui_tars_cleaned +Rank 0: Skipping aguvis_osatlas_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading aguvis_ricoig16k +Rank 0: Skipping aguvis_ricoig16k due to repeat_time=0 +Rank 0: Loading aguvis_ricosca +Rank 0: Skipping aguvis_ricosca due to repeat_time=0 +Rank 0: Loading aguvis_seeclick_mi_ui_tars_cleaned +Rank 0: Skipping aguvis_seeclick_mi_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading aguvis_seeclick_ui_tars_cleaned_fixed +Rank 0: Skipping aguvis_seeclick_ui_tars_cleaned_fixed due to repeat_time=0 +Rank 0: Loading aguvis_ui_refexp +Rank 0: Skipping aguvis_ui_refexp due to repeat_time=0 +Rank 0: Loading aguvis_webui350k +Rank 0: Skipping aguvis_webui350k due to repeat_time=0 +Rank 0: Loading aguvis_widget_captioning +Rank 0: Skipping aguvis_widget_captioning due to repeat_time=0 +Rank 0: Loading icon_caption_icon_v0222_description +Rank 0: Skipping icon_caption_icon_v0222_description due to repeat_time=0 +Rank 0: Loading icon_grounding_icon_v0222_grounding +Rank 0: Skipping icon_grounding_icon_v0222_grounding due to repeat_time=0 +Rank 0: Loading refusal_component_final_1.5m +Rank 0: Skipping refusal_component_final_1.5m due to repeat_time=0 +Rank 0: Loading refusal_component_library_snap_icon_data_grounding +Rank 0: Skipping refusal_component_library_snap_icon_data_grounding due to repeat_time=0 +Rank 0: Loading refusal_component_v1_130k +Rank 0: Skipping refusal_component_v1_130k due to repeat_time=0 +Rank 0: Loading refusal_guienv +Rank 0: Skipping refusal_guienv due to repeat_time=0 +Rank 0: Loading refusal_icon_v0222_grounding +Rank 0: Skipping refusal_icon_v0222_grounding due to repeat_time=0 +Rank 0: Loading refusal_osatlas_ui_tars_cleaned +Rank 0: Skipping refusal_osatlas_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading refusal_ricosca +Rank 0: Skipping refusal_ricosca due to repeat_time=0 +Rank 0: Loading refusal_seeclick_mi_ui_tars_cleaned +Rank 0: Skipping refusal_seeclick_mi_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading refusal_seeclick_ui_tars_cleaned_fixed +Rank 0: Skipping refusal_seeclick_ui_tars_cleaned_fixed due to repeat_time=0 +Rank 0: Loading refusal_training_data_icon_grounded_merged +Rank 0: Skipping refusal_training_data_icon_grounded_merged due to repeat_time=0 +Rank 0: Loading component_generated_component_final_1.5m_cleaned_split +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_final_1.5m_cleaned_split_20250713.jsonl with random:10% sampling strategy +Rank 0: Loaded 3987 samples from VC:s3://gui-agent/jedi/annotations_250713/component_final_1.5m_cleaned_split_20250713.jsonl +Rank 0: Loading component_generated_component_library_snap_icon_data_description +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_description_conversations_20250713.jsonl with random:50% sampling strategy +Rank 0: Loaded 11061 samples from VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_description_conversations_20250713.jsonl +Rank 0: Loading component_generated_component_library_snap_icon_data_grounding +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_grounding_conversations_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 4424 samples from VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_grounding_conversations_20250713.jsonl +Rank 0: Loading component_generated_component_v1_130k +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_v1_130k_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 26376 samples from VC:s3://gui-agent/jedi/annotations_250713/component_v1_130k_20250713.jsonl +Rank 0: Loading component_rule-based_doc_data_new +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/doc_data_new_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 3153 samples from VC:s3://gui-agent/jedi/annotations_250713/doc_data_new_20250713.jsonl +Rank 0: Loading component_rule-based_doc_scroll_data_new +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/doc_scroll_data_new_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 603 samples from VC:s3://gui-agent/jedi/annotations_250713/doc_scroll_data_new_20250713.jsonl +Rank 0: Loading component_rule-based_ethercalc_v1 +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/ethercalc_v1_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 2012 samples from VC:s3://gui-agent/jedi/annotations_250713/ethercalc_v1_20250713.jsonl +Rank 0: Loading component_rule-based_slide_v1_17k +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/slide_v1_17k_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 2363 samples from VC:s3://gui-agent/jedi/annotations_250713/slide_v1_17k_20250713.jsonl +Rank 0: Loading icon_caption_ios_app_data +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/ios_app_data_conversations-images_pure_color_background_20250713.jsonl with all sampling strategy +Rank 0: Loaded 49498 samples from VC:s3://gui-agent/jedi/annotations_250713/ios_app_data_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_caption_mac_app_data +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/mac_app_data_conversations-images_pure_color_background_20250713.jsonl with all sampling strategy +Rank 0: Loaded 18083 samples from VC:s3://gui-agent/jedi/annotations_250713/mac_app_data_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_caption_training_data_icon +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_pure_color_background_20250713.jsonl with random:50% sampling strategy +Rank 0: Loaded 75874 samples from VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_grounding_training_data_icon_grounded_merged +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_grounded_merged_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 5466 samples from VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_grounded_merged_20250713.jsonl +Rank 0: Loading layout_layout200k_training_data_qwen25 +Rank 0: Skipping layout_layout200k_training_data_qwen25 due to repeat_time=0 +Rank 0: Loading layout_layout200k_grounding_training_data_qwen25 +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/layout200k_grounding_training_data_qwen25_20250713.jsonl with random:10% sampling strategy +Rank 0: Loaded 158612 samples from VC:s3://gui-agent/jedi/annotations_250713/layout200k_grounding_training_data_qwen25_20250713.jsonl +Rank 0: Loading layout_layout400k_claude_training_data_qwen25_split +Rank 0: Skipping layout_layout400k_claude_training_data_qwen25_split due to repeat_time=0 +Rank 0: Loading layout_layout400k_claude_grounding_training_data_qwen25_split +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/layout400k_claude_grounding_training_data_qwen25_split_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 7540 samples from VC:s3://gui-agent/jedi/annotations_250713/layout400k_claude_grounding_training_data_qwen25_split_20250713.jsonl +Rank 0: Loading layout_os_layout_v1 +Rank 0: Skipping layout_os_layout_v1 due to repeat_time=0 +Rank 0: Loading layout_os_layout_v1_grounding +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/os_layout_v1_grounding_training_data_qwen25_split_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 7857 samples from VC:s3://gui-agent/jedi/annotations_250713/os_layout_v1_grounding_training_data_qwen25_split_20250713.jsonl +Rank 0: Loading mind2web_raw_image +Rank 0: Loading VC:s3://gui-agent/mind2web_train/navigation_20250705.jsonl with all sampling strategy +Rank 0: Loaded 5740 samples from VC:s3://gui-agent/mind2web_train/navigation_20250705.jsonl +Rank 0: Loading ws_android_navigation_20250328 +Rank 0: Skipping ws_android_navigation_20250328 due to repeat_time=0 +Rank 0: Loading ws_android_navigation_20250407 +Rank 0: Skipping ws_android_navigation_20250407 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_w_history_20250328 +Rank 0: Skipping ws_web_navigation_w_history_20250328 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_wo_history_20250328 +Rank 0: Skipping ws_web_navigation_wo_history_20250328 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_20250421 +Rank 0: Skipping ws_web_navigation_20250421 due to repeat_time=0 +Rank 0: Loading ws_ubuntu_navigation_20250328 +Rank 0: Skipping ws_ubuntu_navigation_20250328 due to repeat_time=0 +Rank 0: Loading ws_android_navigation_20250505 +Rank 0: Skipping ws_android_navigation_20250505 due to repeat_time=0 +Rank 0: Loading internal_android_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 48814 samples from VC:s3://gui-agent/data_20250612/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19042 samples from VC:s3://gui-agent/data_20250612/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8363 samples from VC:s3://gui-agent/data_20250612/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 26412 samples from VC:s3://gui-agent/data_20250612/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 57522 samples from VC:s3://gui-agent/data_20250612/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 1342 samples from VC:s3://gui-agent/data_20250624/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 15766 samples from VC:s3://gui-agent/data_20250624/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19280 samples from VC:s3://gui-agent/data_20250630/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 3560 samples from VC:s3://gui-agent/data_20250630/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 9258 samples from VC:s3://gui-agent/data_20250630/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 420 samples from VC:s3://gui-agent/data_20250707/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 8898 samples from VC:s3://gui-agent/data_20250707/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 21026 samples from VC:s3://gui-agent/data_20250707/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 4490 samples from VC:s3://gui-agent/data_20250707/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 22154 samples from VC:s3://gui-agent/data_20250714/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/web/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 11614 samples from VC:s3://gui-agent/data_20250714/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 16767 samples from VC:s3://gui-agent/data_20250714/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 48814 samples from VC:s3://gui-agent/data_20250612/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 11970 samples from VC:s3://gui-agent/data_20250612/android/planning_20250811.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19042 samples from VC:s3://gui-agent/data_20250612/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 4926 samples from VC:s3://gui-agent/data_20250612/mac/planning_20250811.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8363 samples from VC:s3://gui-agent/data_20250612/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3716 samples from VC:s3://gui-agent/data_20250612/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 26412 samples from VC:s3://gui-agent/data_20250612/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 4420 samples from VC:s3://gui-agent/data_20250612/windows/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 115044 samples from VC:s3://gui-agent/data_20250612/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2684 samples from VC:s3://gui-agent/data_20250624/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 15766 samples from VC:s3://gui-agent/data_20250624/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7402 samples from VC:s3://gui-agent/data_20250624/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19280 samples from VC:s3://gui-agent/data_20250630/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3746 samples from VC:s3://gui-agent/data_20250630/android/planning_20250811.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 3560 samples from VC:s3://gui-agent/data_20250630/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 2469 samples from VC:s3://gui-agent/data_20250630/mac/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 18516 samples from VC:s3://gui-agent/data_20250630/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 420 samples from VC:s3://gui-agent/data_20250707/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 243 samples from VC:s3://gui-agent/data_20250707/mac/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 8898 samples from VC:s3://gui-agent/data_20250707/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 1188 samples from VC:s3://gui-agent/data_20250707/windows/planning_20250811.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8979 samples from VC:s3://gui-agent/data_20250707/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 1599 samples from VC:s3://gui-agent/data_20250707/android/planning_20250811.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 21026 samples from VC:s3://gui-agent/data_20250707/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2657 samples from VC:s3://gui-agent/data_20250707/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 22154 samples from VC:s3://gui-agent/data_20250714/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2383 samples from VC:s3://gui-agent/data_20250714/windows/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/web/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 23229 samples from VC:s3://gui-agent/data_20250714/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 16767 samples from VC:s3://gui-agent/data_20250714/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2389 samples from VC:s3://gui-agent/data_20250714/ubuntu/planning_20250811.jsonl +Rank 0: Loading private_aig_share_0815_logo_oral_operation_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_oral_operation_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_oral_operation_d240924_v1.jsonl +Rank 0: Loading private_aig_share_0815_logo_region_caption_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_region_caption_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_region_caption_d240924_v1.jsonl +Rank 0: Loading private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2.jsonl with all sampling strategy +Rank 0: Loaded 20293 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2.jsonl +Rank 0: Loading private_ui_phone_comment_20240606_json_d20241023_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_comment_20240606_json_d20241023_v2.jsonl with all sampling strategy +Rank 0: Loaded 1055 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_comment_20240606_json_d20241023_v2.jsonl +Rank 0: Loading private_ui_internal_aig_json_d241126 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_json_d241126.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 6837 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_json_d241126.jsonl +Rank 0: Loading private_ui_internal_aig_xml_d241126 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_xml_d241126.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 6873 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_xml_d241126.jsonl +Rank 0: Loading OS_Altas_androidworld_grounding_d241120_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/OS_Altas_androidworld_grounding_d241120_v1.jsonl with all sampling strategy +Rank 0: Loaded 89860 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/OS_Altas_androidworld_grounding_d241120_v1.jsonl +Rank 0: Loading private_ui_aig_share_long_caption_20240604_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_long_caption_20240604_v1.jsonl with repeat:4 sampling strategy +Rank 0: Loaded 3156 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_long_caption_20240604_v1.jsonl +Rank 0: Loading aw_1218_grounding +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/grounding_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/grounding_new.jsonl +Rank 0: Loading aw_1218_regioncaption +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/regioncaption_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/regioncaption_new.jsonl +Rank 0: Loading aw_1218_oral_operation +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/oral_operation_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/oral_operation_new.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600.jsonl with all sampling strategy +Rank 0: Loaded 6600 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1.jsonl +Rank 0: Loading private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607.jsonl with all sampling strategy +Rank 0: Loaded 24620 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607.jsonl +Rank 0: Loading private_ui_phone_2403_long_caption_d20240604_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d20240604_v2.jsonl with all sampling strategy +Rank 0: Loaded 17196 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d20240604_v2.jsonl +Rank 0: Loading private_ui_phone_2403_long_caption_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 5998 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d240430_v1.jsonl +Rank 0: Loading private_ui_phone_2403_ocr_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ocr_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 31276 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ocr_d240430_v1.jsonl +Rank 0: Loading screen_qa_with_bbox_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_with_bbox_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 62401 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_with_bbox_d240430_v1.jsonl +Rank 0: Loading screenai_layout_20240604_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screenai_layout_20240604_v1.jsonl with all sampling strategy +Rank 0: Loaded 22076 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screenai_layout_20240604_v1.jsonl +Rank 0: Loading amex_grounding_d240813_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/amex_grounding_d240813_v1.jsonl with all sampling strategy +Rank 0: Loaded 102007 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/amex_grounding_d240813_v1.jsonl +Rank 0: Loading guicourse_guienv_text_grounding_1_d240815_v3 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_1_d240815_v3.jsonl with all sampling strategy +Rank 0: Loaded 63581 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_1_d240815_v3.jsonl +Rank 0: Loading guicourse_guienv_text_grounding_2_d240815_v3 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_2_d240815_v3.jsonl with all sampling strategy +Rank 0: Loaded 6852 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_2_d240815_v3.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1.jsonl +Rank 0: Loading screen_qa_short_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_short_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 27880 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_short_d240430_v1.jsonl +Rank 0: Loading private_aig_share_0815_logo_grounding_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_grounding_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_grounding_d240924_v1.jsonl +Rank 0: Loading private_schedual_extract_20240520_v2_r464_reprompt_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_schedual_extract_20240520_v2_r464_reprompt_d240607.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 928 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_schedual_extract_20240520_v2_r464_reprompt_d240607.jsonl +Rank 0: Loading private_ui2json_app_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_app_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2488 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_app_d20240822_v1.jsonl +Rank 0: Loading private_ui2json_os_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_os_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 1242 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_os_d20240822_v1.jsonl +Rank 0: Loading private_ui2json_web_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_web_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2360 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_web_d20240822_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607.jsonl with all sampling strategy +Rank 0: Loaded 3791 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607.jsonl +Rank 0: Loading private_ui_aig_share_2405_marker_recognition_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_marker_recognition_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5179 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_marker_recognition_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_ocr_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_ocr_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5090 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_ocr_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_operation_oral_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_operation_oral_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5070 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_operation_oral_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5248 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2408_region_caption_d240903_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2408_region_caption_d240903_v1.jsonl with all sampling strategy +Rank 0: Loaded 5854 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2408_region_caption_d240903_v1.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1.jsonl +Rank 0: Loading uground_web_direct_150k_description_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_direct_150k_description_filtered_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 74208 samples from VC:s3://gui/new_annotations/uground/web_direct_150k_description_filtered_202507011.jsonl +Rank 0: Loading uground_web_direct_258k_function_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_direct_258k_function_filtered_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 124132 samples from VC:s3://gui/new_annotations/uground/web_direct_258k_function_filtered_202507011.jsonl +Rank 0: Loading uground_web_hybrid_773k_max_25qa_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_hybrid_773k_max_25qa_filtered_new_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 621928 samples from VC:s3://gui/new_annotations/uground/web_hybrid_773k_max_25qa_filtered_new_202507011.jsonl +Rank 0: Loading altas_windows +Rank 0: Loading VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_windows_splited_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 537672 samples from VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_windows_splited_202507011.jsonl +Rank 0: Loading altas_linux +Rank 0: Loading VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_linux_splited_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 21578 samples from VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_linux_splited_202507011.jsonl +Rank 0: Loading atlas_macos +Rank 0: Loading VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_macos_splited_202507011.jsonl with random:20% sampling strategy +Rank 0: Loaded 3680 samples from VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_macos_splited_202507011.jsonl +Rank 0: Loading android_action_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/android/filter_action_grounding_20250405_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 5621 samples from VC:s3://gui/data_20250328/android/filter_action_grounding_20250405_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250328 +Rank 0: Loading VC:s3://gui-agent/data_20250328/windows/action_grounding_20250409_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 11980 samples from VC:s3://gui-agent/data_20250328/windows/action_grounding_20250409_202507011_20250722.jsonl +Rank 0: Loading web_action_grounding_20250328 +Rank 0: Loading VC:s3://gui-agent/data_20250328/web_25k/action_grounding_20250404_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 9459 samples from VC:s3://gui-agent/data_20250328/web_25k/action_grounding_20250404_202507011.jsonl +Rank 0: Loading ubuntu_action_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/ubuntu/action_grounding_20250407_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 328 samples from VC:s3://gui/data_20250310/ubuntu/action_grounding_20250407_202507011.jsonl +Rank 0: Loading ubuntu_action_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/ubuntu/action_grounding_20250407_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 54 samples from VC:s3://gui/data_20250317/ubuntu/action_grounding_20250407_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/action_grounding_20250421_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 480 samples from VC:s3://gui/data_20250317/windows/action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/crop_action_grounding_20250421_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 240 samples from VC:s3://gui/data_20250317/windows/crop_action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/action_grounding_20250421_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 944 samples from VC:s3://gui/data_20250310/windows/action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/crop_action_grounding_20250421_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 472 samples from VC:s3://gui/data_20250310/windows/crop_action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading mac_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/mac/action_grounding_20250410_202507011.jsonl with all sampling strategy +Rank 0: Loaded 1578 samples from VC:s3://gui-agent/data_20250407/mac/action_grounding_20250410_202507011.jsonl +Rank 0: Loading iphone_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/iphone/white/action_grounding_20250410_202507011.jsonl with all sampling strategy +Rank 0: Loaded 20394 samples from VC:s3://gui-agent/data_20250407/iphone/white/action_grounding_20250410_202507011.jsonl +Rank 0: Loading web_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/web/action_grounding_20250414_202507011.jsonl with random:20% sampling strategy +Rank 0: Loaded 14285 samples from VC:s3://gui-agent/data_20250407/web/action_grounding_20250414_202507011.jsonl +Rank 0: Loading android_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/android/action_grounding_20250410_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 3590 samples from VC:s3://gui-agent/data_20250407/android/action_grounding_20250410_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/action_grounding_20250416_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 21422 samples from VC:s3://gui-agent/data_20250407/windows/action_grounding_20250416_202507011_20250722.jsonl +Rank 0: Loading windows_human_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/human_action_grounding_20250416_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 100 samples from VC:s3://gui-agent/data_20250407/windows/human_action_grounding_20250416_202507011_20250722.jsonl +Rank 0: Loading windows_aug_cropping_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/sub_action_grounding_20250421_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 7675 samples from VC:s3://gui-agent/data_20250407/windows/sub_action_grounding_20250421_202507011.jsonl +Rank 0: Loading iphone_action_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/action_grounding_20250417_202507011.jsonl with all sampling strategy +Rank 0: Loaded 20116 samples from VC:s3://gui-agent/data_20250414/iphone/action_grounding_20250417_202507011.jsonl +Rank 0: Loading iphone_human_action_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/human_action_grounding_20250421_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2520 samples from VC:s3://gui-agent/data_20250414/iphone/human_action_grounding_20250421_202507011.jsonl +Rank 0: Loading mac_human_action_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/mac/human_action_grounding_20250418_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7814 samples from VC:s3://gui-agent/data_20250414/mac/human_action_grounding_20250418_202507011.jsonl +Rank 0: Loading android_action_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/Android/action_grounding_20250429_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 17838 samples from VC:s3://gui-agent/data_20250421/Android/action_grounding_20250429_202507011.jsonl +Rank 0: Loading android_action_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/Android/action_grounding_20250429_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 9008 samples from VC:s3://gui-agent/data_20250428/Android/action_grounding_20250429_202507011.jsonl +Rank 0: Loading web_canvas_action_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/web_canvas/action_grounding_20250429_202507011.jsonl with random:20% sampling strategy +Rank 0: Loaded 624 samples from VC:s3://gui-agent/data_20250428/web_canvas/action_grounding_20250429_202507011.jsonl +Rank 0: Loading web_action_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/web/action_grounding_20250505_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 100652 samples from VC:s3://gui-agent/data_20250421/web/action_grounding_20250505_202507011.jsonl +Rank 0: Loading ubuntu_action_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/ubuntu/action_grounding_20250505_202507011.jsonl with all sampling strategy +Rank 0: Loaded 28346 samples from VC:s3://gui-agent/data_20250428/ubuntu/action_grounding_20250505_202507011.jsonl +Rank 0: Loading android_action_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/android/action_grounding_20250506_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 4907 samples from VC:s3://gui-agent/data_20250505/android/action_grounding_20250506_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/action_grounding_20250508_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 2635 samples from VC:s3://gui-agent/data_20250505/windows/action_grounding_20250508_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/crop_action_grounding_20250508_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 5234 samples from VC:s3://gui-agent/data_20250505/windows/crop_action_grounding_20250508_202507011_20250722.jsonl +Rank 0: Loading ubuntu_action_grounding_20250508 +Rank 0: Loading VC:s3://gui-agent/data_20250508/ubuntu/action_grounding_20250509_202507011.jsonl with all sampling strategy +Rank 0: Loaded 3404 samples from VC:s3://gui-agent/data_20250508/ubuntu/action_grounding_20250509_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250510_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250510_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250510_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250510_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250526_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250526_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250526_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250526_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250526_3 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250527_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250527_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_3 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250527_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250527_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250529_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 17050 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250529_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250529_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 17050 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250529_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_1 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250510_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 2855 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250510_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_2 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250526_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 655 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250526_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_3 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250527_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 833 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250527_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_4 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250529_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 2643 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250529_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_1 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250510_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250510_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_2 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250526_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250526_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_3 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250527_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250527_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_4 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250529_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5286 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250529_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_5 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250510_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250510_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_6 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250526_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250526_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_7 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250527_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250527_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_8 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250529_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5286 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250529_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/action_grounding_20250619_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3130 samples from VC:s3://gui-agent/data_20250623/windows/action_grounding_20250619_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250619_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 3130 samples from VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250619_202507011_20250722.jsonl +Rank 0: Loading windows_hover_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/action_grounding_20250620_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 3333 samples from VC:s3://gui-agent/data_20250623/windows/action_grounding_20250620_202507011_20250722.jsonl +Rank 0: Loading windows_crop_hover_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250620_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 3333 samples from VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250620_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_1 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 833 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_2 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1666 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_3 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1666 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_pure_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_4 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 358 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_5 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 782 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_6 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 782 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_pure_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/action_grounding_20250627_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7946 samples from VC:s3://gui-agent/data_20250630/windows/action_grounding_20250627_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/crop_action_grounding_20250627_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 3973 samples from VC:s3://gui-agent/data_20250630/windows/crop_action_grounding_20250627_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_1 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 993 samples from VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1986 samples from VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1986 samples from VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_pure_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/action_grounding_20250630_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 1990 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/action_grounding_20250630_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_action_grounding_20250630_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1990 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_action_grounding_20250630_202507011_20250722.jsonl +Rank 0: Loading windows_human_action_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/action_grounding_20250703_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 5040 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/action_grounding_20250703_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_action_grounding_20250703_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 5040 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_action_grounding_20250703_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_4 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 630 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_5 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1260 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_6 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1260 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_pure_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_7 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 249 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_8 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 498 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_9 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 498 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_pure_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/action_grounding_20250708_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2538 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/action_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_action_grounding_20250708_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1269 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_action_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250707_1 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_action_grounding_20250708.jsonl with random:25% sampling strategy +Rank 0: Loaded 172 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_action_grounding_20250708.jsonl +Rank 0: Loading windows_aug_action_grounding_20250707_2 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_action_grounding_20250708.jsonl with random:25% sampling strategy +Rank 0: Loaded 634 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_action_grounding_20250708.jsonl +Rank 0: Loading windows_aug_action_grounding_20250707_3 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_action_grounding_20250708.jsonl with random:25% sampling strategy +Rank 0: Loaded 634 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_action_grounding_20250708.jsonl +Rank 0: Loading windows_human_action_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/action_grounding_20250717_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2832 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/action_grounding_20250717_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_action_grounding_20250717_20250722.jsonl with all sampling strategy +Rank 0: Loaded 2832 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_action_grounding_20250717_20250722.jsonl +Rank 0: Loading android_ocr_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/android/text_ocr_20250409.jsonl with all sampling strategy +Rank 0: Loaded 29878 samples from VC:s3://gui/data_20250328/android/text_ocr_20250409.jsonl +Rank 0: Loading mac_orc_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/mac/element_ocr_20250328.jsonl with all sampling strategy +Rank 0: Loaded 4393 samples from VC:s3://gui/data_20250328/mac/element_ocr_20250328.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/ubuntu/internvl_grounding_20250407.jsonl with random:80% sampling strategy +Rank 0: Loaded 254 samples from VC:s3://gui/data_20250310/ubuntu/internvl_grounding_20250407.jsonl +Rank 0: Loading windows_click_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/internvl_grounding_function_20250421.jsonl with random:80% sampling strategy +Rank 0: Loaded 1802 samples from VC:s3://gui/data_20250310/windows/internvl_grounding_function_20250421.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/ubuntu/internvl_grounding_20250407.jsonl with random:80% sampling strategy +Rank 0: Loaded 53 samples from VC:s3://gui/data_20250317/ubuntu/internvl_grounding_20250407.jsonl +Rank 0: Loading windows_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/internvl_grounding_20250421_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 6578 samples from VC:s3://gui/data_20250317/windows/internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/crop_internvl_grounding_20250421_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 3287 samples from VC:s3://gui/data_20250317/windows/crop_internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading windows_click_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/internvl_grounding_function_20250421_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 542 samples from VC:s3://gui/data_20250317/windows/internvl_grounding_function_20250421_20250722.jsonl +Rank 0: Loading windows_crop_click_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/crop_internvl_grounding_function_20250421_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 270 samples from VC:s3://gui/data_20250317/windows/crop_internvl_grounding_function_20250421_20250722.jsonl +Rank 0: Loading windows_internvl_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/internvl_grounding_20250421_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 10908 samples from VC:s3://gui/data_20250310/windows/internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/crop_internvl_grounding_20250421_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 5453 samples from VC:s3://gui/data_20250310/windows/crop_internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading android_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/android/internvl_grounding_20250409.jsonl with random:80% sampling strategy +Rank 0: Loaded 13950 samples from VC:s3://gui/data_20250328/android/internvl_grounding_20250409.jsonl +Rank 0: Loading windows_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui-agent/data_20250328/windows/internvl_grounding_20250425_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 12390 samples from VC:s3://gui-agent/data_20250328/windows/internvl_grounding_20250425_20250722.jsonl +Rank 0: Loading web_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/web_25k/internvl_grounding_20250409.jsonl with random:80% sampling strategy +Rank 0: Loaded 13402 samples from VC:s3://gui/data_20250328/web_25k/internvl_grounding_20250409.jsonl +Rank 0: Loading icon_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/icon_canva/icon_anno_20250328.jsonl with all sampling strategy +Rank 0: Loaded 81303 samples from VC:s3://gui/data_20250328/icon_canva/icon_anno_20250328.jsonl +Rank 0: Loading mac_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/mac/internvl_grounding_20250410.jsonl with all sampling strategy +Rank 0: Loaded 1251 samples from VC:s3://gui-agent/data_20250407/mac/internvl_grounding_20250410.jsonl +Rank 0: Loading iphone_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/iphone/white/internvl_grounding_20250410.jsonl with all sampling strategy +Rank 0: Loaded 27849 samples from VC:s3://gui-agent/data_20250407/iphone/white/internvl_grounding_20250410.jsonl +Rank 0: Loading web_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/web/internvl_grounding_20250414.jsonl with random:80% sampling strategy +Rank 0: Loaded 51607 samples from VC:s3://gui-agent/data_20250407/web/internvl_grounding_20250414.jsonl +Rank 0: Loading android_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/android/internvl_grounding_20250410.jsonl with random:80% sampling strategy +Rank 0: Loaded 6281 samples from VC:s3://gui-agent/data_20250407/android/internvl_grounding_20250410.jsonl +Rank 0: Loading windows_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/internvl_grounding_20250416_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 25961 samples from VC:s3://gui-agent/data_20250407/windows/internvl_grounding_20250416_20250722.jsonl +Rank 0: Loading windows_cropping_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/sub_internvl_grounding_20250421.jsonl with random:40% sampling strategy +Rank 0: Loaded 9763 samples from VC:s3://gui-agent/data_20250407/windows/sub_internvl_grounding_20250421.jsonl +Rank 0: Loading iphone_internvl_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/internvl_grounding_20250417.jsonl with all sampling strategy +Rank 0: Loaded 16896 samples from VC:s3://gui-agent/data_20250414/iphone/internvl_grounding_20250417.jsonl +Rank 0: Loading iphone_human_internvl_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/human_internvl_grounding_20250421.jsonl with all sampling strategy +Rank 0: Loaded 927 samples from VC:s3://gui-agent/data_20250414/iphone/human_internvl_grounding_20250421.jsonl +Rank 0: Loading mac_human_internvl_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/mac/human_internvl_grounding_20250418.jsonl with all sampling strategy +Rank 0: Loaded 3051 samples from VC:s3://gui-agent/data_20250414/mac/human_internvl_grounding_20250418.jsonl +Rank 0: Loading android_internvl_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/Android/internvl_grounding_20250429.jsonl with random:80% sampling strategy +Rank 0: Loaded 25217 samples from VC:s3://gui-agent/data_20250421/Android/internvl_grounding_20250429.jsonl +Rank 0: Loading android_internvl_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/Android/internvl_grounding_20250429.jsonl with random:80% sampling strategy +Rank 0: Loaded 12677 samples from VC:s3://gui-agent/data_20250428/Android/internvl_grounding_20250429.jsonl +Rank 0: Loading web_canvas_internvl_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/web_canvas/internvl_grounding_20250429.jsonl with random:40% sampling strategy +Rank 0: Loaded 1566 samples from VC:s3://gui-agent/data_20250428/web_canvas/internvl_grounding_20250429.jsonl +Rank 0: Loading web_internvl_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/web/internvl_grounding_20250505.jsonl with random:80% sampling strategy +Rank 0: Loaded 174170 samples from VC:s3://gui-agent/data_20250421/web/internvl_grounding_20250505.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/ubuntu/internvl_grounding_20250505.jsonl with random:80% sampling strategy +Rank 0: Loaded 24862 samples from VC:s3://gui-agent/data_20250428/ubuntu/internvl_grounding_20250505.jsonl +Rank 0: Loading android_internvl_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/android/internvl_grounding_20250506.jsonl with random:80% sampling strategy +Rank 0: Loaded 9142 samples from VC:s3://gui-agent/data_20250505/android/internvl_grounding_20250506.jsonl +Rank 0: Loading windows_internvl_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/internvl_grounding_20250508_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 4229 samples from VC:s3://gui-agent/data_20250505/windows/internvl_grounding_20250508_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/crop_internvl_grounding_20250508_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 4200 samples from VC:s3://gui-agent/data_20250505/windows/crop_internvl_grounding_20250508_20250722.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250508 +Rank 0: Loading VC:s3://gui-agent/data_20250508/ubuntu/internvl_grounding_20250509.jsonl with random:80% sampling strategy +Rank 0: Loaded 2868 samples from VC:s3://gui-agent/data_20250508/ubuntu/internvl_grounding_20250509.jsonl +Rank 0: Loading windows_internvl_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250510_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 9494 samples from VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250510_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250510_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 9494 samples from VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250510_20250722.jsonl +Rank 0: Loading windows_internvl_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250526_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 2270 samples from VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250526_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250526_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 2270 samples from VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250526_20250722.jsonl +Rank 0: Loading windows_internvl_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250529_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 28466 samples from VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250529_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250529_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 28466 samples from VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250529_20250722.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250619_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1620 samples from VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250619_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250619_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1620 samples from VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250619_20250722.jsonl +Rank 0: Loading windows_hover_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250620_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1714 samples from VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250620_20250722.jsonl +Rank 0: Loading windows_crop_hover_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250620_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 1714 samples from VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250620_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_1 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 1372 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_2 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2743 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_3 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2743 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_pure_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_4 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 590 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_5 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 1296 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_6 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 1296 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_pure_paste.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/internvl_grounding_20250627_20250722.jsonl with all sampling strategy +Rank 0: Loaded 4230 samples from VC:s3://gui-agent/data_20250630/windows/internvl_grounding_20250627_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/crop_internvl_grounding_20250627_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 4230 samples from VC:s3://gui-agent/data_20250630/windows/crop_internvl_grounding_20250627_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_1 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 1692 samples from VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 3384 samples from VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 3384 samples from VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_pure_paste.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/internvl_grounding_20250630_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1077 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/internvl_grounding_20250630_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_internvl_grounding_20250630_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 862 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_internvl_grounding_20250630_20250722.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/internvl_grounding_20250703_20250722.jsonl with all sampling strategy +Rank 0: Loaded 2676 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/internvl_grounding_20250703_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_internvl_grounding_20250703_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 2676 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_internvl_grounding_20250703_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_4 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 1070 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_5 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2141 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_6 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2141 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_pure_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_7 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 431 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_8 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 862 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_9 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 862 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_pure_paste.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/internvl_grounding_20250708_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1344 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/internvl_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_internvl_grounding_20250708_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1344 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_internvl_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250707_1 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_internvl_grounding_20250708.jsonl with random:40% sampling strategy +Rank 0: Loaded 292 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_internvl_grounding_20250708.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250707_2 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_internvl_grounding_20250708.jsonl with random:40% sampling strategy +Rank 0: Loaded 1075 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_internvl_grounding_20250708.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250707_3 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_internvl_grounding_20250708.jsonl with random:40% sampling strategy +Rank 0: Loaded 1075 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_internvl_grounding_20250708.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/internvl_grounding_20250717_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1509 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/internvl_grounding_20250717_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_internvl_grounding_20250717_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 1207 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_internvl_grounding_20250717_20250722.jsonl +Rank 0: Loading uibert_train_ground_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/uibert_train_ground_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 4646 samples from VC:s3://gui/new_annotations/gui_data_grounding/uibert_train_ground_d240430_v1.jsonl +Rank 0: Loading openapp_taperception_grounding_d240815_v2 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/openapp_taperception_grounding_d240815_v2.jsonl with all sampling strategy +Rank 0: Loaded 2500 samples from VC:s3://gui/new_annotations/gui_data_grounding/openapp_taperception_grounding_d240815_v2.jsonl +Rank 0: Loading openapp_widget_grounding_d240815_v2 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/openapp_widget_grounding_d240815_v2.jsonl with all sampling strategy +Rank 0: Loaded 14878 samples from VC:s3://gui/new_annotations/gui_data_grounding/openapp_widget_grounding_d240815_v2.jsonl +Rank 0: Loading openapp_mug_grounding_d240812 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/openapp_mug_grounding_d240812.jsonl with all sampling strategy +Rank 0: Loaded 26090 samples from VC:s3://gui/new_annotations/gui_data_grounding/openapp_mug_grounding_d240812.jsonl +Rank 0: Loading private_ui_phone_2403_ground_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/private_ui_phone_2403_ground_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 24798 samples from VC:s3://gui/new_annotations/gui_data_grounding/private_ui_phone_2403_ground_d240430_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_ground_d240521_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2405_ground_d240521_v1.jsonl with all sampling strategy +Rank 0: Loaded 5008 samples from VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2405_ground_d240521_v1.jsonl +Rank 0: Loading private_ui_aig_share_2406_ground_d240612_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2406_ground_d240612_v1.jsonl with all sampling strategy +Rank 0: Loaded 7903 samples from VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2406_ground_d240612_v1.jsonl +Rank 0: Loading windows_pc_agent_e_planning_cot +Rank 0: Loading VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 55564 samples from VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data.jsonl +Rank 0: Loading windows_pc_agent_e_navigation +Rank 0: Loading VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data_without_think.jsonl with all sampling strategy +Rank 0: Loaded 27782 samples from VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data_without_think.jsonl +Rank 0: Loading os_genesis_ac_training_data +Rank 0: Skipping os_genesis_ac_training_data due to repeat_time=0 +Rank 0: Loading os_genesis_aw_training_data +Rank 0: Skipping os_genesis_aw_training_data due to repeat_time=0 +Rank 0: Loading os_genesis_web_training +Rank 0: Skipping os_genesis_web_training due to repeat_time=0 +Rank 0: Loading gui_odyssey_plus_1 +Rank 0: Skipping gui_odyssey_plus_1 due to repeat_time=0 +Rank 0: Loading gui_odyssey_plus_2 +Rank 0: Skipping gui_odyssey_plus_2 due to repeat_time=0 +Rank 0: Loading gui_odyssey_plus_custom_3 +Rank 0: Skipping gui_odyssey_plus_custom_3 due to repeat_time=0 +Rank 0: Loading mm_gui_mid +Rank 0: Skipping mm_gui_mid due to repeat_time=0 +Rank 0: Loading text_gui_mid +Rank 0: Skipping text_gui_mid due to repeat_time=0 +Rank 0: Loading gui_mid_trajectory +Rank 0: Skipping gui_mid_trajectory due to repeat_time=0 +Rank 0: Loading ubuntu_rag +Rank 0: Loading VC:s3://gui-agent/cua_text_rag/ubuntu_rag.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7024 samples from VC:s3://gui-agent/cua_text_rag/ubuntu_rag.jsonl +Rank 0: Loading windows_rag +Rank 0: Loading VC:s3://gui-agent/cua_text_rag/windows_rag.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3144 samples from VC:s3://gui-agent/cua_text_rag/windows_rag.jsonl +Rank 0: Total training samples: 6240940 +Rank 0: Formatting inputs...Skip in lazy mode +Detected kernel version 3.10.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher. +Rank 0: Length of multimodal samples: 6230528, pure textual samples: 9984 +Parameter Offload: Total persistent parameters: 755712 in 408 params + 0%| | 0/12188 [00:00 + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 899, in process_image_unified + visual_processed = processor.preprocess(image, return_tensors="pt") + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 432, in preprocess + patches, image_grid_thw = self._preprocess( + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 252, in _preprocess + resized_height, resized_width = smart_resize( + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 69, in smart_resize + raise ValueError(f"height:{height} and width:{width} must be larger than factor:{factor}") +ValueError: height:32 and width:26 must be larger than factor:28 +[Try #0] Failed to fetch sample 2163176 in VC:s3://gui-agent/jedi/images/figma400k/figma400k_extracted/. Exception: height:32 and width:26 must be larger than factor:28 +Problematic sample: {'image': '11c49bcb727e743e8497fa3f8ce1470f7557b6d2b1f6363f08799fec12624ab8.png', 'conversations': [{'from': 'human', 'value': '\nThe position of this The element is a profile picture, commonly used as a button or link to user account settings. can be described as:\nThe profile picture is positioned in the top-right corner of the screen. It is adjacent to a circular button with a compass icon, which is located directly below it. The profile picture is separate from the main map area, which occupies the majority of the screen.'}, {'from': 'gpt', 'value': '[[0, 0, 1000, 1000]]', 'recipient': 'all', 'end_turn': True}]} + 0%| | 17/12188 [03:29<28:35:30, 8.46s/it] {'loss': 0.7235, 'grad_norm': 4.605029459841018, 'learning_rate': 4.3715846994535524e-07, 'epoch': 0.0} + 0%| | 17/12188 [03:29<28:35:30, 8.46s/it] 0%| | 18/12188 [03:37<28:06:21, 8.31s/it] {'loss': 0.7449, 'grad_norm': 4.721653573484392, 'learning_rate': 4.6448087431693996e-07, 'epoch': 0.0} + 0%| | 18/12188 [03:37<28:06:21, 8.31s/it] 0%| | 19/12188 [03:44<26:38:58, 7.88s/it] {'loss': 0.7197, 'grad_norm': 3.9254751916372115, 'learning_rate': 4.918032786885246e-07, 'epoch': 0.0} + 0%| | 19/12188 [03:44<26:38:58, 7.88s/it] 0%| | 20/12188 [03:51<25:38:10, 7.58s/it] {'loss': 0.7328, 'grad_norm': 3.996290247318457, 'learning_rate': 5.191256830601094e-07, 'epoch': 0.0} + 0%| | 20/12188 [03:51<25:38:10, 7.58s/it] 0%| | 21/12188 [03:59<26:33:04, 7.86s/it] {'loss': 0.7397, 'grad_norm': 4.0457784984003045, 'learning_rate': 5.46448087431694e-07, 'epoch': 0.0} + 0%| | 21/12188 [03:59<26:33:04, 7.86s/it] 0%| | 22/12188 [04:08<27:11:35, 8.05s/it] {'loss': 0.7729, 'grad_norm': 3.816381637003003, 'learning_rate': 5.737704918032787e-07, 'epoch': 0.0} + 0%| | 22/12188 [04:08<27:11:35, 8.05s/it] 0%| | 23/12188 [04:16<27:28:49, 8.13s/it] {'loss': 0.6979, 'grad_norm': 3.5932684645988826, 'learning_rate': 6.010928961748634e-07, 'epoch': 0.0} + 0%| | 23/12188 [04:16<27:28:49, 8.13s/it] 0%| | 24/12188 [04:24<27:45:17, 8.21s/it] {'loss': 0.7364, 'grad_norm': 3.5679211998667286, 'learning_rate': 6.284153005464482e-07, 'epoch': 0.0} + 0%| | 24/12188 [04:24<27:45:17, 8.21s/it] 0%| | 25/12188 [04:32<27:36:36, 8.17s/it] {'loss': 0.7886, 'grad_norm': 3.637723665750606, 'learning_rate': 6.557377049180328e-07, 'epoch': 0.0} + 0%| | 25/12188 [04:32<27:36:36, 8.17s/it] 0%| | 26/12188 [04:39<26:15:27, 7.77s/it] {'loss': 0.7113, 'grad_norm': 2.99762130528957, 'learning_rate': 6.830601092896176e-07, 'epoch': 0.0} + 0%| | 26/12188 [04:39<26:15:27, 7.77s/it] 0%| | 27/12188 [04:48<27:36:45, 8.17s/it] {'loss': 0.68, 'grad_norm': 2.387546839084648, 'learning_rate': 7.103825136612022e-07, 'epoch': 0.0} + 0%| | 27/12188 [04:48<27:36:45, 8.17s/it] 0%| | 28/12188 [04:56<26:53:43, 7.96s/it] {'loss': 0.6626, 'grad_norm': 2.429525154972051, 'learning_rate': 7.377049180327869e-07, 'epoch': 0.0} + 0%| | 28/12188 [04:56<26:53:43, 7.96s/it] 0%| | 29/12188 [05:04<26:44:04, 7.92s/it] {'loss': 0.7016, 'grad_norm': 2.6305168982601916, 'learning_rate': 7.650273224043716e-07, 'epoch': 0.0} + 0%| | 29/12188 [05:04<26:44:04, 7.92s/it] 0%| | 30/12188 [05:10<25:25:10, 7.53s/it] {'loss': 0.7409, 'grad_norm': 2.5415775371967095, 'learning_rate': 7.923497267759564e-07, 'epoch': 0.0} + 0%| | 30/12188 [05:10<25:25:10, 7.53s/it] 0%| | 31/12188 [05:20<28:06:57, 8.33s/it] {'loss': 0.6854, 'grad_norm': 2.252691874380961, 'learning_rate': 8.196721311475409e-07, 'epoch': 0.0} + 0%| | 31/12188 [05:20<28:06:57, 8.33s/it] 0%| | 32/12188 [05:28<27:03:42, 8.01s/it] {'loss': 0.7208, 'grad_norm': 2.1897036846870757, 'learning_rate': 8.469945355191257e-07, 'epoch': 0.0} + 0%| | 32/12188 [05:28<27:03:42, 8.01s/it] 0%| | 33/12188 [05:36<27:34:37, 8.17s/it] {'loss': 0.6949, 'grad_norm': 2.124872376249603, 'learning_rate': 8.743169398907105e-07, 'epoch': 0.0} + 0%| | 33/12188 [05:36<27:34:37, 8.17s/it] 0%| | 34/12188 [05:43<26:16:51, 7.78s/it] {'loss': 0.7168, 'grad_norm': 2.2571871851930325, 'learning_rate': 9.016393442622952e-07, 'epoch': 0.0} + 0%| | 34/12188 [05:43<26:16:51, 7.78s/it] 0%| | 35/12188 [05:51<26:34:19, 7.87s/it] {'loss': 0.7352, 'grad_norm': 2.1045966418835604, 'learning_rate': 9.289617486338799e-07, 'epoch': 0.0} + 0%| | 35/12188 [05:51<26:34:19, 7.87s/it] 0%| | 36/12188 [06:00<27:02:12, 8.01s/it] {'loss': 0.6897, 'grad_norm': 1.95532013095945, 'learning_rate': 9.562841530054645e-07, 'epoch': 0.0} + 0%| | 36/12188 [06:00<27:02:12, 8.01s/it] 0%| | 37/12188 [06:08<27:18:29, 8.09s/it] {'loss': 0.6535, 'grad_norm': 1.9650517839666974, 'learning_rate': 9.836065573770493e-07, 'epoch': 0.0} + 0%| | 37/12188 [06:08<27:18:29, 8.09s/it] 0%| | 38/12188 [06:17<27:57:26, 8.28s/it] {'loss': 0.5977, 'grad_norm': 1.6796092327237084, 'learning_rate': 1.010928961748634e-06, 'epoch': 0.0} + 0%| | 38/12188 [06:17<27:57:26, 8.28s/it] 0%| | 39/12188 [06:26<29:11:33, 8.65s/it] {'loss': 0.679, 'grad_norm': 1.6448222590189212, 'learning_rate': 1.0382513661202188e-06, 'epoch': 0.0} + 0%| | 39/12188 [06:26<29:11:33, 8.65s/it] 0%| | 40/12188 [06:34<28:52:07, 8.56s/it] {'loss': 0.6433, 'grad_norm': 1.4716278384447392, 'learning_rate': 1.0655737704918034e-06, 'epoch': 0.0} + 0%| | 40/12188 [06:34<28:52:07, 8.56s/it] 0%| | 41/12188 [06:43<29:05:52, 8.62s/it] {'loss': 0.6698, 'grad_norm': 1.5523126314586277, 'learning_rate': 1.092896174863388e-06, 'epoch': 0.0} + 0%| | 41/12188 [06:43<29:05:52, 8.62s/it] 0%| | 42/12188 [06:50<27:09:17, 8.05s/it] {'loss': 0.646, 'grad_norm': 1.4096320940338003, 'learning_rate': 1.1202185792349727e-06, 'epoch': 0.0} + 0%| | 42/12188 [06:50<27:09:17, 8.05s/it] 0%| | 43/12188 [06:57<25:45:05, 7.63s/it] {'loss': 0.6442, 'grad_norm': 1.2804099242960334, 'learning_rate': 1.1475409836065575e-06, 'epoch': 0.0} + 0%| | 43/12188 [06:57<25:45:05, 7.63s/it] 0%| | 44/12188 [07:04<25:32:38, 7.57s/it] {'loss': 0.6568, 'grad_norm': 1.3057129544831738, 'learning_rate': 1.1748633879781422e-06, 'epoch': 0.0} + 0%| | 44/12188 [07:04<25:32:38, 7.57s/it] 0%| | 45/12188 [07:12<26:09:04, 7.75s/it] {'loss': 0.6428, 'grad_norm': 1.188954270959499, 'learning_rate': 1.2021857923497268e-06, 'epoch': 0.0} + 0%| | 45/12188 [07:12<26:09:04, 7.75s/it] 0%| | 46/12188 [07:22<28:14:54, 8.38s/it] {'loss': 0.6073, 'grad_norm': 1.2591055307310337, 'learning_rate': 1.2295081967213116e-06, 'epoch': 0.0} + 0%| | 46/12188 [07:22<28:14:54, 8.38s/it] 0%| | 47/12188 [07:31<28:27:16, 8.44s/it] {'loss': 0.594, 'grad_norm': 1.2069297325506247, 'learning_rate': 1.2568306010928963e-06, 'epoch': 0.0} + 0%| | 47/12188 [07:31<28:27:16, 8.44s/it] 0%| | 48/12188 [07:38<27:44:45, 8.23s/it] {'loss': 0.7037, 'grad_norm': 1.3214514535959614, 'learning_rate': 1.2841530054644811e-06, 'epoch': 0.0} + 0%| | 48/12188 [07:38<27:44:45, 8.23s/it] 0%| | 49/12188 [07:47<28:03:10, 8.32s/it] {'loss': 0.5834, 'grad_norm': 1.3776912000057449, 'learning_rate': 1.3114754098360657e-06, 'epoch': 0.0} + 0%| | 49/12188 [07:47<28:03:10, 8.32s/it] 0%| | 50/12188 [07:54<26:38:55, 7.90s/it] {'loss': 0.5986, 'grad_norm': 1.0554211703822116, 'learning_rate': 1.3387978142076505e-06, 'epoch': 0.0} + 0%| | 50/12188 [07:54<26:38:55, 7.90s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7fe65a86dee0> +[Try #0] Failed to fetch sample 4755380 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7fe65a86dee0> +Problematic sample: {'image': '20240823_021250_before_screenshot_sub0.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Class: ux-action'"}, {'from': 'gpt', 'value': '\nclick(x=0.939, y=0.5095)\n'}]} + 0%| | 51/12188 [08:04<28:31:11, 8.46s/it] {'loss': 0.604, 'grad_norm': 1.2086440972180963, 'learning_rate': 1.3661202185792352e-06, 'epoch': 0.0} + 0%| | 51/12188 [08:04<28:31:11, 8.46s/it] 0%| | 52/12188 [08:10<26:49:44, 7.96s/it] {'loss': 0.6381, 'grad_norm': 1.3702917120401334, 'learning_rate': 1.3934426229508196e-06, 'epoch': 0.0} + 0%| | 52/12188 [08:10<26:49:44, 7.96s/it] 0%| | 53/12188 [08:17<25:50:38, 7.67s/it] {'loss': 0.6388, 'grad_norm': 1.0577331108318386, 'learning_rate': 1.4207650273224043e-06, 'epoch': 0.0} + 0%| | 53/12188 [08:17<25:50:38, 7.67s/it] 0%| | 54/12188 [08:26<26:22:13, 7.82s/it] {'loss': 0.5692, 'grad_norm': 6.9191466355297395, 'learning_rate': 1.4480874316939891e-06, 'epoch': 0.0} + 0%| | 54/12188 [08:26<26:22:13, 7.82s/it] 0%| | 55/12188 [08:34<27:23:55, 8.13s/it] {'loss': 0.5819, 'grad_norm': 0.9762202255589204, 'learning_rate': 1.4754098360655739e-06, 'epoch': 0.0} + 0%| | 55/12188 [08:34<27:23:55, 8.13s/it] 0%| | 56/12188 [08:43<28:22:32, 8.42s/it] {'loss': 0.6222, 'grad_norm': 1.0484078869988498, 'learning_rate': 1.5027322404371587e-06, 'epoch': 0.0} + 0%| | 56/12188 [08:43<28:22:32, 8.42s/it] 0%| | 57/12188 [08:51<27:07:02, 8.05s/it] {'loss': 0.6438, 'grad_norm': 1.031760741196525, 'learning_rate': 1.5300546448087432e-06, 'epoch': 0.0} + 0%| | 57/12188 [08:51<27:07:02, 8.05s/it] 0%| | 58/12188 [08:59<27:32:54, 8.18s/it] {'loss': 0.642, 'grad_norm': 1.0070658465555649, 'learning_rate': 1.557377049180328e-06, 'epoch': 0.0} + 0%| | 58/12188 [08:59<27:32:54, 8.18s/it] 0%| | 59/12188 [09:08<28:30:26, 8.46s/it] {'loss': 0.5958, 'grad_norm': 0.9572054708843818, 'learning_rate': 1.5846994535519128e-06, 'epoch': 0.0} + 0%| | 59/12188 [09:08<28:30:26, 8.46s/it] 0%| | 60/12188 [09:17<28:44:28, 8.53s/it] {'loss': 0.62, 'grad_norm': 1.066797718390194, 'learning_rate': 1.6120218579234975e-06, 'epoch': 0.0} + 0%| | 60/12188 [09:17<28:44:28, 8.53s/it] 1%| | 61/12188 [09:24<27:02:37, 8.03s/it] {'loss': 0.698, 'grad_norm': 1.154599125092965, 'learning_rate': 1.6393442622950819e-06, 'epoch': 0.01} + 1%| | 61/12188 [09:24<27:02:37, 8.03s/it] 1%| | 62/12188 [09:32<26:55:59, 8.00s/it] {'loss': 0.5454, 'grad_norm': 0.9740798049139792, 'learning_rate': 1.6666666666666667e-06, 'epoch': 0.01} + 1%| | 62/12188 [09:32<26:55:59, 8.00s/it] 1%| | 63/12188 [09:39<25:57:14, 7.71s/it] {'loss': 0.6054, 'grad_norm': 0.9428524504581464, 'learning_rate': 1.6939890710382514e-06, 'epoch': 0.01} + 1%| | 63/12188 [09:39<25:57:14, 7.71s/it] 1%| | 64/12188 [09:48<27:02:57, 8.03s/it] {'loss': 0.6079, 'grad_norm': 0.9886092552869474, 'learning_rate': 1.7213114754098362e-06, 'epoch': 0.01} + 1%| | 64/12188 [09:48<27:02:57, 8.03s/it] 1%| | 65/12188 [09:55<26:10:31, 7.77s/it] {'loss': 0.5535, 'grad_norm': 0.8445250375195416, 'learning_rate': 1.748633879781421e-06, 'epoch': 0.01} + 1%| | 65/12188 [09:55<26:10:31, 7.77s/it] 1%| | 66/12188 [10:02<25:14:46, 7.50s/it] {'loss': 0.6392, 'grad_norm': 0.9178397444285696, 'learning_rate': 1.7759562841530055e-06, 'epoch': 0.01} + 1%| | 66/12188 [10:02<25:14:46, 7.50s/it] 1%| | 67/12188 [10:08<24:40:28, 7.33s/it] {'loss': 0.5682, 'grad_norm': 0.9473413958976997, 'learning_rate': 1.8032786885245903e-06, 'epoch': 0.01} + 1%| | 67/12188 [10:09<24:40:28, 7.33s/it] 1%| | 68/12188 [10:16<24:53:20, 7.39s/it] {'loss': 0.5532, 'grad_norm': 0.8040348000715498, 'learning_rate': 1.830601092896175e-06, 'epoch': 0.01} + 1%| | 68/12188 [10:16<24:53:20, 7.39s/it] 1%| | 69/12188 [10:25<26:00:21, 7.73s/it] {'loss': 0.5506, 'grad_norm': 0.7785719679258585, 'learning_rate': 1.8579234972677599e-06, 'epoch': 0.01} + 1%| | 69/12188 [10:25<26:00:21, 7.73s/it] 1%| | 70/12188 [10:31<24:55:07, 7.40s/it] {'loss': 0.5929, 'grad_norm': 0.8681680375754203, 'learning_rate': 1.8852459016393442e-06, 'epoch': 0.01} + 1%| | 70/12188 [10:31<24:55:07, 7.40s/it] 1%| | 71/12188 [10:40<26:11:28, 7.78s/it] {'loss': 0.576, 'grad_norm': 0.8288211635891115, 'learning_rate': 1.912568306010929e-06, 'epoch': 0.01} + 1%| | 71/12188 [10:40<26:11:28, 7.78s/it] 1%| | 72/12188 [10:47<25:46:14, 7.66s/it] {'loss': 0.5882, 'grad_norm': 0.8409409078712426, 'learning_rate': 1.939890710382514e-06, 'epoch': 0.01} + 1%| | 72/12188 [10:47<25:46:14, 7.66s/it] 1%| | 73/12188 [10:55<25:56:22, 7.71s/it] {'loss': 0.6139, 'grad_norm': 0.8114591818499827, 'learning_rate': 1.9672131147540985e-06, 'epoch': 0.01} + 1%| | 73/12188 [10:55<25:56:22, 7.71s/it] 1%| | 74/12188 [11:03<26:16:57, 7.81s/it] {'loss': 0.5905, 'grad_norm': 0.808525241283981, 'learning_rate': 1.994535519125683e-06, 'epoch': 0.01} + 1%| | 74/12188 [11:03<26:16:57, 7.81s/it] 1%| | 75/12188 [11:10<25:10:18, 7.48s/it] {'loss': 0.5822, 'grad_norm': 0.7894713533720883, 'learning_rate': 2.021857923497268e-06, 'epoch': 0.01} + 1%| | 75/12188 [11:10<25:10:18, 7.48s/it] 1%| | 76/12188 [11:17<24:58:30, 7.42s/it] {'loss': 0.6153, 'grad_norm': 0.7554413887760214, 'learning_rate': 2.0491803278688526e-06, 'epoch': 0.01} + 1%| | 76/12188 [11:17<24:58:30, 7.42s/it] 1%| | 77/12188 [11:24<24:23:55, 7.25s/it] {'loss': 0.5785, 'grad_norm': 0.810068435494818, 'learning_rate': 2.0765027322404376e-06, 'epoch': 0.01} + 1%| | 77/12188 [11:24<24:23:55, 7.25s/it] 1%| | 78/12188 [11:31<24:16:00, 7.21s/it] {'loss': 0.5489, 'grad_norm': 0.8218075222557052, 'learning_rate': 2.103825136612022e-06, 'epoch': 0.01} + 1%| | 78/12188 [11:31<24:16:00, 7.21s/it] 1%| | 79/12188 [11:39<25:18:04, 7.52s/it] {'loss': 0.6237, 'grad_norm': 0.7766768820882174, 'learning_rate': 2.1311475409836067e-06, 'epoch': 0.01} + 1%| | 79/12188 [11:39<25:18:04, 7.52s/it] 1%| | 80/12188 [11:46<24:51:30, 7.39s/it] {'loss': 0.5739, 'grad_norm': 0.8614006502657133, 'learning_rate': 2.1584699453551913e-06, 'epoch': 0.01} + 1%| | 80/12188 [11:46<24:51:30, 7.39s/it] 1%| | 81/12188 [11:54<25:04:04, 7.45s/it] {'loss': 0.576, 'grad_norm': 0.7128586915540946, 'learning_rate': 2.185792349726776e-06, 'epoch': 0.01} + 1%| | 81/12188 [11:54<25:04:04, 7.45s/it] 1%| | 82/12188 [12:01<24:48:52, 7.38s/it] {'loss': 0.6007, 'grad_norm': 0.7852664186569185, 'learning_rate': 2.213114754098361e-06, 'epoch': 0.01} + 1%| | 82/12188 [12:01<24:48:52, 7.38s/it] 1%| | 83/12188 [12:13<29:13:58, 8.69s/it] {'loss': 0.5646, 'grad_norm': 0.7315694270843645, 'learning_rate': 2.2404371584699454e-06, 'epoch': 0.01} + 1%| | 83/12188 [12:13<29:13:58, 8.69s/it] 1%| | 84/12188 [12:20<27:28:18, 8.17s/it] {'loss': 0.5845, 'grad_norm': 0.7338090571728428, 'learning_rate': 2.2677595628415304e-06, 'epoch': 0.01} + 1%| | 84/12188 [12:20<27:28:18, 8.17s/it] 1%| | 85/12188 [12:30<29:03:25, 8.64s/it] {'loss': 0.6037, 'grad_norm': 0.7797548599203646, 'learning_rate': 2.295081967213115e-06, 'epoch': 0.01} + 1%| | 85/12188 [12:30<29:03:25, 8.64s/it] 1%| | 86/12188 [12:41<31:36:03, 9.40s/it] {'loss': 0.6085, 'grad_norm': 0.7643951463830156, 'learning_rate': 2.3224043715847e-06, 'epoch': 0.01} + 1%| | 86/12188 [12:41<31:36:03, 9.40s/it] 1%| | 87/12188 [12:48<28:56:35, 8.61s/it] {'loss': 0.5845, 'grad_norm': 0.7481465252890332, 'learning_rate': 2.3497267759562845e-06, 'epoch': 0.01} + 1%| | 87/12188 [12:48<28:56:35, 8.61s/it] 1%| | 88/12188 [12:58<30:57:51, 9.21s/it] {'loss': 0.5859, 'grad_norm': 0.7572707341912558, 'learning_rate': 2.377049180327869e-06, 'epoch': 0.01} + 1%| | 88/12188 [12:58<30:57:51, 9.21s/it] 1%| | 89/12188 [13:05<28:50:59, 8.58s/it] {'loss': 0.5784, 'grad_norm': 0.7492814090628429, 'learning_rate': 2.4043715846994536e-06, 'epoch': 0.01} + 1%| | 89/12188 [13:05<28:50:59, 8.58s/it] 1%| | 90/12188 [13:13<27:58:26, 8.32s/it] {'loss': 0.5947, 'grad_norm': 0.7552663040151634, 'learning_rate': 2.431693989071038e-06, 'epoch': 0.01} + 1%| | 90/12188 [13:13<27:58:26, 8.32s/it] 1%| | 91/12188 [13:20<26:20:48, 7.84s/it] {'loss': 0.6349, 'grad_norm': 0.7527265178556873, 'learning_rate': 2.459016393442623e-06, 'epoch': 0.01} + 1%| | 91/12188 [13:20<26:20:48, 7.84s/it] 1%| | 92/12188 [13:27<25:25:47, 7.57s/it] {'loss': 0.5774, 'grad_norm': 0.7704598114019927, 'learning_rate': 2.4863387978142077e-06, 'epoch': 0.01} + 1%| | 92/12188 [13:27<25:25:47, 7.57s/it] 1%| | 93/12188 [13:34<25:20:54, 7.54s/it] {'loss': 0.5595, 'grad_norm': 0.7404694632419342, 'learning_rate': 2.5136612021857927e-06, 'epoch': 0.01} + 1%| | 93/12188 [13:34<25:20:54, 7.54s/it] 1%| | 94/12188 [13:41<24:45:45, 7.37s/it] {'loss': 0.5316, 'grad_norm': 0.7057068463369247, 'learning_rate': 2.5409836065573773e-06, 'epoch': 0.01} + 1%| | 94/12188 [13:41<24:45:45, 7.37s/it] 1%| | 95/12188 [13:48<24:04:56, 7.17s/it] {'loss': 0.5324, 'grad_norm': 0.7810985469021622, 'learning_rate': 2.5683060109289622e-06, 'epoch': 0.01} + 1%| | 95/12188 [13:48<24:04:56, 7.17s/it] 1%| | 96/12188 [13:57<25:59:19, 7.74s/it] {'loss': 0.5695, 'grad_norm': 0.7836959543417447, 'learning_rate': 2.595628415300547e-06, 'epoch': 0.01} + 1%| | 96/12188 [13:57<25:59:19, 7.74s/it] 1%| | 97/12188 [14:04<24:49:59, 7.39s/it] {'loss': 0.5986, 'grad_norm': 0.7059435573011988, 'learning_rate': 2.6229508196721314e-06, 'epoch': 0.01} + 1%| | 97/12188 [14:04<24:49:59, 7.39s/it] 1%| | 98/12188 [14:10<24:10:56, 7.20s/it] {'loss': 0.5195, 'grad_norm': 0.7165599446303674, 'learning_rate': 2.6502732240437163e-06, 'epoch': 0.01} + 1%| | 98/12188 [14:10<24:10:56, 7.20s/it] 1%| | 99/12188 [14:20<26:31:26, 7.90s/it] {'loss': 0.5574, 'grad_norm': 0.9708890958776207, 'learning_rate': 2.677595628415301e-06, 'epoch': 0.01} + 1%| | 99/12188 [14:20<26:31:26, 7.90s/it] 1%| | 100/12188 [14:26<25:11:28, 7.50s/it] {'loss': 0.5278, 'grad_norm': 0.7720714894062014, 'learning_rate': 2.704918032786886e-06, 'epoch': 0.01} + 1%| | 100/12188 [14:26<25:11:28, 7.50s/it] 1%| | 101/12188 [14:37<28:03:23, 8.36s/it] {'loss': 0.5908, 'grad_norm': 0.7895812343861897, 'learning_rate': 2.7322404371584705e-06, 'epoch': 0.01} + 1%| | 101/12188 [14:37<28:03:23, 8.36s/it] 1%| | 102/12188 [14:44<26:35:02, 7.92s/it] {'loss': 0.5496, 'grad_norm': 0.9614735702645801, 'learning_rate': 2.7595628415300546e-06, 'epoch': 0.01} + 1%| | 102/12188 [14:44<26:35:02, 7.92s/it] 1%| | 103/12188 [14:50<25:16:11, 7.53s/it] {'loss': 0.5445, 'grad_norm': 0.7307098529848282, 'learning_rate': 2.786885245901639e-06, 'epoch': 0.01} + 1%| | 103/12188 [14:50<25:16:11, 7.53s/it] 1%| | 104/12188 [14:57<24:32:43, 7.31s/it] {'loss': 0.6102, 'grad_norm': 0.7387165181019846, 'learning_rate': 2.814207650273224e-06, 'epoch': 0.01} + 1%| | 104/12188 [14:57<24:32:43, 7.31s/it] 1%| | 105/12188 [15:07<26:53:09, 8.01s/it] {'loss': 0.5578, 'grad_norm': 0.7753005211816525, 'learning_rate': 2.8415300546448087e-06, 'epoch': 0.01} + 1%| | 105/12188 [15:07<26:53:09, 8.01s/it][2025-08-16 21:25:42,192] [WARNING] [stage3.py:2118:step] 1 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time + 1%| | 106/12188 [15:16<28:05:28, 8.37s/it] {'loss': 0.6299, 'grad_norm': 0.7611701431516128, 'learning_rate': 2.8688524590163937e-06, 'epoch': 0.01} + 1%| | 106/12188 [15:16<28:05:28, 8.37s/it] 1%| | 107/12188 [15:23<26:45:17, 7.97s/it] {'loss': 0.5987, 'grad_norm': 0.8933202473478205, 'learning_rate': 2.8961748633879782e-06, 'epoch': 0.01} + 1%| | 107/12188 [15:23<26:45:17, 7.97s/it] 1%| | 108/12188 [15:32<28:16:49, 8.43s/it] {'loss': 0.5314, 'grad_norm': 0.7174368991870086, 'learning_rate': 2.923497267759563e-06, 'epoch': 0.01} + 1%| | 108/12188 [15:32<28:16:49, 8.43s/it] 1%| | 109/12188 [15:40<27:23:30, 8.16s/it] {'loss': 0.5355, 'grad_norm': 0.7167502434034074, 'learning_rate': 2.9508196721311478e-06, 'epoch': 0.01} + 1%| | 109/12188 [15:40<27:23:30, 8.16s/it] 1%| | 110/12188 [15:50<29:26:35, 8.78s/it] {'loss': 0.5604, 'grad_norm': 0.73860332646601, 'learning_rate': 2.9781420765027323e-06, 'epoch': 0.01} + 1%| | 110/12188 [15:50<29:26:35, 8.78s/it] 1%| | 111/12188 [15:58<28:46:27, 8.58s/it] {'loss': 0.5695, 'grad_norm': 0.7221766194271947, 'learning_rate': 3.0054644808743173e-06, 'epoch': 0.01} + 1%| | 111/12188 [15:58<28:46:27, 8.58s/it] 1%| | 112/12188 [16:07<29:20:05, 8.75s/it] {'loss': 0.5111, 'grad_norm': 0.7172231095738897, 'learning_rate': 3.032786885245902e-06, 'epoch': 0.01} + 1%| | 112/12188 [16:07<29:20:05, 8.75s/it] 1%| | 113/12188 [16:16<28:45:56, 8.58s/it] {'loss': 0.4964, 'grad_norm': 0.7352308501356551, 'learning_rate': 3.0601092896174864e-06, 'epoch': 0.01} + 1%| | 113/12188 [16:16<28:45:56, 8.58s/it] 1%| | 114/12188 [16:23<27:35:08, 8.23s/it] {'loss': 0.5904, 'grad_norm': 0.713235393345211, 'learning_rate': 3.0874316939890714e-06, 'epoch': 0.01} + 1%| | 114/12188 [16:23<27:35:08, 8.23s/it] 1%| | 115/12188 [16:30<26:08:53, 7.80s/it] {'loss': 0.5955, 'grad_norm': 0.7283841882349938, 'learning_rate': 3.114754098360656e-06, 'epoch': 0.01} + 1%| | 115/12188 [16:30<26:08:53, 7.80s/it] 1%| | 116/12188 [16:37<25:24:01, 7.57s/it] {'loss': 0.512, 'grad_norm': 0.7272379352954161, 'learning_rate': 3.142076502732241e-06, 'epoch': 0.01} + 1%| | 116/12188 [16:37<25:24:01, 7.57s/it] 1%| | 117/12188 [16:44<24:32:03, 7.32s/it] {'loss': 0.5497, 'grad_norm': 0.704140649234432, 'learning_rate': 3.1693989071038255e-06, 'epoch': 0.01} + 1%| | 117/12188 [16:44<24:32:03, 7.32s/it] 1%| | 118/12188 [16:50<23:57:45, 7.15s/it] {'loss': 0.5021, 'grad_norm': 0.7403776551067047, 'learning_rate': 3.1967213114754105e-06, 'epoch': 0.01} + 1%| | 118/12188 [16:50<23:57:45, 7.15s/it] 1%| | 119/12188 [17:01<27:06:35, 8.09s/it] {'loss': 0.4729, 'grad_norm': 0.6228051098562285, 'learning_rate': 3.224043715846995e-06, 'epoch': 0.01} + 1%| | 119/12188 [17:01<27:06:35, 8.09s/it] 1%| | 120/12188 [17:08<26:19:34, 7.85s/it] {'loss': 0.5042, 'grad_norm': 0.6624910693533389, 'learning_rate': 3.2513661202185792e-06, 'epoch': 0.01} + 1%| | 120/12188 [17:08<26:19:34, 7.85s/it] 1%| | 121/12188 [17:16<26:49:37, 8.00s/it] {'loss': 0.5146, 'grad_norm': 0.6919127112659911, 'learning_rate': 3.2786885245901638e-06, 'epoch': 0.01} + 1%| | 121/12188 [17:16<26:49:37, 8.00s/it] 1%| | 122/12188 [17:25<27:56:27, 8.34s/it] {'loss': 0.549, 'grad_norm': 0.7149759090075953, 'learning_rate': 3.3060109289617488e-06, 'epoch': 0.01} + 1%| | 122/12188 [17:25<27:56:27, 8.34s/it] 1%| | 123/12188 [17:33<26:42:38, 7.97s/it] {'loss': 0.5436, 'grad_norm': 0.8645606824175845, 'learning_rate': 3.3333333333333333e-06, 'epoch': 0.01} + 1%| | 123/12188 [17:33<26:42:38, 7.97s/it] 1%| | 124/12188 [17:39<25:39:14, 7.66s/it] {'loss': 0.5555, 'grad_norm': 0.7160560252655325, 'learning_rate': 3.3606557377049183e-06, 'epoch': 0.01} + 1%| | 124/12188 [17:39<25:39:14, 7.66s/it] 1%| | 125/12188 [17:50<28:10:31, 8.41s/it] {'loss': 0.5538, 'grad_norm': 0.7095889253209778, 'learning_rate': 3.387978142076503e-06, 'epoch': 0.01} + 1%| | 125/12188 [17:50<28:10:31, 8.41s/it] 1%| | 126/12188 [17:57<26:42:36, 7.97s/it] {'loss': 0.5698, 'grad_norm': 0.7481914394701366, 'learning_rate': 3.4153005464480874e-06, 'epoch': 0.01} + 1%| | 126/12188 [17:57<26:42:36, 7.97s/it] 1%| | 127/12188 [18:03<25:36:58, 7.65s/it] {'loss': 0.5439, 'grad_norm': 0.6758026641868979, 'learning_rate': 3.4426229508196724e-06, 'epoch': 0.01} + 1%| | 127/12188 [18:03<25:36:58, 7.65s/it] 1%| | 128/12188 [18:13<27:08:06, 8.10s/it] {'loss': 0.5382, 'grad_norm': 0.6974401977197344, 'learning_rate': 3.469945355191257e-06, 'epoch': 0.01} + 1%| | 128/12188 [18:13<27:08:06, 8.10s/it] 1%| | 129/12188 [18:20<26:14:27, 7.83s/it] {'loss': 0.5205, 'grad_norm': 0.7169904057460209, 'learning_rate': 3.497267759562842e-06, 'epoch': 0.01} + 1%| | 129/12188 [18:20<26:14:27, 7.83s/it] 1%| | 130/12188 [18:28<26:35:54, 7.94s/it] {'loss': 0.4966, 'grad_norm': 0.671314158253721, 'learning_rate': 3.5245901639344265e-06, 'epoch': 0.01} + 1%| | 130/12188 [18:28<26:35:54, 7.94s/it] 1%| | 131/12188 [18:36<26:58:26, 8.05s/it] {'loss': 0.5498, 'grad_norm': 0.7072336796566324, 'learning_rate': 3.551912568306011e-06, 'epoch': 0.01} + 1%| | 131/12188 [18:36<26:58:26, 8.05s/it] 1%| | 132/12188 [18:44<26:52:53, 8.03s/it] {'loss': 0.525, 'grad_norm': 0.7382420926227349, 'learning_rate': 3.579234972677596e-06, 'epoch': 0.01} + 1%| | 132/12188 [18:44<26:52:53, 8.03s/it] 1%| | 133/12188 [18:52<26:39:08, 7.96s/it] {'loss': 0.5442, 'grad_norm': 0.6995398537878451, 'learning_rate': 3.6065573770491806e-06, 'epoch': 0.01} + 1%| | 133/12188 [18:52<26:39:08, 7.96s/it] 1%| | 134/12188 [19:00<26:27:44, 7.90s/it] {'loss': 0.5198, 'grad_norm': 0.7534950981113788, 'learning_rate': 3.6338797814207656e-06, 'epoch': 0.01} + 1%| | 134/12188 [19:00<26:27:44, 7.90s/it] 1%| | 135/12188 [19:07<25:40:48, 7.67s/it] {'loss': 0.555, 'grad_norm': 0.735929378243434, 'learning_rate': 3.66120218579235e-06, 'epoch': 0.01} + 1%| | 135/12188 [19:07<25:40:48, 7.67s/it] 1%| | 136/12188 [19:14<24:55:46, 7.45s/it] {'loss': 0.4907, 'grad_norm': 0.7201764681653368, 'learning_rate': 3.6885245901639347e-06, 'epoch': 0.01} + 1%| | 136/12188 [19:14<24:55:46, 7.45s/it] 1%| | 137/12188 [19:22<25:29:38, 7.62s/it] {'loss': 0.5263, 'grad_norm': 0.6522720991896332, 'learning_rate': 3.7158469945355197e-06, 'epoch': 0.01} + 1%| | 137/12188 [19:22<25:29:38, 7.62s/it] 1%| | 138/12188 [19:31<27:17:47, 8.15s/it] {'loss': 0.5452, 'grad_norm': 0.7395099458945698, 'learning_rate': 3.7431693989071043e-06, 'epoch': 0.01} + 1%| | 138/12188 [19:31<27:17:47, 8.15s/it] 1%| | 139/12188 [19:40<27:40:36, 8.27s/it] {'loss': 0.5395, 'grad_norm': 0.7209513246536812, 'learning_rate': 3.7704918032786884e-06, 'epoch': 0.01} + 1%| | 139/12188 [19:40<27:40:36, 8.27s/it] 1%| | 140/12188 [19:47<26:16:19, 7.85s/it] {'loss': 0.5753, 'grad_norm': 0.7599055123909271, 'learning_rate': 3.7978142076502734e-06, 'epoch': 0.01} + 1%| | 140/12188 [19:47<26:16:19, 7.85s/it] 1%| | 141/12188 [19:55<27:09:37, 8.12s/it] {'loss': 0.4973, 'grad_norm': 0.6965762440737608, 'learning_rate': 3.825136612021858e-06, 'epoch': 0.01} + 1%| | 141/12188 [19:55<27:09:37, 8.12s/it] 1%| | 142/12188 [20:02<25:42:51, 7.68s/it] {'loss': 0.497, 'grad_norm': 0.7445968423045807, 'learning_rate': 3.852459016393443e-06, 'epoch': 0.01} + 1%| | 142/12188 [20:02<25:42:51, 7.68s/it] 1%| | 143/12188 [20:11<26:48:14, 8.01s/it] {'loss': 0.5779, 'grad_norm': 0.7667796118793652, 'learning_rate': 3.879781420765028e-06, 'epoch': 0.01} + 1%| | 143/12188 [20:11<26:48:14, 8.01s/it] 1%| | 144/12188 [20:18<25:29:15, 7.62s/it] {'loss': 0.5485, 'grad_norm': 0.7352525692982557, 'learning_rate': 3.907103825136612e-06, 'epoch': 0.01} + 1%| | 144/12188 [20:18<25:29:15, 7.62s/it] 1%| | 145/12188 [20:24<24:40:53, 7.38s/it] {'loss': 0.5416, 'grad_norm': 0.7388167010484292, 'learning_rate': 3.934426229508197e-06, 'epoch': 0.01} + 1%| | 145/12188 [20:24<24:40:53, 7.38s/it] 1%| | 146/12188 [20:32<24:38:34, 7.37s/it] {'loss': 0.5711, 'grad_norm': 0.7329334594149585, 'learning_rate': 3.961748633879782e-06, 'epoch': 0.01} + 1%| | 146/12188 [20:32<24:38:34, 7.37s/it] 1%| | 147/12188 [20:40<25:38:28, 7.67s/it] {'loss': 0.5973, 'grad_norm': 0.7341246080271276, 'learning_rate': 3.989071038251366e-06, 'epoch': 0.01} + 1%| | 147/12188 [20:40<25:38:28, 7.67s/it] 1%| | 148/12188 [20:47<24:49:35, 7.42s/it] {'loss': 0.5293, 'grad_norm': 0.7063940812731384, 'learning_rate': 4.016393442622951e-06, 'epoch': 0.01} + 1%| | 148/12188 [20:47<24:49:35, 7.42s/it] 1%| | 149/12188 [20:55<25:02:52, 7.49s/it] {'loss': 0.5078, 'grad_norm': 0.6440475877504753, 'learning_rate': 4.043715846994536e-06, 'epoch': 0.01} + 1%| | 149/12188 [20:55<25:02:52, 7.49s/it] 1%| | 150/12188 [21:01<24:14:15, 7.25s/it] {'loss': 0.5051, 'grad_norm': 0.7274766928347551, 'learning_rate': 4.07103825136612e-06, 'epoch': 0.01} + 1%| | 150/12188 [21:01<24:14:15, 7.25s/it] 1%| | 151/12188 [21:09<24:16:00, 7.26s/it] {'loss': 0.5666, 'grad_norm': 0.7772069598712157, 'learning_rate': 4.098360655737705e-06, 'epoch': 0.01} + 1%| | 151/12188 [21:09<24:16:00, 7.26s/it] 1%| | 152/12188 [21:16<24:20:40, 7.28s/it] {'loss': 0.5281, 'grad_norm': 0.6961686233459559, 'learning_rate': 4.12568306010929e-06, 'epoch': 0.01} + 1%| | 152/12188 [21:16<24:20:40, 7.28s/it] 1%|▏ | 153/12188 [21:26<26:37:46, 7.97s/it] {'loss': 0.5339, 'grad_norm': 0.7028795014580788, 'learning_rate': 4.153005464480875e-06, 'epoch': 0.01} + 1%|▏ | 153/12188 [21:26<26:37:46, 7.97s/it] 1%|▏ | 154/12188 [21:32<25:29:18, 7.62s/it] {'loss': 0.4681, 'grad_norm': 0.6977819368837961, 'learning_rate': 4.180327868852459e-06, 'epoch': 0.01} + 1%|▏ | 154/12188 [21:32<25:29:18, 7.62s/it] 1%|▏ | 155/12188 [21:40<25:31:56, 7.64s/it] {'loss': 0.5416, 'grad_norm': 0.689604405138438, 'learning_rate': 4.207650273224044e-06, 'epoch': 0.01} + 1%|▏ | 155/12188 [21:40<25:31:56, 7.64s/it] 1%|▏ | 156/12188 [21:48<26:15:53, 7.86s/it] {'loss': 0.5395, 'grad_norm': 0.7329166590311801, 'learning_rate': 4.234972677595629e-06, 'epoch': 0.01} + 1%|▏ | 156/12188 [21:48<26:15:53, 7.86s/it] 1%|▏ | 157/12188 [21:55<25:23:44, 7.60s/it] {'loss': 0.4868, 'grad_norm': 0.6875105879630147, 'learning_rate': 4.2622950819672135e-06, 'epoch': 0.01} + 1%|▏ | 157/12188 [21:55<25:23:44, 7.60s/it] 1%|▏ | 158/12188 [22:03<25:42:00, 7.69s/it] {'loss': 0.547, 'grad_norm': 0.8280596402034371, 'learning_rate': 4.289617486338798e-06, 'epoch': 0.01} + 1%|▏ | 158/12188 [22:03<25:42:00, 7.69s/it] 1%|▏ | 159/12188 [22:10<25:01:58, 7.49s/it] {'loss': 0.519, 'grad_norm': 0.7251166083954937, 'learning_rate': 4.316939890710383e-06, 'epoch': 0.01} + 1%|▏ | 159/12188 [22:10<25:01:58, 7.49s/it] 1%|▏ | 160/12188 [22:17<24:10:51, 7.24s/it] {'loss': 0.5226, 'grad_norm': 0.6906098353919035, 'learning_rate': 4.3442622950819676e-06, 'epoch': 0.01} + 1%|▏ | 160/12188 [22:17<24:10:51, 7.24s/it] 1%|▏ | 161/12188 [22:24<23:42:37, 7.10s/it] {'loss': 0.4778, 'grad_norm': 0.7029180259064449, 'learning_rate': 4.371584699453552e-06, 'epoch': 0.01} + 1%|▏ | 161/12188 [22:24<23:42:37, 7.10s/it] 1%|▏ | 162/12188 [22:30<23:17:51, 6.97s/it] {'loss': 0.4836, 'grad_norm': 0.7212367062954328, 'learning_rate': 4.398907103825137e-06, 'epoch': 0.01} + 1%|▏ | 162/12188 [22:30<23:17:51, 6.97s/it] 1%|▏ | 163/12188 [22:37<23:01:19, 6.89s/it] {'loss': 0.5141, 'grad_norm': 0.7036493053336564, 'learning_rate': 4.426229508196722e-06, 'epoch': 0.01} + 1%|▏ | 163/12188 [22:37<23:01:19, 6.89s/it] 1%|▏ | 164/12188 [22:45<23:39:49, 7.08s/it] {'loss': 0.4656, 'grad_norm': 0.6510839308020532, 'learning_rate': 4.453551912568307e-06, 'epoch': 0.01} + 1%|▏ | 164/12188 [22:45<23:39:49, 7.08s/it] 1%|▏ | 165/12188 [22:54<25:29:10, 7.63s/it] {'loss': 0.5375, 'grad_norm': 0.7026000593685714, 'learning_rate': 4.480874316939891e-06, 'epoch': 0.01} + 1%|▏ | 165/12188 [22:54<25:29:10, 7.63s/it] 1%|▏ | 166/12188 [23:01<24:54:24, 7.46s/it] {'loss': 0.5663, 'grad_norm': 0.7187224410212275, 'learning_rate': 4.508196721311476e-06, 'epoch': 0.01} + 1%|▏ | 166/12188 [23:01<24:54:24, 7.46s/it] 1%|▏ | 167/12188 [23:07<24:14:17, 7.26s/it] {'loss': 0.5495, 'grad_norm': 0.7740735840459895, 'learning_rate': 4.535519125683061e-06, 'epoch': 0.01} + 1%|▏ | 167/12188 [23:07<24:14:17, 7.26s/it] 1%|▏ | 168/12188 [23:14<23:43:15, 7.10s/it] {'loss': 0.4883, 'grad_norm': 0.7174969066563622, 'learning_rate': 4.562841530054645e-06, 'epoch': 0.01} + 1%|▏ | 168/12188 [23:14<23:43:15, 7.10s/it] 1%|▏ | 169/12188 [23:21<23:32:02, 7.05s/it] {'loss': 0.5549, 'grad_norm': 0.7358742733962577, 'learning_rate': 4.59016393442623e-06, 'epoch': 0.01} + 1%|▏ | 169/12188 [23:21<23:32:02, 7.05s/it] 1%|▏ | 170/12188 [23:28<23:15:19, 6.97s/it] {'loss': 0.5411, 'grad_norm': 0.7104310931064527, 'learning_rate': 4.617486338797815e-06, 'epoch': 0.01} + 1%|▏ | 170/12188 [23:28<23:15:19, 6.97s/it] 1%|▏ | 171/12188 [23:35<23:04:22, 6.91s/it] {'loss': 0.4588, 'grad_norm': 0.7431821605877618, 'learning_rate': 4.6448087431694e-06, 'epoch': 0.01} + 1%|▏ | 171/12188 [23:35<23:04:22, 6.91s/it] 1%|▏ | 172/12188 [23:44<25:19:51, 7.59s/it] {'loss': 0.5119, 'grad_norm': 0.7099686775669957, 'learning_rate': 4.672131147540984e-06, 'epoch': 0.01} + 1%|▏ | 172/12188 [23:44<25:19:51, 7.59s/it] 1%|▏ | 173/12188 [23:52<26:25:38, 7.92s/it] {'loss': 0.5425, 'grad_norm': 0.6951537547691425, 'learning_rate': 4.699453551912569e-06, 'epoch': 0.01} + 1%|▏ | 173/12188 [23:52<26:25:38, 7.92s/it] 1%|▏ | 174/12188 [24:00<25:45:22, 7.72s/it] {'loss': 0.5526, 'grad_norm': 0.7089535036082988, 'learning_rate': 4.726775956284154e-06, 'epoch': 0.01} + 1%|▏ | 174/12188 [24:00<25:45:22, 7.72s/it] 1%|▏ | 175/12188 [24:07<25:01:01, 7.50s/it] {'loss': 0.4894, 'grad_norm': 0.7379642172358976, 'learning_rate': 4.754098360655738e-06, 'epoch': 0.01} + 1%|▏ | 175/12188 [24:07<25:01:01, 7.50s/it] 1%|▏ | 176/12188 [24:13<23:59:15, 7.19s/it] {'loss': 0.5108, 'grad_norm': 0.7195500398829908, 'learning_rate': 4.781420765027322e-06, 'epoch': 0.01} + 1%|▏ | 176/12188 [24:13<23:59:15, 7.19s/it] 1%|▏ | 177/12188 [24:20<23:26:57, 7.03s/it] {'loss': 0.4737, 'grad_norm': 0.6953860103437562, 'learning_rate': 4.808743169398907e-06, 'epoch': 0.01} + 1%|▏ | 177/12188 [24:20<23:26:57, 7.03s/it] 1%|▏ | 178/12188 [24:27<23:11:15, 6.95s/it] {'loss': 0.499, 'grad_norm': 0.6912848981929625, 'learning_rate': 4.836065573770492e-06, 'epoch': 0.01} + 1%|▏ | 178/12188 [24:27<23:11:15, 6.95s/it] 1%|▏ | 179/12188 [24:37<26:14:05, 7.86s/it] {'loss': 0.5133, 'grad_norm': 0.7475510959967208, 'learning_rate': 4.863387978142076e-06, 'epoch': 0.01} + 1%|▏ | 179/12188 [24:37<26:14:05, 7.86s/it] 1%|▏ | 180/12188 [24:44<25:30:22, 7.65s/it] {'loss': 0.5091, 'grad_norm': 0.7753831797023494, 'learning_rate': 4.890710382513661e-06, 'epoch': 0.01} + 1%|▏ | 180/12188 [24:44<25:30:22, 7.65s/it] 1%|▏ | 181/12188 [24:51<25:13:19, 7.56s/it] {'loss': 0.501, 'grad_norm': 0.6966761826583769, 'learning_rate': 4.918032786885246e-06, 'epoch': 0.01} + 1%|▏ | 181/12188 [24:51<25:13:19, 7.56s/it] 1%|▏ | 182/12188 [24:58<24:17:07, 7.28s/it] {'loss': 0.4687, 'grad_norm': 0.7519230472398877, 'learning_rate': 4.945355191256831e-06, 'epoch': 0.01} + 1%|▏ | 182/12188 [24:58<24:17:07, 7.28s/it] 2%|▏ | 183/12188 [25:04<23:42:59, 7.11s/it] {'loss': 0.4911, 'grad_norm': 0.67827433000988, 'learning_rate': 4.9726775956284154e-06, 'epoch': 0.02} + 2%|▏ | 183/12188 [25:04<23:42:59, 7.11s/it] 2%|▏ | 184/12188 [25:12<24:03:07, 7.21s/it] {'loss': 0.5156, 'grad_norm': 0.6836673528085871, 'learning_rate': 5e-06, 'epoch': 0.02} + 2%|▏ | 184/12188 [25:12<24:03:07, 7.21s/it] 2%|▏ | 185/12188 [25:19<24:05:56, 7.23s/it] {'loss': 0.4999, 'grad_norm': 0.8493357596442787, 'learning_rate': 5.027322404371585e-06, 'epoch': 0.02} + 2%|▏ | 185/12188 [25:19<24:05:56, 7.23s/it] 2%|▏ | 186/12188 [25:26<23:47:24, 7.14s/it] {'loss': 0.4666, 'grad_norm': 0.7251608265186033, 'learning_rate': 5.0546448087431695e-06, 'epoch': 0.02} + 2%|▏ | 186/12188 [25:26<23:47:24, 7.14s/it] 2%|▏ | 187/12188 [25:34<25:02:42, 7.51s/it] {'loss': 0.5187, 'grad_norm': 0.7256599954067017, 'learning_rate': 5.0819672131147545e-06, 'epoch': 0.02} + 2%|▏ | 187/12188 [25:34<25:02:42, 7.51s/it] 2%|▏ | 188/12188 [25:41<24:32:06, 7.36s/it] {'loss': 0.4737, 'grad_norm': 0.7515864577718108, 'learning_rate': 5.1092896174863395e-06, 'epoch': 0.02} + 2%|▏ | 188/12188 [25:41<24:32:06, 7.36s/it] 2%|▏ | 189/12188 [25:49<24:15:43, 7.28s/it] {'loss': 0.5309, 'grad_norm': 0.7139370625006265, 'learning_rate': 5.1366120218579245e-06, 'epoch': 0.02} + 2%|▏ | 189/12188 [25:49<24:15:43, 7.28s/it] 2%|▏ | 190/12188 [25:55<23:49:39, 7.15s/it] {'loss': 0.5027, 'grad_norm': 0.7433117647073699, 'learning_rate': 5.163934426229509e-06, 'epoch': 0.02} + 2%|▏ | 190/12188 [25:55<23:49:39, 7.15s/it] 2%|▏ | 191/12188 [26:03<23:55:06, 7.18s/it] {'loss': 0.4661, 'grad_norm': 0.7249128535772957, 'learning_rate': 5.191256830601094e-06, 'epoch': 0.02} + 2%|▏ | 191/12188 [26:03<23:55:06, 7.18s/it] 2%|▏ | 192/12188 [26:11<24:54:28, 7.47s/it] {'loss': 0.5654, 'grad_norm': 0.7282325284072083, 'learning_rate': 5.218579234972679e-06, 'epoch': 0.02} + 2%|▏ | 192/12188 [26:11<24:54:28, 7.47s/it] 2%|▏ | 193/12188 [26:21<27:17:39, 8.19s/it] {'loss': 0.513, 'grad_norm': 0.7199008938094988, 'learning_rate': 5.245901639344263e-06, 'epoch': 0.02} + 2%|▏ | 193/12188 [26:21<27:17:39, 8.19s/it] 2%|▏ | 194/12188 [26:28<26:07:46, 7.84s/it] {'loss': 0.5096, 'grad_norm': 0.7039947581587781, 'learning_rate': 5.273224043715848e-06, 'epoch': 0.02} + 2%|▏ | 194/12188 [26:28<26:07:46, 7.84s/it] 2%|▏ | 195/12188 [26:34<25:00:52, 7.51s/it] {'loss': 0.5375, 'grad_norm': 0.7110940915833809, 'learning_rate': 5.300546448087433e-06, 'epoch': 0.02} + 2%|▏ | 195/12188 [26:34<25:00:52, 7.51s/it] 2%|▏ | 196/12188 [26:42<24:54:08, 7.48s/it] {'loss': 0.5143, 'grad_norm': 0.7543700852047978, 'learning_rate': 5.327868852459017e-06, 'epoch': 0.02} + 2%|▏ | 196/12188 [26:42<24:54:08, 7.48s/it] 2%|▏ | 197/12188 [26:49<24:18:26, 7.30s/it] {'loss': 0.5113, 'grad_norm': 0.7120086353387379, 'learning_rate': 5.355191256830602e-06, 'epoch': 0.02} + 2%|▏ | 197/12188 [26:49<24:18:26, 7.30s/it] 2%|▏ | 198/12188 [26:56<23:54:29, 7.18s/it] {'loss': 0.5447, 'grad_norm': 0.6862503719751668, 'learning_rate': 5.382513661202187e-06, 'epoch': 0.02} + 2%|▏ | 198/12188 [26:56<23:54:29, 7.18s/it] 2%|▏ | 199/12188 [27:03<23:40:20, 7.11s/it] {'loss': 0.5303, 'grad_norm': 0.724761928445193, 'learning_rate': 5.409836065573772e-06, 'epoch': 0.02} + 2%|▏ | 199/12188 [27:03<23:40:20, 7.11s/it] 2%|▏ | 200/12188 [27:12<25:43:36, 7.73s/it] {'loss': 0.5113, 'grad_norm': 0.6763503917362121, 'learning_rate': 5.437158469945356e-06, 'epoch': 0.02} + 2%|▏ | 200/12188 [27:12<25:43:36, 7.73s/it] 2%|▏ | 201/12188 [27:18<24:44:17, 7.43s/it] {'loss': 0.4485, 'grad_norm': 0.7103630077364631, 'learning_rate': 5.464480874316941e-06, 'epoch': 0.02} + 2%|▏ | 201/12188 [27:18<24:44:17, 7.43s/it] 2%|▏ | 202/12188 [27:28<26:32:52, 7.97s/it] {'loss': 0.4781, 'grad_norm': 1.470493893164305, 'learning_rate': 5.491803278688526e-06, 'epoch': 0.02} + 2%|▏ | 202/12188 [27:28<26:32:52, 7.97s/it] 2%|▏ | 203/12188 [27:35<25:24:45, 7.63s/it] {'loss': 0.5147, 'grad_norm': 0.7243588393636814, 'learning_rate': 5.519125683060109e-06, 'epoch': 0.02} + 2%|▏ | 203/12188 [27:35<25:24:45, 7.63s/it] 2%|▏ | 204/12188 [27:42<24:46:19, 7.44s/it] {'loss': 0.4709, 'grad_norm': 0.6807181004819323, 'learning_rate': 5.546448087431694e-06, 'epoch': 0.02} + 2%|▏ | 204/12188 [27:42<24:46:19, 7.44s/it] 2%|▏ | 205/12188 [27:49<24:57:18, 7.50s/it] {'loss': 0.5141, 'grad_norm': 0.708060012048527, 'learning_rate': 5.573770491803278e-06, 'epoch': 0.02} + 2%|▏ | 205/12188 [27:49<24:57:18, 7.50s/it] 2%|▏ | 206/12188 [27:56<24:35:32, 7.39s/it] {'loss': 0.5155, 'grad_norm': 0.68838680333791, 'learning_rate': 5.601092896174863e-06, 'epoch': 0.02} + 2%|▏ | 206/12188 [27:56<24:35:32, 7.39s/it] 2%|▏ | 207/12188 [28:04<24:31:11, 7.37s/it] {'loss': 0.4788, 'grad_norm': 0.7239270961077369, 'learning_rate': 5.628415300546448e-06, 'epoch': 0.02} + 2%|▏ | 207/12188 [28:04<24:31:11, 7.37s/it] 2%|▏ | 208/12188 [28:10<23:51:31, 7.17s/it] {'loss': 0.4839, 'grad_norm': 0.7328558873875821, 'learning_rate': 5.655737704918033e-06, 'epoch': 0.02} + 2%|▏ | 208/12188 [28:10<23:51:31, 7.17s/it] 2%|▏ | 209/12188 [28:17<23:16:12, 6.99s/it] {'loss': 0.4979, 'grad_norm': 0.7760332556150643, 'learning_rate': 5.683060109289617e-06, 'epoch': 0.02} + 2%|▏ | 209/12188 [28:17<23:16:12, 6.99s/it] 2%|▏ | 210/12188 [28:28<27:05:51, 8.14s/it] {'loss': 0.4849, 'grad_norm': 0.7102887727675635, 'learning_rate': 5.710382513661202e-06, 'epoch': 0.02} + 2%|▏ | 210/12188 [28:28<27:05:51, 8.14s/it] 2%|▏ | 211/12188 [28:36<26:52:33, 8.08s/it] {'loss': 0.4659, 'grad_norm': 0.7363864553582286, 'learning_rate': 5.737704918032787e-06, 'epoch': 0.02} + 2%|▏ | 211/12188 [28:36<26:52:33, 8.08s/it] 2%|▏ | 212/12188 [28:43<25:52:10, 7.78s/it] {'loss': 0.4596, 'grad_norm': 0.7035864495655544, 'learning_rate': 5.7650273224043715e-06, 'epoch': 0.02} + 2%|▏ | 212/12188 [28:43<25:52:10, 7.78s/it] 2%|▏ | 213/12188 [28:49<24:48:40, 7.46s/it] {'loss': 0.487, 'grad_norm': 0.7457081676983588, 'learning_rate': 5.7923497267759565e-06, 'epoch': 0.02} + 2%|▏ | 213/12188 [28:49<24:48:40, 7.46s/it] 2%|▏ | 214/12188 [28:59<26:39:19, 8.01s/it] {'loss': 0.4861, 'grad_norm': 0.7239763029713772, 'learning_rate': 5.8196721311475415e-06, 'epoch': 0.02} + 2%|▏ | 214/12188 [28:59<26:39:19, 8.01s/it] 2%|▏ | 215/12188 [29:06<25:59:06, 7.81s/it] {'loss': 0.5131, 'grad_norm': 0.8293037191887754, 'learning_rate': 5.846994535519126e-06, 'epoch': 0.02} + 2%|▏ | 215/12188 [29:06<25:59:06, 7.81s/it] 2%|▏ | 216/12188 [29:13<25:16:34, 7.60s/it] {'loss': 0.489, 'grad_norm': 0.713670742599938, 'learning_rate': 5.874316939890711e-06, 'epoch': 0.02} + 2%|▏ | 216/12188 [29:13<25:16:34, 7.60s/it] 2%|▏ | 217/12188 [29:20<24:29:02, 7.36s/it] {'loss': 0.5162, 'grad_norm': 0.6784402301270791, 'learning_rate': 5.9016393442622956e-06, 'epoch': 0.02} + 2%|▏ | 217/12188 [29:20<24:29:02, 7.36s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7fe665350590> +[Try #0] Failed to fetch sample 4821113 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7fe665350590> +Problematic sample: {'image': '20240822_131046_before_screenshot_sub0.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Picture Format'"}, {'from': 'gpt', 'value': '\nclick(x=0.6525, y=0.0685)\n'}]} + 2%|▏ | 218/12188 [29:27<23:46:45, 7.15s/it] {'loss': 0.5109, 'grad_norm': 0.7618172919676223, 'learning_rate': 5.9289617486338805e-06, 'epoch': 0.02} + 2%|▏ | 218/12188 [29:27<23:46:45, 7.15s/it] 2%|▏ | 219/12188 [29:34<23:42:30, 7.13s/it] {'loss': 0.5196, 'grad_norm': 0.7084600876246179, 'learning_rate': 5.956284153005465e-06, 'epoch': 0.02} + 2%|▏ | 219/12188 [29:34<23:42:30, 7.13s/it] 2%|▏ | 220/12188 [29:40<22:58:24, 6.91s/it] {'loss': 0.5351, 'grad_norm': 0.6975307025246504, 'learning_rate': 5.98360655737705e-06, 'epoch': 0.02} + 2%|▏ | 220/12188 [29:40<22:58:24, 6.91s/it] 2%|▏ | 221/12188 [29:47<23:06:08, 6.95s/it] {'loss': 0.48, 'grad_norm': 0.7965883513551822, 'learning_rate': 6.010928961748635e-06, 'epoch': 0.02} + 2%|▏ | 221/12188 [29:47<23:06:08, 6.95s/it] 2%|▏ | 222/12188 [29:54<23:07:57, 6.96s/it] {'loss': 0.479, 'grad_norm': 0.7124731647151697, 'learning_rate': 6.038251366120219e-06, 'epoch': 0.02} + 2%|▏ | 222/12188 [29:54<23:07:57, 6.96s/it] 2%|▏ | 223/12188 [30:01<23:11:06, 6.98s/it] {'loss': 0.4725, 'grad_norm': 0.6887948060629508, 'learning_rate': 6.065573770491804e-06, 'epoch': 0.02} + 2%|▏ | 223/12188 [30:01<23:11:06, 6.98s/it] 2%|▏ | 224/12188 [30:08<22:32:42, 6.78s/it] {'loss': 0.5118, 'grad_norm': 0.7955094446195284, 'learning_rate': 6.092896174863389e-06, 'epoch': 0.02} + 2%|▏ | 224/12188 [30:08<22:32:42, 6.78s/it] 2%|▏ | 225/12188 [30:16<23:45:16, 7.15s/it] {'loss': 0.5219, 'grad_norm': 0.7078808702226231, 'learning_rate': 6.120218579234973e-06, 'epoch': 0.02} + 2%|▏ | 225/12188 [30:16<23:45:16, 7.15s/it] 2%|▏ | 226/12188 [30:22<23:05:22, 6.95s/it] {'loss': 0.4865, 'grad_norm': 0.752437471063338, 'learning_rate': 6.147540983606558e-06, 'epoch': 0.02} + 2%|▏ | 226/12188 [30:22<23:05:22, 6.95s/it] 2%|▏ | 227/12188 [30:30<23:41:19, 7.13s/it] {'loss': 0.4852, 'grad_norm': 0.7547184388582712, 'learning_rate': 6.174863387978143e-06, 'epoch': 0.02} + 2%|▏ | 227/12188 [30:30<23:41:19, 7.13s/it] 2%|▏ | 228/12188 [30:37<23:48:55, 7.17s/it] {'loss': 0.5174, 'grad_norm': 0.7822474394399602, 'learning_rate': 6.202185792349728e-06, 'epoch': 0.02} + 2%|▏ | 228/12188 [30:37<23:48:55, 7.17s/it] 2%|▏ | 229/12188 [30:45<25:10:59, 7.58s/it] {'loss': 0.4983, 'grad_norm': 0.693832699033056, 'learning_rate': 6.229508196721312e-06, 'epoch': 0.02} + 2%|▏ | 229/12188 [30:45<25:10:59, 7.58s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 90, in pil_loader + return img.convert("RGB") + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 993, in convert + self.load() + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/ImageFile.py", line 319, in load + raise _get_oserror(err_code, encoder=False) +OSError: unrecognized data stream contents when reading image file +[Try #0] Failed to fetch sample 6013461 in VC:s3://gui-agent/data_20250623/windows_augment/images. Exception: unrecognized data stream contents when reading image file +Problematic sample: {'image': 'autocad/20250508_161646_1/images/before_screenshot_1_id_73_internvl_element-caption_crop_0_grounding_instructions_point_o_paste.png', 'conversations': [{'from': 'human', 'value': "\nRetrieve the coordinates from the reference: The 'Reference' dropdown button in AutoCAD Mechanical's ribbon interface, positioned within the Block section of the toolbar."}, {'from': 'gpt', 'value': "The 'Reference' dropdown button in AutoCAD Mechanical's ribbon interface, positioned within the Block section of the toolbar.[[449, 168]]"}], 'width': 3600, 'height': 2338} + 2%|▏ | 230/12188 [30:52<24:06:04, 7.26s/it] {'loss': 0.4554, 'grad_norm': 0.7244900380744682, 'learning_rate': 6.256830601092897e-06, 'epoch': 0.02} + 2%|▏ | 230/12188 [30:52<24:06:04, 7.26s/it] 2%|▏ | 231/12188 [30:59<24:21:17, 7.33s/it] {'loss': 0.4778, 'grad_norm': 0.6734569921178174, 'learning_rate': 6.284153005464482e-06, 'epoch': 0.02} + 2%|▏ | 231/12188 [30:59<24:21:17, 7.33s/it] 2%|▏ | 232/12188 [31:07<24:18:30, 7.32s/it] {'loss': 0.5051, 'grad_norm': 0.6871351773704322, 'learning_rate': 6.311475409836066e-06, 'epoch': 0.02} + 2%|▏ | 232/12188 [31:07<24:18:30, 7.32s/it] 2%|▏ | 233/12188 [31:14<24:21:35, 7.34s/it] {'loss': 0.5398, 'grad_norm': 0.8091815082981457, 'learning_rate': 6.338797814207651e-06, 'epoch': 0.02} + 2%|▏ | 233/12188 [31:14<24:21:35, 7.34s/it] 2%|▏ | 234/12188 [31:21<24:26:05, 7.36s/it] {'loss': 0.539, 'grad_norm': 0.7724622540458691, 'learning_rate': 6.366120218579236e-06, 'epoch': 0.02} + 2%|▏ | 234/12188 [31:21<24:26:05, 7.36s/it] 2%|▏ | 235/12188 [31:28<23:54:41, 7.20s/it] {'loss': 0.5089, 'grad_norm': 0.7991344883938822, 'learning_rate': 6.393442622950821e-06, 'epoch': 0.02} + 2%|▏ | 235/12188 [31:28<23:54:41, 7.20s/it] 2%|▏ | 236/12188 [31:36<24:02:34, 7.24s/it] {'loss': 0.4716, 'grad_norm': 0.7783987761834243, 'learning_rate': 6.420765027322405e-06, 'epoch': 0.02} + 2%|▏ | 236/12188 [31:36<24:02:34, 7.24s/it] 2%|▏ | 237/12188 [31:45<25:50:02, 7.78s/it] {'loss': 0.4857, 'grad_norm': 0.7879664281886021, 'learning_rate': 6.44808743169399e-06, 'epoch': 0.02} + 2%|▏ | 237/12188 [31:45<25:50:02, 7.78s/it] 2%|▏ | 238/12188 [31:52<25:24:29, 7.65s/it] {'loss': 0.4999, 'grad_norm': 0.7545213205093599, 'learning_rate': 6.475409836065575e-06, 'epoch': 0.02} + 2%|▏ | 238/12188 [31:52<25:24:29, 7.65s/it] 2%|▏ | 239/12188 [31:59<24:25:38, 7.36s/it] {'loss': 0.4971, 'grad_norm': 0.7485190909060746, 'learning_rate': 6.5027322404371584e-06, 'epoch': 0.02} + 2%|▏ | 239/12188 [31:59<24:25:38, 7.36s/it] 2%|▏ | 240/12188 [32:06<24:25:54, 7.36s/it] {'loss': 0.4441, 'grad_norm': 0.7114781795927844, 'learning_rate': 6.530054644808743e-06, 'epoch': 0.02} + 2%|▏ | 240/12188 [32:06<24:25:54, 7.36s/it] 2%|▏ | 241/12188 [32:14<25:13:11, 7.60s/it] {'loss': 0.5011, 'grad_norm': 0.793894220931936, 'learning_rate': 6.5573770491803276e-06, 'epoch': 0.02} + 2%|▏ | 241/12188 [32:14<25:13:11, 7.60s/it] 2%|▏ | 242/12188 [32:21<24:35:44, 7.41s/it] {'loss': 0.4542, 'grad_norm': 0.7507841289943263, 'learning_rate': 6.5846994535519125e-06, 'epoch': 0.02} + 2%|▏ | 242/12188 [32:21<24:35:44, 7.41s/it] 2%|▏ | 243/12188 [32:31<26:37:09, 8.02s/it] {'loss': 0.517, 'grad_norm': 0.7613787235425983, 'learning_rate': 6.6120218579234975e-06, 'epoch': 0.02} + 2%|▏ | 243/12188 [32:31<26:37:09, 8.02s/it] 2%|▏ | 244/12188 [32:37<25:13:55, 7.61s/it] {'loss': 0.5164, 'grad_norm': 0.7842513155616389, 'learning_rate': 6.6393442622950825e-06, 'epoch': 0.02} + 2%|▏ | 244/12188 [32:37<25:13:55, 7.61s/it] 2%|▏ | 245/12188 [32:44<24:27:50, 7.37s/it] {'loss': 0.5188, 'grad_norm': 0.7612335417927749, 'learning_rate': 6.666666666666667e-06, 'epoch': 0.02} + 2%|▏ | 245/12188 [32:44<24:27:50, 7.37s/it] 2%|▏ | 246/12188 [32:51<24:10:02, 7.29s/it] {'loss': 0.5357, 'grad_norm': 0.710277948436649, 'learning_rate': 6.693989071038252e-06, 'epoch': 0.02} + 2%|▏ | 246/12188 [32:51<24:10:02, 7.29s/it] 2%|▏ | 247/12188 [32:58<23:56:22, 7.22s/it] {'loss': 0.4947, 'grad_norm': 0.7593982472109353, 'learning_rate': 6.721311475409837e-06, 'epoch': 0.02} + 2%|▏ | 247/12188 [32:58<23:56:22, 7.22s/it] 2%|▏ | 248/12188 [33:05<23:37:31, 7.12s/it] {'loss': 0.4546, 'grad_norm': 0.6516468513397349, 'learning_rate': 6.748633879781421e-06, 'epoch': 0.02} + 2%|▏ | 248/12188 [33:05<23:37:31, 7.12s/it] 2%|▏ | 249/12188 [33:12<23:00:13, 6.94s/it] {'loss': 0.4788, 'grad_norm': 0.6961226163309198, 'learning_rate': 6.775956284153006e-06, 'epoch': 0.02} + 2%|▏ | 249/12188 [33:12<23:00:13, 6.94s/it] 2%|▏ | 250/12188 [33:18<22:41:30, 6.84s/it] {'loss': 0.5056, 'grad_norm': 0.7856434345730949, 'learning_rate': 6.803278688524591e-06, 'epoch': 0.02} + 2%|▏ | 250/12188 [33:18<22:41:30, 6.84s/it] 2%|▏ | 251/12188 [33:26<23:16:11, 7.02s/it] {'loss': 0.4644, 'grad_norm': 0.6709913584324687, 'learning_rate': 6.830601092896175e-06, 'epoch': 0.02} + 2%|▏ | 251/12188 [33:26<23:16:11, 7.02s/it] 2%|▏ | 252/12188 [33:33<23:35:42, 7.12s/it] {'loss': 0.5257, 'grad_norm': 0.725809958662215, 'learning_rate': 6.85792349726776e-06, 'epoch': 0.02} + 2%|▏ | 252/12188 [33:33<23:35:42, 7.12s/it] 2%|▏ | 253/12188 [33:40<23:02:03, 6.95s/it] {'loss': 0.4702, 'grad_norm': 0.7522902710326914, 'learning_rate': 6.885245901639345e-06, 'epoch': 0.02} + 2%|▏ | 253/12188 [33:40<23:02:03, 6.95s/it] 2%|▏ | 254/12188 [33:46<22:56:00, 6.92s/it] {'loss': 0.4365, 'grad_norm': 0.7950237553247519, 'learning_rate': 6.91256830601093e-06, 'epoch': 0.02} + 2%|▏ | 254/12188 [33:46<22:56:00, 6.92s/it] 2%|▏ | 255/12188 [33:53<22:49:58, 6.89s/it] {'loss': 0.4841, 'grad_norm': 0.6950687309001368, 'learning_rate': 6.939890710382514e-06, 'epoch': 0.02} + 2%|▏ | 255/12188 [33:53<22:49:58, 6.89s/it] 2%|▏ | 256/12188 [34:00<22:29:01, 6.78s/it] {'loss': 0.5244, 'grad_norm': 0.7424707337968431, 'learning_rate': 6.967213114754099e-06, 'epoch': 0.02} + 2%|▏ | 256/12188 [34:00<22:29:01, 6.78s/it] 2%|▏ | 257/12188 [34:07<22:31:41, 6.80s/it] {'loss': 0.4806, 'grad_norm': 0.7419306692293721, 'learning_rate': 6.994535519125684e-06, 'epoch': 0.02} + 2%|▏ | 257/12188 [34:07<22:31:41, 6.80s/it] 2%|▏ | 258/12188 [34:14<22:36:53, 6.82s/it] {'loss': 0.4922, 'grad_norm': 0.7515718720294478, 'learning_rate': 7.021857923497268e-06, 'epoch': 0.02} + 2%|▏ | 258/12188 [34:14<22:36:53, 6.82s/it] 2%|▏ | 259/12188 [34:20<22:26:16, 6.77s/it] {'loss': 0.4204, 'grad_norm': 0.7135712134020837, 'learning_rate': 7.049180327868853e-06, 'epoch': 0.02} + 2%|▏ | 259/12188 [34:20<22:26:16, 6.77s/it] 2%|▏ | 260/12188 [34:28<23:30:50, 7.10s/it] {'loss': 0.5011, 'grad_norm': 0.7039455465319501, 'learning_rate': 7.076502732240438e-06, 'epoch': 0.02} + 2%|▏ | 260/12188 [34:28<23:30:50, 7.10s/it] 2%|▏ | 261/12188 [34:35<23:18:18, 7.03s/it] {'loss': 0.4777, 'grad_norm': 0.7521350075014408, 'learning_rate': 7.103825136612022e-06, 'epoch': 0.02} + 2%|▏ | 261/12188 [34:35<23:18:18, 7.03s/it] 2%|▏ | 262/12188 [34:42<23:24:15, 7.06s/it] {'loss': 0.5077, 'grad_norm': 0.7399889000505141, 'learning_rate': 7.131147540983607e-06, 'epoch': 0.02} + 2%|▏ | 262/12188 [34:42<23:24:15, 7.06s/it] 2%|▏ | 263/12188 [34:50<24:18:03, 7.34s/it] {'loss': 0.464, 'grad_norm': 0.6850858476258389, 'learning_rate': 7.158469945355192e-06, 'epoch': 0.02} + 2%|▏ | 263/12188 [34:50<24:18:03, 7.34s/it] 2%|▏ | 264/12188 [34:57<24:01:43, 7.25s/it] {'loss': 0.4712, 'grad_norm': 0.708899608581843, 'learning_rate': 7.185792349726777e-06, 'epoch': 0.02} + 2%|▏ | 264/12188 [34:57<24:01:43, 7.25s/it] 2%|▏ | 265/12188 [35:05<25:03:35, 7.57s/it] {'loss': 0.4798, 'grad_norm': 0.7209070719253067, 'learning_rate': 7.213114754098361e-06, 'epoch': 0.02} + 2%|▏ | 265/12188 [35:05<25:03:35, 7.57s/it] 2%|▏ | 266/12188 [35:12<24:10:22, 7.30s/it] {'loss': 0.5322, 'grad_norm': 0.7306374132694864, 'learning_rate': 7.240437158469946e-06, 'epoch': 0.02} + 2%|▏ | 266/12188 [35:12<24:10:22, 7.30s/it] 2%|▏ | 267/12188 [35:20<24:31:19, 7.41s/it] {'loss': 0.5184, 'grad_norm': 0.7772129021189023, 'learning_rate': 7.267759562841531e-06, 'epoch': 0.02} + 2%|▏ | 267/12188 [35:20<24:31:19, 7.41s/it] 2%|▏ | 268/12188 [35:26<23:46:59, 7.18s/it] {'loss': 0.4622, 'grad_norm': 0.6765022684123391, 'learning_rate': 7.295081967213115e-06, 'epoch': 0.02} + 2%|▏ | 268/12188 [35:26<23:46:59, 7.18s/it] 2%|▏ | 269/12188 [35:33<23:25:35, 7.08s/it] {'loss': 0.4761, 'grad_norm': 0.7073564967165935, 'learning_rate': 7.3224043715847e-06, 'epoch': 0.02} + 2%|▏ | 269/12188 [35:33<23:25:35, 7.08s/it] 2%|▏ | 270/12188 [35:41<24:34:11, 7.42s/it] {'loss': 0.4386, 'grad_norm': 1.0185678136029017, 'learning_rate': 7.349726775956285e-06, 'epoch': 0.02} + 2%|▏ | 270/12188 [35:41<24:34:11, 7.42s/it] 2%|▏ | 271/12188 [35:48<23:54:33, 7.22s/it] {'loss': 0.4746, 'grad_norm': 0.6974035976598978, 'learning_rate': 7.3770491803278695e-06, 'epoch': 0.02} + 2%|▏ | 271/12188 [35:48<23:54:33, 7.22s/it] 2%|▏ | 272/12188 [35:55<23:41:21, 7.16s/it] {'loss': 0.5047, 'grad_norm': 0.7338952253335077, 'learning_rate': 7.4043715846994544e-06, 'epoch': 0.02} + 2%|▏ | 272/12188 [35:55<23:41:21, 7.16s/it] 2%|▏ | 273/12188 [36:02<23:15:25, 7.03s/it] {'loss': 0.5275, 'grad_norm': 0.7456966980847313, 'learning_rate': 7.4316939890710394e-06, 'epoch': 0.02} + 2%|▏ | 273/12188 [36:02<23:15:25, 7.03s/it] 2%|▏ | 274/12188 [36:09<23:23:29, 7.07s/it] {'loss': 0.464, 'grad_norm': 0.8026042643549763, 'learning_rate': 7.459016393442624e-06, 'epoch': 0.02} + 2%|▏ | 274/12188 [36:09<23:23:29, 7.07s/it] 2%|▏ | 275/12188 [36:17<23:44:49, 7.18s/it] {'loss': 0.4722, 'grad_norm': 0.7295729206906412, 'learning_rate': 7.4863387978142085e-06, 'epoch': 0.02} + 2%|▏ | 275/12188 [36:17<23:44:49, 7.18s/it] 2%|▏ | 276/12188 [36:23<23:22:28, 7.06s/it] {'loss': 0.4721, 'grad_norm': 0.7940364942726144, 'learning_rate': 7.513661202185793e-06, 'epoch': 0.02} + 2%|▏ | 276/12188 [36:23<23:22:28, 7.06s/it] 2%|▏ | 277/12188 [36:31<23:49:17, 7.20s/it] {'loss': 0.484, 'grad_norm': 0.7486001185681557, 'learning_rate': 7.540983606557377e-06, 'epoch': 0.02} + 2%|▏ | 277/12188 [36:31<23:49:17, 7.20s/it] 2%|▏ | 278/12188 [36:38<23:52:32, 7.22s/it] {'loss': 0.5272, 'grad_norm': 0.7284050307306237, 'learning_rate': 7.568306010928962e-06, 'epoch': 0.02} + 2%|▏ | 278/12188 [36:38<23:52:32, 7.22s/it] 2%|▏ | 279/12188 [36:45<23:11:39, 7.01s/it] {'loss': 0.4841, 'grad_norm': 0.7807521672555144, 'learning_rate': 7.595628415300547e-06, 'epoch': 0.02} + 2%|▏ | 279/12188 [36:45<23:11:39, 7.01s/it] 2%|▏ | 280/12188 [36:54<25:25:11, 7.68s/it] {'loss': 0.5082, 'grad_norm': 0.7423125440153614, 'learning_rate': 7.622950819672132e-06, 'epoch': 0.02} + 2%|▏ | 280/12188 [36:54<25:25:11, 7.68s/it] 2%|▏ | 281/12188 [37:01<25:05:07, 7.58s/it] {'loss': 0.4698, 'grad_norm': 0.7202916618864765, 'learning_rate': 7.650273224043716e-06, 'epoch': 0.02} + 2%|▏ | 281/12188 [37:01<25:05:07, 7.58s/it] 2%|▏ | 282/12188 [37:10<26:17:57, 7.95s/it] {'loss': 0.5325, 'grad_norm': 0.7604901644633639, 'learning_rate': 7.677595628415301e-06, 'epoch': 0.02} + 2%|▏ | 282/12188 [37:10<26:17:57, 7.95s/it] 2%|▏ | 283/12188 [37:18<25:51:08, 7.82s/it] {'loss': 0.4706, 'grad_norm': 0.7388611534619522, 'learning_rate': 7.704918032786886e-06, 'epoch': 0.02} + 2%|▏ | 283/12188 [37:18<25:51:08, 7.82s/it] 2%|▏ | 284/12188 [37:25<25:13:38, 7.63s/it] {'loss': 0.4919, 'grad_norm': 0.7478738497567147, 'learning_rate': 7.732240437158471e-06, 'epoch': 0.02} + 2%|▏ | 284/12188 [37:25<25:13:38, 7.63s/it] 2%|▏ | 285/12188 [37:31<24:19:04, 7.35s/it] {'loss': 0.5021, 'grad_norm': 0.793051303030315, 'learning_rate': 7.759562841530056e-06, 'epoch': 0.02} + 2%|▏ | 285/12188 [37:31<24:19:04, 7.35s/it] 2%|▏ | 286/12188 [37:38<23:46:21, 7.19s/it] {'loss': 0.4747, 'grad_norm': 0.7300441301912832, 'learning_rate': 7.786885245901639e-06, 'epoch': 0.02} + 2%|▏ | 286/12188 [37:38<23:46:21, 7.19s/it] 2%|▏ | 287/12188 [37:46<23:50:33, 7.21s/it] {'loss': 0.4788, 'grad_norm': 0.7403184073542309, 'learning_rate': 7.814207650273224e-06, 'epoch': 0.02} + 2%|▏ | 287/12188 [37:46<23:50:33, 7.21s/it] 2%|▏ | 288/12188 [37:54<25:21:42, 7.67s/it] {'loss': 0.4793, 'grad_norm': 0.7448376481423855, 'learning_rate': 7.841530054644809e-06, 'epoch': 0.02} + 2%|▏ | 288/12188 [37:54<25:21:42, 7.67s/it] 2%|▏ | 289/12188 [38:02<25:54:24, 7.84s/it] {'loss': 0.5112, 'grad_norm': 0.8004814810395461, 'learning_rate': 7.868852459016394e-06, 'epoch': 0.02} + 2%|▏ | 289/12188 [38:02<25:54:24, 7.84s/it] 2%|▏ | 290/12188 [38:10<25:50:02, 7.82s/it] {'loss': 0.5153, 'grad_norm': 0.7587413695095064, 'learning_rate': 7.896174863387979e-06, 'epoch': 0.02} + 2%|▏ | 290/12188 [38:10<25:50:02, 7.82s/it] 2%|▏ | 291/12188 [38:18<25:37:43, 7.76s/it] {'loss': 0.4574, 'grad_norm': 0.7057932396663791, 'learning_rate': 7.923497267759564e-06, 'epoch': 0.02} + 2%|▏ | 291/12188 [38:18<25:37:43, 7.76s/it] 2%|▏ | 292/12188 [38:25<24:36:06, 7.45s/it] {'loss': 0.4482, 'grad_norm': 0.7346626512251365, 'learning_rate': 7.950819672131147e-06, 'epoch': 0.02} + 2%|▏ | 292/12188 [38:25<24:36:06, 7.45s/it] 2%|▏ | 293/12188 [38:33<25:35:36, 7.75s/it] {'loss': 0.4648, 'grad_norm': 0.7256235360123868, 'learning_rate': 7.978142076502732e-06, 'epoch': 0.02} + 2%|▏ | 293/12188 [38:33<25:35:36, 7.75s/it] 2%|▏ | 294/12188 [38:40<24:33:54, 7.44s/it] {'loss': 0.4928, 'grad_norm': 0.7426640793653947, 'learning_rate': 8.005464480874317e-06, 'epoch': 0.02} + 2%|▏ | 294/12188 [38:40<24:33:54, 7.44s/it] 2%|▏ | 295/12188 [38:47<24:08:39, 7.31s/it] {'loss': 0.4773, 'grad_norm': 0.6830842630027686, 'learning_rate': 8.032786885245902e-06, 'epoch': 0.02} + 2%|▏ | 295/12188 [38:47<24:08:39, 7.31s/it] 2%|▏ | 296/12188 [38:53<23:22:46, 7.08s/it] {'loss': 0.5165, 'grad_norm': 0.8328130237681087, 'learning_rate': 8.060109289617487e-06, 'epoch': 0.02} + 2%|▏ | 296/12188 [38:53<23:22:46, 7.08s/it] 2%|▏ | 297/12188 [39:00<23:15:27, 7.04s/it] {'loss': 0.4974, 'grad_norm': 0.6931884993503776, 'learning_rate': 8.087431693989072e-06, 'epoch': 0.02} + 2%|▏ | 297/12188 [39:00<23:15:27, 7.04s/it] 2%|▏ | 298/12188 [39:10<25:49:44, 7.82s/it] {'loss': 0.4797, 'grad_norm': 0.72777194335979, 'learning_rate': 8.114754098360657e-06, 'epoch': 0.02} + 2%|▏ | 298/12188 [39:10<25:49:44, 7.82s/it] 2%|▏ | 299/12188 [39:17<25:16:41, 7.65s/it] {'loss': 0.4845, 'grad_norm': 0.7525301581418226, 'learning_rate': 8.14207650273224e-06, 'epoch': 0.02} + 2%|▏ | 299/12188 [39:17<25:16:41, 7.65s/it] 2%|▏ | 300/12188 [39:27<27:44:23, 8.40s/it] {'loss': 0.4896, 'grad_norm': 0.7243695451603184, 'learning_rate': 8.169398907103826e-06, 'epoch': 0.02} + 2%|▏ | 300/12188 [39:27<27:44:23, 8.40s/it] 2%|▏ | 301/12188 [39:34<26:07:55, 7.91s/it] {'loss': 0.4709, 'grad_norm': 0.7341444038210161, 'learning_rate': 8.19672131147541e-06, 'epoch': 0.02} + 2%|▏ | 301/12188 [39:34<26:07:55, 7.91s/it] 2%|▏ | 302/12188 [39:41<25:11:44, 7.63s/it] {'loss': 0.4876, 'grad_norm': 0.7498865761223373, 'learning_rate': 8.224043715846995e-06, 'epoch': 0.02} + 2%|▏ | 302/12188 [39:41<25:11:44, 7.63s/it] 2%|▏ | 303/12188 [39:48<24:24:27, 7.39s/it] {'loss': 0.4955, 'grad_norm': 0.6769756622245103, 'learning_rate': 8.25136612021858e-06, 'epoch': 0.02} + 2%|▏ | 303/12188 [39:48<24:24:27, 7.39s/it] 2%|▏ | 304/12188 [39:56<24:43:51, 7.49s/it] {'loss': 0.5092, 'grad_norm': 0.7092618677680266, 'learning_rate': 8.278688524590165e-06, 'epoch': 0.02} + 2%|▏ | 304/12188 [39:56<24:43:51, 7.49s/it] 3%|▎ | 305/12188 [40:03<24:18:53, 7.37s/it] {'loss': 0.5194, 'grad_norm': 0.7740115083798342, 'learning_rate': 8.30601092896175e-06, 'epoch': 0.03} + 3%|▎ | 305/12188 [40:03<24:18:53, 7.37s/it] 3%|▎ | 306/12188 [40:10<24:17:01, 7.36s/it] {'loss': 0.4636, 'grad_norm': 0.7361606758491017, 'learning_rate': 8.333333333333334e-06, 'epoch': 0.03} + 3%|▎ | 306/12188 [40:10<24:17:01, 7.36s/it] 3%|▎ | 307/12188 [40:19<26:02:32, 7.89s/it] {'loss': 0.4678, 'grad_norm': 0.7235739656871362, 'learning_rate': 8.360655737704919e-06, 'epoch': 0.03} + 3%|▎ | 307/12188 [40:19<26:02:32, 7.89s/it] 3%|▎ | 308/12188 [40:26<24:48:35, 7.52s/it] {'loss': 0.462, 'grad_norm': 0.7200813323160887, 'learning_rate': 8.387978142076504e-06, 'epoch': 0.03} + 3%|▎ | 308/12188 [40:26<24:48:35, 7.52s/it] 3%|▎ | 309/12188 [40:35<26:09:56, 7.93s/it] {'loss': 0.4673, 'grad_norm': 0.7425112449267953, 'learning_rate': 8.415300546448089e-06, 'epoch': 0.03} + 3%|▎ | 309/12188 [40:35<26:09:56, 7.93s/it] 3%|▎ | 310/12188 [40:42<25:06:39, 7.61s/it] {'loss': 0.5408, 'grad_norm': 0.9021836323143811, 'learning_rate': 8.442622950819674e-06, 'epoch': 0.03} + 3%|▎ | 310/12188 [40:42<25:06:39, 7.61s/it] 3%|▎ | 311/12188 [40:49<24:42:52, 7.49s/it] {'loss': 0.492, 'grad_norm': 0.7970247048932755, 'learning_rate': 8.469945355191259e-06, 'epoch': 0.03} + 3%|▎ | 311/12188 [40:49<24:42:52, 7.49s/it] 3%|▎ | 312/12188 [40:56<24:08:21, 7.32s/it] {'loss': 0.4547, 'grad_norm': 0.811538302246817, 'learning_rate': 8.497267759562842e-06, 'epoch': 0.03} + 3%|▎ | 312/12188 [40:56<24:08:21, 7.32s/it] 3%|▎ | 313/12188 [41:04<25:13:35, 7.65s/it] {'loss': 0.4687, 'grad_norm': 0.7267247239879944, 'learning_rate': 8.524590163934427e-06, 'epoch': 0.03} + 3%|▎ | 313/12188 [41:04<25:13:35, 7.65s/it] 3%|▎ | 314/12188 [41:11<24:45:04, 7.50s/it] {'loss': 0.4897, 'grad_norm': 0.7752832459631157, 'learning_rate': 8.551912568306012e-06, 'epoch': 0.03} + 3%|▎ | 314/12188 [41:11<24:45:04, 7.50s/it] 3%|▎ | 315/12188 [41:18<24:07:43, 7.32s/it] {'loss': 0.4879, 'grad_norm': 0.7620499486189858, 'learning_rate': 8.579234972677595e-06, 'epoch': 0.03} + 3%|▎ | 315/12188 [41:18<24:07:43, 7.32s/it] 3%|▎ | 316/12188 [41:25<23:50:23, 7.23s/it] {'loss': 0.4493, 'grad_norm': 0.7622870026247587, 'learning_rate': 8.60655737704918e-06, 'epoch': 0.03} + 3%|▎ | 316/12188 [41:25<23:50:23, 7.23s/it] 3%|▎ | 317/12188 [41:33<24:10:06, 7.33s/it] {'loss': 0.4959, 'grad_norm': 0.7038270969801519, 'learning_rate': 8.633879781420765e-06, 'epoch': 0.03} + 3%|▎ | 317/12188 [41:33<24:10:06, 7.33s/it] 3%|▎ | 318/12188 [41:40<23:43:57, 7.20s/it] {'loss': 0.5314, 'grad_norm': 0.7231370930482404, 'learning_rate': 8.66120218579235e-06, 'epoch': 0.03} + 3%|▎ | 318/12188 [41:40<23:43:57, 7.20s/it] 3%|▎ | 319/12188 [41:48<25:18:53, 7.68s/it] {'loss': 0.451, 'grad_norm': 0.7290789659308253, 'learning_rate': 8.688524590163935e-06, 'epoch': 0.03} + 3%|▎ | 319/12188 [41:48<25:18:53, 7.68s/it] 3%|▎ | 320/12188 [41:55<24:13:09, 7.35s/it] {'loss': 0.4564, 'grad_norm': 0.7303738122844974, 'learning_rate': 8.71584699453552e-06, 'epoch': 0.03} + 3%|▎ | 320/12188 [41:55<24:13:09, 7.35s/it] 3%|▎ | 321/12188 [42:02<23:26:05, 7.11s/it] {'loss': 0.4653, 'grad_norm': 0.6673064334950203, 'learning_rate': 8.743169398907103e-06, 'epoch': 0.03} + 3%|▎ | 321/12188 [42:02<23:26:05, 7.11s/it] 3%|▎ | 322/12188 [42:09<23:39:36, 7.18s/it] {'loss': 0.4914, 'grad_norm': 0.7680272597113199, 'learning_rate': 8.770491803278688e-06, 'epoch': 0.03} + 3%|▎ | 322/12188 [42:09<23:39:36, 7.18s/it] 3%|▎ | 323/12188 [42:16<23:20:03, 7.08s/it] {'loss': 0.5103, 'grad_norm': 0.7456630568103154, 'learning_rate': 8.797814207650273e-06, 'epoch': 0.03} + 3%|▎ | 323/12188 [42:16<23:20:03, 7.08s/it] 3%|▎ | 324/12188 [42:22<22:51:06, 6.93s/it] {'loss': 0.4459, 'grad_norm': 0.6950247147636593, 'learning_rate': 8.825136612021858e-06, 'epoch': 0.03} + 3%|▎ | 324/12188 [42:22<22:51:06, 6.93s/it] 3%|▎ | 325/12188 [42:29<22:51:04, 6.93s/it] {'loss': 0.4509, 'grad_norm': 0.7095726681711076, 'learning_rate': 8.852459016393443e-06, 'epoch': 0.03} + 3%|▎ | 325/12188 [42:29<22:51:04, 6.93s/it] 3%|▎ | 326/12188 [42:37<23:11:42, 7.04s/it] {'loss': 0.4713, 'grad_norm': 0.6957022116801302, 'learning_rate': 8.879781420765028e-06, 'epoch': 0.03} + 3%|▎ | 326/12188 [42:37<23:11:42, 7.04s/it] 3%|▎ | 327/12188 [42:44<23:11:11, 7.04s/it] {'loss': 0.5448, 'grad_norm': 0.7315214323065525, 'learning_rate': 8.907103825136613e-06, 'epoch': 0.03} + 3%|▎ | 327/12188 [42:44<23:11:11, 7.04s/it] 3%|▎ | 328/12188 [42:51<23:17:37, 7.07s/it] {'loss': 0.4836, 'grad_norm': 0.7872172103370333, 'learning_rate': 8.934426229508197e-06, 'epoch': 0.03} + 3%|▎ | 328/12188 [42:51<23:17:37, 7.07s/it] 3%|▎ | 329/12188 [42:58<23:06:24, 7.01s/it] {'loss': 0.5001, 'grad_norm': 0.7475204197911525, 'learning_rate': 8.961748633879782e-06, 'epoch': 0.03} + 3%|▎ | 329/12188 [42:58<23:06:24, 7.01s/it] 3%|▎ | 330/12188 [43:06<24:15:20, 7.36s/it] {'loss': 0.5088, 'grad_norm': 0.7180684338768776, 'learning_rate': 8.989071038251367e-06, 'epoch': 0.03} + 3%|▎ | 330/12188 [43:06<24:15:20, 7.36s/it] 3%|▎ | 331/12188 [43:13<24:34:02, 7.46s/it] {'loss': 0.4646, 'grad_norm': 0.7620192982877894, 'learning_rate': 9.016393442622952e-06, 'epoch': 0.03} + 3%|▎ | 331/12188 [43:13<24:34:02, 7.46s/it] 3%|▎ | 332/12188 [43:21<25:04:19, 7.61s/it] {'loss': 0.4618, 'grad_norm': 0.6936782731483281, 'learning_rate': 9.043715846994537e-06, 'epoch': 0.03} + 3%|▎ | 332/12188 [43:21<25:04:19, 7.61s/it] 3%|▎ | 333/12188 [43:29<24:56:35, 7.57s/it] {'loss': 0.4737, 'grad_norm': 0.7699652288427808, 'learning_rate': 9.071038251366122e-06, 'epoch': 0.03} + 3%|▎ | 333/12188 [43:29<24:56:35, 7.57s/it] 3%|▎ | 334/12188 [43:36<24:41:03, 7.50s/it] {'loss': 0.4871, 'grad_norm': 0.7326266987448204, 'learning_rate': 9.098360655737707e-06, 'epoch': 0.03} + 3%|▎ | 334/12188 [43:36<24:41:03, 7.50s/it] 3%|▎ | 335/12188 [43:43<23:53:16, 7.26s/it] {'loss': 0.493, 'grad_norm': 0.7302372275645583, 'learning_rate': 9.12568306010929e-06, 'epoch': 0.03} + 3%|▎ | 335/12188 [43:43<23:53:16, 7.26s/it] 3%|▎ | 336/12188 [43:51<24:16:11, 7.37s/it] {'loss': 0.4664, 'grad_norm': 0.7452132566260702, 'learning_rate': 9.153005464480875e-06, 'epoch': 0.03} + 3%|▎ | 336/12188 [43:51<24:16:11, 7.37s/it] 3%|▎ | 337/12188 [43:58<24:01:21, 7.30s/it] {'loss': 0.4552, 'grad_norm': 0.7321326686430336, 'learning_rate': 9.18032786885246e-06, 'epoch': 0.03} + 3%|▎ | 337/12188 [43:58<24:01:21, 7.30s/it] 3%|▎ | 338/12188 [44:05<23:37:43, 7.18s/it] {'loss': 0.5116, 'grad_norm': 0.7091837589791525, 'learning_rate': 9.207650273224045e-06, 'epoch': 0.03} + 3%|▎ | 338/12188 [44:05<23:37:43, 7.18s/it] 3%|▎ | 339/12188 [44:12<23:47:19, 7.23s/it] {'loss': 0.4795, 'grad_norm': 0.7590251237438369, 'learning_rate': 9.23497267759563e-06, 'epoch': 0.03} + 3%|▎ | 339/12188 [44:12<23:47:19, 7.23s/it] 3%|▎ | 340/12188 [44:19<23:24:22, 7.11s/it] {'loss': 0.4621, 'grad_norm': 0.7428316985926329, 'learning_rate': 9.262295081967215e-06, 'epoch': 0.03} + 3%|▎ | 340/12188 [44:19<23:24:22, 7.11s/it] 3%|▎ | 341/12188 [44:26<23:04:16, 7.01s/it] {'loss': 0.4822, 'grad_norm': 0.7650681063577547, 'learning_rate': 9.2896174863388e-06, 'epoch': 0.03} + 3%|▎ | 341/12188 [44:26<23:04:16, 7.01s/it] 3%|▎ | 342/12188 [44:33<23:34:50, 7.17s/it] {'loss': 0.4816, 'grad_norm': 0.721221484811715, 'learning_rate': 9.316939890710383e-06, 'epoch': 0.03} + 3%|▎ | 342/12188 [44:33<23:34:50, 7.17s/it] 3%|▎ | 343/12188 [44:40<23:27:19, 7.13s/it] {'loss': 0.4697, 'grad_norm': 0.6887336677008205, 'learning_rate': 9.344262295081968e-06, 'epoch': 0.03} + 3%|▎ | 343/12188 [44:40<23:27:19, 7.13s/it] 3%|▎ | 344/12188 [44:47<22:49:11, 6.94s/it] {'loss': 0.4447, 'grad_norm': 0.7237681811072515, 'learning_rate': 9.371584699453553e-06, 'epoch': 0.03} + 3%|▎ | 344/12188 [44:47<22:49:11, 6.94s/it] 3%|▎ | 345/12188 [44:55<24:18:15, 7.39s/it] {'loss': 0.4829, 'grad_norm': 0.7454686846524716, 'learning_rate': 9.398907103825138e-06, 'epoch': 0.03} + 3%|▎ | 345/12188 [44:55<24:18:15, 7.39s/it] 3%|▎ | 346/12188 [45:02<24:20:17, 7.40s/it] {'loss': 0.4868, 'grad_norm': 0.7202549102789509, 'learning_rate': 9.426229508196723e-06, 'epoch': 0.03} + 3%|▎ | 346/12188 [45:03<24:20:17, 7.40s/it] 3%|▎ | 347/12188 [45:09<23:50:46, 7.25s/it] {'loss': 0.4691, 'grad_norm': 0.7657389869390842, 'learning_rate': 9.453551912568308e-06, 'epoch': 0.03} + 3%|▎ | 347/12188 [45:09<23:50:46, 7.25s/it] 3%|▎ | 348/12188 [45:17<24:08:13, 7.34s/it] {'loss': 0.5236, 'grad_norm': 0.6971451694403581, 'learning_rate': 9.480874316939891e-06, 'epoch': 0.03} + 3%|▎ | 348/12188 [45:17<24:08:13, 7.34s/it] 3%|▎ | 349/12188 [45:24<23:30:20, 7.15s/it] {'loss': 0.4497, 'grad_norm': 0.7778097903864223, 'learning_rate': 9.508196721311476e-06, 'epoch': 0.03} + 3%|▎ | 349/12188 [45:24<23:30:20, 7.15s/it] 3%|▎ | 350/12188 [45:30<23:03:14, 7.01s/it] {'loss': 0.5016, 'grad_norm': 0.7369490413482781, 'learning_rate': 9.535519125683061e-06, 'epoch': 0.03} + 3%|▎ | 350/12188 [45:30<23:03:14, 7.01s/it] 3%|▎ | 351/12188 [45:37<23:01:02, 7.00s/it] {'loss': 0.4605, 'grad_norm': 0.8739699109636033, 'learning_rate': 9.562841530054644e-06, 'epoch': 0.03} + 3%|▎ | 351/12188 [45:37<23:01:02, 7.00s/it] 3%|▎ | 352/12188 [45:46<24:54:12, 7.57s/it] {'loss': 0.4723, 'grad_norm': 0.7029868528561894, 'learning_rate': 9.59016393442623e-06, 'epoch': 0.03} + 3%|▎ | 352/12188 [45:46<24:54:12, 7.57s/it] 3%|▎ | 353/12188 [45:53<24:08:29, 7.34s/it] {'loss': 0.4712, 'grad_norm': 0.7535751671698064, 'learning_rate': 9.617486338797814e-06, 'epoch': 0.03} + 3%|▎ | 353/12188 [45:53<24:08:29, 7.34s/it] 3%|▎ | 354/12188 [46:00<23:26:47, 7.13s/it] {'loss': 0.4549, 'grad_norm': 0.7592360519812318, 'learning_rate': 9.6448087431694e-06, 'epoch': 0.03} + 3%|▎ | 354/12188 [46:00<23:26:47, 7.13s/it] 3%|▎ | 355/12188 [46:07<23:12:51, 7.06s/it] {'loss': 0.4632, 'grad_norm': 0.7535652921010682, 'learning_rate': 9.672131147540984e-06, 'epoch': 0.03} + 3%|▎ | 355/12188 [46:07<23:12:51, 7.06s/it] 3%|▎ | 356/12188 [46:13<22:34:49, 6.87s/it] {'loss': 0.4945, 'grad_norm': 0.722248001161935, 'learning_rate': 9.69945355191257e-06, 'epoch': 0.03} + 3%|▎ | 356/12188 [46:13<22:34:49, 6.87s/it] 3%|▎ | 357/12188 [46:20<22:18:33, 6.79s/it] {'loss': 0.5234, 'grad_norm': 0.7282775782224231, 'learning_rate': 9.726775956284153e-06, 'epoch': 0.03} + 3%|▎ | 357/12188 [46:20<22:18:33, 6.79s/it] 3%|▎ | 358/12188 [46:27<23:18:04, 7.09s/it] {'loss': 0.4572, 'grad_norm': 0.7797206309580986, 'learning_rate': 9.754098360655738e-06, 'epoch': 0.03} + 3%|▎ | 358/12188 [46:27<23:18:04, 7.09s/it] 3%|▎ | 359/12188 [46:35<23:27:10, 7.14s/it] {'loss': 0.4892, 'grad_norm': 0.7591514259181091, 'learning_rate': 9.781420765027323e-06, 'epoch': 0.03} + 3%|▎ | 359/12188 [46:35<23:27:10, 7.14s/it] 3%|▎ | 360/12188 [46:41<23:02:10, 7.01s/it] {'loss': 0.4553, 'grad_norm': 0.6880716566756042, 'learning_rate': 9.808743169398908e-06, 'epoch': 0.03} + 3%|▎ | 360/12188 [46:41<23:02:10, 7.01s/it] 3%|▎ | 361/12188 [46:50<24:55:54, 7.59s/it] {'loss': 0.4938, 'grad_norm': 0.7141878435054724, 'learning_rate': 9.836065573770493e-06, 'epoch': 0.03} + 3%|▎ | 361/12188 [46:50<24:55:54, 7.59s/it] 3%|▎ | 362/12188 [46:58<24:40:11, 7.51s/it] {'loss': 0.5462, 'grad_norm': 0.7280965697748293, 'learning_rate': 9.863387978142078e-06, 'epoch': 0.03} + 3%|▎ | 362/12188 [46:58<24:40:11, 7.51s/it] 3%|▎ | 363/12188 [47:05<24:22:25, 7.42s/it] {'loss': 0.4803, 'grad_norm': 0.7295001648732062, 'learning_rate': 9.890710382513663e-06, 'epoch': 0.03} + 3%|▎ | 363/12188 [47:05<24:22:25, 7.42s/it] 3%|▎ | 364/12188 [47:13<24:49:06, 7.56s/it] {'loss': 0.4529, 'grad_norm': 0.6847651753518992, 'learning_rate': 9.918032786885246e-06, 'epoch': 0.03} + 3%|▎ | 364/12188 [47:13<24:49:06, 7.56s/it] 3%|▎ | 365/12188 [47:19<23:48:17, 7.25s/it] {'loss': 0.4819, 'grad_norm': 0.7370494406656364, 'learning_rate': 9.945355191256831e-06, 'epoch': 0.03} + 3%|▎ | 365/12188 [47:19<23:48:17, 7.25s/it] 3%|▎ | 366/12188 [47:26<22:57:17, 6.99s/it] {'loss': 0.4523, 'grad_norm': 0.7729949941299822, 'learning_rate': 9.972677595628416e-06, 'epoch': 0.03} + 3%|▎ | 366/12188 [47:26<22:57:17, 6.99s/it] 3%|▎ | 367/12188 [47:32<22:45:20, 6.93s/it] {'loss': 0.4881, 'grad_norm': 0.775932067067094, 'learning_rate': 1e-05, 'epoch': 0.03} + 3%|▎ | 367/12188 [47:32<22:45:20, 6.93s/it] 3%|▎ | 368/12188 [47:42<24:55:27, 7.59s/it] {'loss': 0.4968, 'grad_norm': 0.7684759740655762, 'learning_rate': 9.999999823454018e-06, 'epoch': 0.03} + 3%|▎ | 368/12188 [47:42<24:55:27, 7.59s/it] 3%|▎ | 369/12188 [47:49<24:21:00, 7.42s/it] {'loss': 0.4978, 'grad_norm': 0.7018257469675304, 'learning_rate': 9.99999929381608e-06, 'epoch': 0.03} + 3%|▎ | 369/12188 [47:49<24:21:00, 7.42s/it] 3%|▎ | 370/12188 [47:58<25:52:17, 7.88s/it] {'loss': 0.4373, 'grad_norm': 0.6964270416468571, 'learning_rate': 9.999998411086225e-06, 'epoch': 0.03} + 3%|▎ | 370/12188 [47:58<25:52:17, 7.88s/it] 3%|▎ | 371/12188 [48:06<26:00:53, 7.93s/it] {'loss': 0.4653, 'grad_norm': 0.7506393329953867, 'learning_rate': 9.999997175264516e-06, 'epoch': 0.03} + 3%|▎ | 371/12188 [48:06<26:00:53, 7.93s/it] 3%|▎ | 372/12188 [48:15<27:07:10, 8.26s/it] {'loss': 0.5027, 'grad_norm': 0.7759342224387521, 'learning_rate': 9.999995586351038e-06, 'epoch': 0.03} + 3%|▎ | 372/12188 [48:15<27:07:10, 8.26s/it] 3%|▎ | 373/12188 [48:22<26:00:27, 7.92s/it] {'loss': 0.4916, 'grad_norm': 0.7612908197879767, 'learning_rate': 9.999993644345906e-06, 'epoch': 0.03} + 3%|▎ | 373/12188 [48:22<26:00:27, 7.92s/it] 3%|▎ | 374/12188 [48:29<25:24:56, 7.74s/it] {'loss': 0.4759, 'grad_norm': 0.7624981656819343, 'learning_rate': 9.999991349249256e-06, 'epoch': 0.03} + 3%|▎ | 374/12188 [48:29<25:24:56, 7.74s/it] 3%|▎ | 375/12188 [48:38<26:27:07, 8.06s/it] {'loss': 0.4768, 'grad_norm': 0.8114410658059767, 'learning_rate': 9.99998870106125e-06, 'epoch': 0.03} + 3%|▎ | 375/12188 [48:38<26:27:07, 8.06s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f7450350e00> +[Try #0] Failed to fetch sample 4780634 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f7450350e00> +Problematic sample: {'image': '20240822_131046_before_screenshot_sub0.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Gallery'"}, {'from': 'gpt', 'value': '\nclick(x=0.263, y=0.139)\n'}]} + 3%|▎ | 376/12188 [48:45<25:23:55, 7.74s/it] {'loss': 0.4571, 'grad_norm': 0.6690881883040294, 'learning_rate': 9.999985699782075e-06, 'epoch': 0.03} + 3%|▎ | 376/12188 [48:45<25:23:55, 7.74s/it] 3%|▎ | 377/12188 [48:52<25:11:42, 7.68s/it] {'loss': 0.5032, 'grad_norm': 0.6793569290243348, 'learning_rate': 9.999982345411943e-06, 'epoch': 0.03} + 3%|▎ | 377/12188 [48:52<25:11:42, 7.68s/it] 3%|▎ | 378/12188 [49:00<24:58:35, 7.61s/it] {'loss': 0.4773, 'grad_norm': 0.7576721180571018, 'learning_rate': 9.99997863795109e-06, 'epoch': 0.03} + 3%|▎ | 378/12188 [49:00<24:58:35, 7.61s/it] 3%|▎ | 379/12188 [49:08<25:15:23, 7.70s/it] {'loss': 0.4619, 'grad_norm': 0.725333924330082, 'learning_rate': 9.99997457739978e-06, 'epoch': 0.03} + 3%|▎ | 379/12188 [49:08<25:15:23, 7.70s/it] 3%|▎ | 380/12188 [49:15<25:02:08, 7.63s/it] {'loss': 0.5093, 'grad_norm': 0.6989828858207294, 'learning_rate': 9.999970163758296e-06, 'epoch': 0.03} + 3%|▎ | 380/12188 [49:15<25:02:08, 7.63s/it] 3%|▎ | 381/12188 [49:22<24:34:21, 7.49s/it] {'loss': 0.4287, 'grad_norm': 0.7171867965154881, 'learning_rate': 9.999965397026953e-06, 'epoch': 0.03} + 3%|▎ | 381/12188 [49:22<24:34:21, 7.49s/it] 3%|▎ | 382/12188 [49:30<24:19:57, 7.42s/it] {'loss': 0.4522, 'grad_norm': 0.6545167832175092, 'learning_rate': 9.999960277206089e-06, 'epoch': 0.03} + 3%|▎ | 382/12188 [49:30<24:19:57, 7.42s/it] 3%|▎ | 383/12188 [49:40<27:27:38, 8.37s/it] {'loss': 0.4444, 'grad_norm': 0.7009539190954738, 'learning_rate': 9.99995480429606e-06, 'epoch': 0.03} + 3%|▎ | 383/12188 [49:40<27:27:38, 8.37s/it]Invalidate trace cache @ step 2: expected module 1, but got module 364 + 3%|▎ | 384/12188 [49:47<25:29:54, 7.78s/it] {'loss': 0.8633, 'grad_norm': 1.426807833039473, 'learning_rate': 9.999948978297259e-06, 'epoch': 0.03} + 3%|▎ | 384/12188 [49:47<25:29:54, 7.78s/it] 3%|▎ | 385/12188 [49:53<24:25:03, 7.45s/it] {'loss': 0.4892, 'grad_norm': 0.796097003615168, 'learning_rate': 9.999942799210094e-06, 'epoch': 0.03} + 3%|▎ | 385/12188 [49:53<24:25:03, 7.45s/it] 3%|▎ | 386/12188 [50:00<23:48:22, 7.26s/it] {'loss': 0.4656, 'grad_norm': 0.8001708309492076, 'learning_rate': 9.999936267035e-06, 'epoch': 0.03} + 3%|▎ | 386/12188 [50:00<23:48:22, 7.26s/it] 3%|▎ | 387/12188 [50:07<23:36:40, 7.20s/it] {'loss': 0.4845, 'grad_norm': 0.7411460798090299, 'learning_rate': 9.99992938177244e-06, 'epoch': 0.03} + 3%|▎ | 387/12188 [50:07<23:36:40, 7.20s/it] 3%|▎ | 388/12188 [50:14<23:32:53, 7.18s/it] {'loss': 0.4794, 'grad_norm': 0.7576570415683321, 'learning_rate': 9.999922143422899e-06, 'epoch': 0.03} + 3%|▎ | 388/12188 [50:14<23:32:53, 7.18s/it] 3%|▎ | 389/12188 [50:22<23:37:51, 7.21s/it] {'loss': 0.4513, 'grad_norm': 0.8146049452268994, 'learning_rate': 9.999914551986891e-06, 'epoch': 0.03} + 3%|▎ | 389/12188 [50:22<23:37:51, 7.21s/it] 3%|▎ | 390/12188 [50:28<23:16:37, 7.10s/it] {'loss': 0.4848, 'grad_norm': 0.7409984535414162, 'learning_rate': 9.99990660746495e-06, 'epoch': 0.03} + 3%|▎ | 390/12188 [50:28<23:16:37, 7.10s/it] 3%|▎ | 391/12188 [50:36<23:45:55, 7.25s/it] {'loss': 0.4439, 'grad_norm': 0.7011404749983224, 'learning_rate': 9.999898309857636e-06, 'epoch': 0.03} + 3%|▎ | 391/12188 [50:36<23:45:55, 7.25s/it] 3%|▎ | 392/12188 [50:43<23:09:51, 7.07s/it] {'loss': 0.4673, 'grad_norm': 0.6796475389623117, 'learning_rate': 9.999889659165538e-06, 'epoch': 0.03} + 3%|▎ | 392/12188 [50:43<23:09:51, 7.07s/it] 3%|▎ | 393/12188 [50:50<23:02:07, 7.03s/it] {'loss': 0.4577, 'grad_norm': 0.763895084792315, 'learning_rate': 9.999880655389265e-06, 'epoch': 0.03} + 3%|▎ | 393/12188 [50:50<23:02:07, 7.03s/it] 3%|▎ | 394/12188 [50:57<23:20:31, 7.12s/it] {'loss': 0.4973, 'grad_norm': 0.7460391447281582, 'learning_rate': 9.999871298529454e-06, 'epoch': 0.03} + 3%|▎ | 394/12188 [50:57<23:20:31, 7.12s/it] 3%|▎ | 395/12188 [51:05<24:42:13, 7.54s/it] {'loss': 0.4855, 'grad_norm': 0.8026987475580106, 'learning_rate': 9.999861588586761e-06, 'epoch': 0.03} + 3%|▎ | 395/12188 [51:06<24:42:13, 7.54s/it] 3%|▎ | 396/12188 [51:13<24:11:25, 7.39s/it] {'loss': 0.4329, 'grad_norm': 0.680503150870167, 'learning_rate': 9.999851525561878e-06, 'epoch': 0.03} + 3%|▎ | 396/12188 [51:13<24:11:25, 7.39s/it] 3%|▎ | 397/12188 [51:20<24:35:52, 7.51s/it] {'loss': 0.4767, 'grad_norm': 0.7736300784584109, 'learning_rate': 9.999841109455514e-06, 'epoch': 0.03} + 3%|▎ | 397/12188 [51:20<24:35:52, 7.51s/it] 3%|▎ | 398/12188 [51:27<24:05:26, 7.36s/it] {'loss': 0.4914, 'grad_norm': 0.7527041748308959, 'learning_rate': 9.999830340268403e-06, 'epoch': 0.03} + 3%|▎ | 398/12188 [51:27<24:05:26, 7.36s/it] 3%|▎ | 399/12188 [51:34<23:37:39, 7.22s/it] {'loss': 0.4505, 'grad_norm': 0.709155750640637, 'learning_rate': 9.999819218001306e-06, 'epoch': 0.03} + 3%|▎ | 399/12188 [51:34<23:37:39, 7.22s/it] 3%|▎ | 400/12188 [51:41<23:26:27, 7.16s/it] {'loss': 0.4692, 'grad_norm': 0.7187531434899711, 'learning_rate': 9.999807742655008e-06, 'epoch': 0.03} + 3%|▎ | 400/12188 [51:41<23:26:27, 7.16s/it] 3%|▎ | 401/12188 [51:48<23:08:20, 7.07s/it] {'loss': 0.4793, 'grad_norm': 0.7132689878929277, 'learning_rate': 9.99979591423032e-06, 'epoch': 0.03} + 3%|▎ | 401/12188 [51:48<23:08:20, 7.07s/it] 3%|▎ | 402/12188 [51:55<22:39:39, 6.92s/it] {'loss': 0.4277, 'grad_norm': 0.8172625089056298, 'learning_rate': 9.99978373272808e-06, 'epoch': 0.03} + 3%|▎ | 402/12188 [51:55<22:39:39, 6.92s/it] 3%|▎ | 403/12188 [52:02<23:26:57, 7.16s/it] {'loss': 0.4471, 'grad_norm': 0.7171741261002212, 'learning_rate': 9.999771198149142e-06, 'epoch': 0.03} + 3%|▎ | 403/12188 [52:02<23:26:57, 7.16s/it] 3%|▎ | 404/12188 [52:10<23:27:24, 7.17s/it] {'loss': 0.4259, 'grad_norm': 0.7422548952474903, 'learning_rate': 9.999758310494396e-06, 'epoch': 0.03} + 3%|▎ | 404/12188 [52:10<23:27:24, 7.17s/it] 3%|▎ | 405/12188 [52:17<23:46:42, 7.26s/it] {'loss': 0.464, 'grad_norm': 0.6974807712110668, 'learning_rate': 9.999745069764752e-06, 'epoch': 0.03} + 3%|▎ | 405/12188 [52:17<23:46:42, 7.26s/it] 3%|▎ | 406/12188 [52:24<23:11:27, 7.09s/it] {'loss': 0.5007, 'grad_norm': 0.8131548089263908, 'learning_rate': 9.999731475961144e-06, 'epoch': 0.03} + 3%|▎ | 406/12188 [52:24<23:11:27, 7.09s/it] 3%|▎ | 407/12188 [52:31<22:58:56, 7.02s/it] {'loss': 0.4886, 'grad_norm': 0.7143270848396664, 'learning_rate': 9.99971752908453e-06, 'epoch': 0.03} + 3%|▎ | 407/12188 [52:31<22:58:56, 7.02s/it] 3%|▎ | 408/12188 [52:38<23:12:35, 7.09s/it] {'loss': 0.4425, 'grad_norm': 0.6384133367635114, 'learning_rate': 9.999703229135897e-06, 'epoch': 0.03} + 3%|▎ | 408/12188 [52:38<23:12:35, 7.09s/it] 3%|▎ | 409/12188 [52:45<23:27:06, 7.17s/it] {'loss': 0.4076, 'grad_norm': 0.7166011710744892, 'learning_rate': 9.999688576116255e-06, 'epoch': 0.03} + 3%|▎ | 409/12188 [52:45<23:27:06, 7.17s/it] 3%|▎ | 410/12188 [52:52<23:11:00, 7.09s/it] {'loss': 0.4692, 'grad_norm': 0.7432014272460356, 'learning_rate': 9.999673570026639e-06, 'epoch': 0.03} + 3%|▎ | 410/12188 [52:52<23:11:00, 7.09s/it] 3%|▎ | 411/12188 [52:59<22:52:29, 6.99s/it] {'loss': 0.4673, 'grad_norm': 0.7100299945002516, 'learning_rate': 9.999658210868109e-06, 'epoch': 0.03} + 3%|▎ | 411/12188 [52:59<22:52:29, 6.99s/it] 3%|▎ | 412/12188 [53:06<22:47:15, 6.97s/it] {'loss': 0.4717, 'grad_norm': 0.7405432300457298, 'learning_rate': 9.999642498641747e-06, 'epoch': 0.03} + 3%|▎ | 412/12188 [53:06<22:47:15, 6.97s/it] 3%|▎ | 413/12188 [53:15<25:03:39, 7.66s/it] {'loss': 0.4695, 'grad_norm': 0.6661473555433549, 'learning_rate': 9.999626433348664e-06, 'epoch': 0.03} + 3%|▎ | 413/12188 [53:15<25:03:39, 7.66s/it] 3%|▎ | 414/12188 [53:25<27:14:26, 8.33s/it] {'loss': 0.4951, 'grad_norm': 0.708439477047409, 'learning_rate': 9.999610014989995e-06, 'epoch': 0.03} + 3%|▎ | 414/12188 [53:25<27:14:26, 8.33s/it] 3%|▎ | 415/12188 [53:32<26:08:01, 7.99s/it] {'loss': 0.4515, 'grad_norm': 0.753318168177001, 'learning_rate': 9.9995932435669e-06, 'epoch': 0.03} + 3%|▎ | 415/12188 [53:32<26:08:01, 7.99s/it] 3%|▎ | 416/12188 [53:39<24:53:34, 7.61s/it] {'loss': 0.4521, 'grad_norm': 0.695997847162687, 'learning_rate': 9.999576119080562e-06, 'epoch': 0.03} + 3%|▎ | 416/12188 [53:39<24:53:34, 7.61s/it] 3%|▎ | 417/12188 [53:47<25:27:45, 7.79s/it] {'loss': 0.4934, 'grad_norm': 0.6724188998671643, 'learning_rate': 9.999558641532192e-06, 'epoch': 0.03} + 3%|▎ | 417/12188 [53:47<25:27:45, 7.79s/it] 3%|▎ | 418/12188 [53:57<27:43:27, 8.48s/it] {'loss': 0.4626, 'grad_norm': 0.68020854219338, 'learning_rate': 9.99954081092302e-06, 'epoch': 0.03} + 3%|▎ | 418/12188 [53:57<27:43:27, 8.48s/it] 3%|▎ | 419/12188 [54:04<26:19:15, 8.05s/it] {'loss': 0.4403, 'grad_norm': 0.71047396486306, 'learning_rate': 9.999522627254313e-06, 'epoch': 0.03} + 3%|▎ | 419/12188 [54:04<26:19:15, 8.05s/it] 3%|▎ | 420/12188 [54:11<24:58:51, 7.64s/it] {'loss': 0.474, 'grad_norm': 0.7321064120600358, 'learning_rate': 9.999504090527347e-06, 'epoch': 0.03} + 3%|▎ | 420/12188 [54:11<24:58:51, 7.64s/it] 3%|▎ | 421/12188 [54:18<24:03:10, 7.36s/it] {'loss': 0.4488, 'grad_norm': 0.7364852647853852, 'learning_rate': 9.999485200743434e-06, 'epoch': 0.03} + 3%|▎ | 421/12188 [54:18<24:03:10, 7.36s/it] 3%|▎ | 422/12188 [54:24<23:18:40, 7.13s/it] {'loss': 0.4308, 'grad_norm': 0.6757232304792319, 'learning_rate': 9.999465957903911e-06, 'epoch': 0.03} + 3%|▎ | 422/12188 [54:24<23:18:40, 7.13s/it] 3%|▎ | 423/12188 [54:31<23:12:11, 7.10s/it] {'loss': 0.439, 'grad_norm': 0.724262273632303, 'learning_rate': 9.999446362010134e-06, 'epoch': 0.03} + 3%|▎ | 423/12188 [54:31<23:12:11, 7.10s/it] 3%|▎ | 424/12188 [54:39<23:30:23, 7.19s/it] {'loss': 0.4595, 'grad_norm': 0.7066340834090356, 'learning_rate': 9.999426413063486e-06, 'epoch': 0.03} + 3%|▎ | 424/12188 [54:39<23:30:23, 7.19s/it] 3%|▎ | 425/12188 [54:46<23:43:52, 7.26s/it] {'loss': 0.4283, 'grad_norm': 0.7992278405540538, 'learning_rate': 9.999406111065377e-06, 'epoch': 0.03} + 3%|▎ | 425/12188 [54:46<23:43:52, 7.26s/it] 3%|▎ | 426/12188 [54:53<23:33:29, 7.21s/it] {'loss': 0.4629, 'grad_norm': 0.6896584188297401, 'learning_rate': 9.999385456017241e-06, 'epoch': 0.03} + 3%|▎ | 426/12188 [54:53<23:33:29, 7.21s/it] 4%|▎ | 427/12188 [55:00<23:39:47, 7.24s/it] {'loss': 0.4729, 'grad_norm': 0.7315870806705093, 'learning_rate': 9.999364447920536e-06, 'epoch': 0.04} + 4%|▎ | 427/12188 [55:00<23:39:47, 7.24s/it] 4%|▎ | 428/12188 [55:07<23:13:38, 7.11s/it] {'loss': 0.4621, 'grad_norm': 0.7124404297869105, 'learning_rate': 9.999343086776745e-06, 'epoch': 0.04} + 4%|▎ | 428/12188 [55:07<23:13:38, 7.11s/it] 4%|▎ | 429/12188 [55:14<23:02:34, 7.05s/it] {'loss': 0.4385, 'grad_norm': 0.7218482675853214, 'learning_rate': 9.999321372587378e-06, 'epoch': 0.04} + 4%|▎ | 429/12188 [55:14<23:02:34, 7.05s/it] 4%|▎ | 430/12188 [55:24<25:30:05, 7.81s/it] {'loss': 0.4508, 'grad_norm': 0.6910480264497098, 'learning_rate': 9.99929930535397e-06, 'epoch': 0.04} + 4%|▎ | 430/12188 [55:24<25:30:05, 7.81s/it] 4%|▎ | 431/12188 [55:32<25:28:12, 7.80s/it] {'loss': 0.4644, 'grad_norm': 0.6795431181855621, 'learning_rate': 9.999276885078073e-06, 'epoch': 0.04} + 4%|▎ | 431/12188 [55:32<25:28:12, 7.80s/it] 4%|▎ | 432/12188 [55:43<29:27:24, 9.02s/it] {'loss': 0.4617, 'grad_norm': 0.6637696137242858, 'learning_rate': 9.999254111761276e-06, 'epoch': 0.04} + 4%|▎ | 432/12188 [55:43<29:27:24, 9.02s/it] 4%|▎ | 433/12188 [55:51<27:36:34, 8.46s/it] {'loss': 0.4664, 'grad_norm': 0.7436163218033263, 'learning_rate': 9.999230985405186e-06, 'epoch': 0.04} + 4%|▎ | 433/12188 [55:51<27:36:34, 8.46s/it] 4%|▎ | 434/12188 [55:59<28:00:45, 8.58s/it] {'loss': 0.5239, 'grad_norm': 0.7233271439596533, 'learning_rate': 9.999207506011435e-06, 'epoch': 0.04} + 4%|▎ | 434/12188 [55:59<28:00:45, 8.58s/it] 4%|▎ | 435/12188 [56:07<26:33:02, 8.13s/it] {'loss': 0.4548, 'grad_norm': 0.6900246104928095, 'learning_rate': 9.999183673581683e-06, 'epoch': 0.04} + 4%|▎ | 435/12188 [56:07<26:33:02, 8.13s/it] 4%|▎ | 436/12188 [56:13<25:08:09, 7.70s/it] {'loss': 0.5407, 'grad_norm': 0.6987939441103572, 'learning_rate': 9.99915948811761e-06, 'epoch': 0.04} + 4%|▎ | 436/12188 [56:13<25:08:09, 7.70s/it] 4%|▎ | 437/12188 [56:20<24:41:14, 7.56s/it] {'loss': 0.4643, 'grad_norm': 0.7687307839914107, 'learning_rate': 9.999134949620926e-06, 'epoch': 0.04} + 4%|▎ | 437/12188 [56:20<24:41:14, 7.56s/it] 4%|▎ | 438/12188 [56:27<23:33:49, 7.22s/it] {'loss': 0.4358, 'grad_norm': 0.6595324250589515, 'learning_rate': 9.999110058093365e-06, 'epoch': 0.04} + 4%|▎ | 438/12188 [56:27<23:33:49, 7.22s/it] 4%|▎ | 439/12188 [56:34<23:53:29, 7.32s/it] {'loss': 0.4907, 'grad_norm': 0.7539446339530429, 'learning_rate': 9.999084813536682e-06, 'epoch': 0.04} + 4%|▎ | 439/12188 [56:34<23:53:29, 7.32s/it] 4%|▎ | 440/12188 [56:41<23:35:04, 7.23s/it] {'loss': 0.4482, 'grad_norm': 0.730052352121622, 'learning_rate': 9.999059215952663e-06, 'epoch': 0.04} + 4%|▎ | 440/12188 [56:41<23:35:04, 7.23s/it] 4%|▎ | 441/12188 [56:48<23:16:08, 7.13s/it] {'loss': 0.4107, 'grad_norm': 0.6761419069710816, 'learning_rate': 9.999033265343112e-06, 'epoch': 0.04} + 4%|▎ | 441/12188 [56:48<23:16:08, 7.13s/it] 4%|▎ | 442/12188 [56:55<23:12:23, 7.11s/it] {'loss': 0.4147, 'grad_norm': 0.6594305253554813, 'learning_rate': 9.999006961709863e-06, 'epoch': 0.04} + 4%|▎ | 442/12188 [56:55<23:12:23, 7.11s/it] 4%|▎ | 443/12188 [57:03<23:13:09, 7.12s/it] {'loss': 0.4765, 'grad_norm': 0.7092653478339592, 'learning_rate': 9.998980305054775e-06, 'epoch': 0.04} + 4%|▎ | 443/12188 [57:03<23:13:09, 7.12s/it] 4%|▎ | 444/12188 [57:09<22:53:45, 7.02s/it] {'loss': 0.4689, 'grad_norm': 0.7499015791093236, 'learning_rate': 9.99895329537973e-06, 'epoch': 0.04} + 4%|▎ | 444/12188 [57:09<22:53:45, 7.02s/it] 4%|▎ | 445/12188 [57:17<23:04:17, 7.07s/it] {'loss': 0.4946, 'grad_norm': 0.7978937527632346, 'learning_rate': 9.998925932686633e-06, 'epoch': 0.04} + 4%|▎ | 445/12188 [57:17<23:04:17, 7.07s/it] 4%|▎ | 446/12188 [57:24<23:23:52, 7.17s/it] {'loss': 0.4365, 'grad_norm': 0.724344233116603, 'learning_rate': 9.998898216977418e-06, 'epoch': 0.04} + 4%|▎ | 446/12188 [57:24<23:23:52, 7.17s/it] 4%|▎ | 447/12188 [57:31<23:04:51, 7.08s/it] {'loss': 0.5005, 'grad_norm': 0.6736505035140128, 'learning_rate': 9.998870148254044e-06, 'epoch': 0.04} + 4%|▎ | 447/12188 [57:31<23:04:51, 7.08s/it] 4%|▎ | 448/12188 [57:38<23:17:07, 7.14s/it] {'loss': 0.4295, 'grad_norm': 0.7894997877095115, 'learning_rate': 9.99884172651849e-06, 'epoch': 0.04} + 4%|▎ | 448/12188 [57:38<23:17:07, 7.14s/it] 4%|▎ | 449/12188 [57:45<22:58:22, 7.05s/it] {'loss': 0.4315, 'grad_norm': 0.746625330966946, 'learning_rate': 9.998812951772765e-06, 'epoch': 0.04} + 4%|▎ | 449/12188 [57:45<22:58:22, 7.05s/it] 4%|▎ | 450/12188 [57:52<23:01:06, 7.06s/it] {'loss': 0.4638, 'grad_norm': 0.7928060163011839, 'learning_rate': 9.998783824018901e-06, 'epoch': 0.04} + 4%|▎ | 450/12188 [57:52<23:01:06, 7.06s/it] 4%|▎ | 451/12188 [57:59<23:09:52, 7.11s/it] {'loss': 0.4671, 'grad_norm': 0.6666179772167592, 'learning_rate': 9.998754343258954e-06, 'epoch': 0.04} + 4%|▎ | 451/12188 [57:59<23:09:52, 7.11s/it] 4%|▎ | 452/12188 [58:08<24:48:02, 7.61s/it] {'loss': 0.4291, 'grad_norm': 0.7480588607900501, 'learning_rate': 9.998724509495006e-06, 'epoch': 0.04} + 4%|▎ | 452/12188 [58:08<24:48:02, 7.61s/it] 4%|▎ | 453/12188 [58:16<24:50:18, 7.62s/it] {'loss': 0.4825, 'grad_norm': 0.8433380311067008, 'learning_rate': 9.998694322729165e-06, 'epoch': 0.04} + 4%|▎ | 453/12188 [58:16<24:50:18, 7.62s/it] 4%|▎ | 454/12188 [58:23<24:52:05, 7.63s/it] {'loss': 0.4688, 'grad_norm': 0.7209179605412437, 'learning_rate': 9.998663782963562e-06, 'epoch': 0.04} + 4%|▎ | 454/12188 [58:23<24:52:05, 7.63s/it] 4%|▎ | 455/12188 [58:30<24:10:24, 7.42s/it] {'loss': 0.4617, 'grad_norm': 0.751286971745102, 'learning_rate': 9.998632890200352e-06, 'epoch': 0.04} + 4%|▎ | 455/12188 [58:30<24:10:24, 7.42s/it] 4%|▎ | 456/12188 [58:37<23:53:35, 7.33s/it] {'loss': 0.4544, 'grad_norm': 0.7190716216183523, 'learning_rate': 9.99860164444172e-06, 'epoch': 0.04} + 4%|▎ | 456/12188 [58:37<23:53:35, 7.33s/it] 4%|▎ | 457/12188 [58:44<23:05:55, 7.09s/it] {'loss': 0.4537, 'grad_norm': 0.7178346946916926, 'learning_rate': 9.998570045689871e-06, 'epoch': 0.04} + 4%|▎ | 457/12188 [58:44<23:05:55, 7.09s/it] 4%|▍ | 458/12188 [58:51<23:15:44, 7.14s/it] {'loss': 0.4586, 'grad_norm': 0.7335286041651922, 'learning_rate': 9.998538093947035e-06, 'epoch': 0.04} + 4%|▍ | 458/12188 [58:51<23:15:44, 7.14s/it] 4%|▍ | 459/12188 [58:58<23:15:51, 7.14s/it] {'loss': 0.4838, 'grad_norm': 0.7642983229273737, 'learning_rate': 9.998505789215469e-06, 'epoch': 0.04} + 4%|▍ | 459/12188 [58:58<23:15:51, 7.14s/it] 4%|▍ | 460/12188 [59:06<23:40:38, 7.27s/it] {'loss': 0.4531, 'grad_norm': 0.7215990458463024, 'learning_rate': 9.998473131497456e-06, 'epoch': 0.04} + 4%|▍ | 460/12188 [59:06<23:40:38, 7.27s/it] 4%|▍ | 461/12188 [59:13<23:22:21, 7.18s/it] {'loss': 0.4234, 'grad_norm': 0.6603331659625503, 'learning_rate': 9.998440120795301e-06, 'epoch': 0.04} + 4%|▍ | 461/12188 [59:13<23:22:21, 7.18s/it] 4%|▍ | 462/12188 [59:20<23:02:27, 7.07s/it] {'loss': 0.4144, 'grad_norm': 0.6426368880149491, 'learning_rate': 9.998406757111334e-06, 'epoch': 0.04} + 4%|▍ | 462/12188 [59:20<23:02:27, 7.07s/it] 4%|▍ | 463/12188 [59:26<22:40:35, 6.96s/it] {'loss': 0.431, 'grad_norm': 0.6710218677280847, 'learning_rate': 9.998373040447916e-06, 'epoch': 0.04} + 4%|▍ | 463/12188 [59:26<22:40:35, 6.96s/it] 4%|▍ | 464/12188 [59:36<24:55:53, 7.66s/it] {'loss': 0.4442, 'grad_norm': 0.7293616851891388, 'learning_rate': 9.99833897080742e-06, 'epoch': 0.04} + 4%|▍ | 464/12188 [59:36<24:55:53, 7.66s/it] 4%|▍ | 465/12188 [59:43<24:18:07, 7.46s/it] {'loss': 0.4618, 'grad_norm': 0.7582560884861719, 'learning_rate': 9.998304548192259e-06, 'epoch': 0.04} + 4%|▍ | 465/12188 [59:43<24:18:07, 7.46s/it] 4%|▍ | 466/12188 [59:49<23:34:26, 7.24s/it] {'loss': 0.4641, 'grad_norm': 0.736659797658322, 'learning_rate': 9.99826977260486e-06, 'epoch': 0.04} + 4%|▍ | 466/12188 [59:49<23:34:26, 7.24s/it] 4%|▍ | 467/12188 [59:57<24:16:19, 7.45s/it] {'loss': 0.4714, 'grad_norm': 0.6772266753444388, 'learning_rate': 9.99823464404768e-06, 'epoch': 0.04} + 4%|▍ | 467/12188 [59:57<24:16:19, 7.45s/it] 4%|▍ | 468/12188 [1:00:04<23:35:13, 7.25s/it] {'loss': 0.457, 'grad_norm': 0.705940911052447, 'learning_rate': 9.9981991625232e-06, 'epoch': 0.04} + 4%|▍ | 468/12188 [1:00:04<23:35:13, 7.25s/it] 4%|▍ | 469/12188 [1:00:11<23:09:57, 7.12s/it] {'loss': 0.4402, 'grad_norm': 0.7065678446928986, 'learning_rate': 9.998163328033926e-06, 'epoch': 0.04} + 4%|▍ | 469/12188 [1:00:11<23:09:57, 7.12s/it] 4%|▍ | 470/12188 [1:00:18<23:11:16, 7.12s/it] {'loss': 0.442, 'grad_norm': 0.7374084184063965, 'learning_rate': 9.998127140582387e-06, 'epoch': 0.04} + 4%|▍ | 470/12188 [1:00:18<23:11:16, 7.12s/it] 4%|▍ | 471/12188 [1:00:25<22:41:54, 6.97s/it] {'loss': 0.4674, 'grad_norm': 0.7285734904027434, 'learning_rate': 9.99809060017114e-06, 'epoch': 0.04} + 4%|▍ | 471/12188 [1:00:25<22:41:54, 6.97s/it] 4%|▍ | 472/12188 [1:00:32<22:48:21, 7.01s/it] {'loss': 0.4, 'grad_norm': 0.6646766165577739, 'learning_rate': 9.998053706802765e-06, 'epoch': 0.04} + 4%|▍ | 472/12188 [1:00:32<22:48:21, 7.01s/it] 4%|▍ | 473/12188 [1:00:39<22:56:58, 7.05s/it] {'loss': 0.4547, 'grad_norm': 0.6986617164849681, 'learning_rate': 9.998016460479867e-06, 'epoch': 0.04} + 4%|▍ | 473/12188 [1:00:39<22:56:58, 7.05s/it] 4%|▍ | 474/12188 [1:00:46<22:42:52, 6.98s/it] {'loss': 0.4405, 'grad_norm': 0.7185044850083976, 'learning_rate': 9.997978861205077e-06, 'epoch': 0.04} + 4%|▍ | 474/12188 [1:00:46<22:42:52, 6.98s/it] 4%|▍ | 475/12188 [1:00:53<22:39:22, 6.96s/it] {'loss': 0.4611, 'grad_norm': 0.7216457821711093, 'learning_rate': 9.997940908981048e-06, 'epoch': 0.04} + 4%|▍ | 475/12188 [1:00:53<22:39:22, 6.96s/it] 4%|▍ | 476/12188 [1:01:00<23:15:51, 7.15s/it] {'loss': 0.4385, 'grad_norm': 0.6720259241671865, 'learning_rate': 9.997902603810464e-06, 'epoch': 0.04} + 4%|▍ | 476/12188 [1:01:00<23:15:51, 7.15s/it] 4%|▍ | 477/12188 [1:01:08<24:03:01, 7.39s/it] {'loss': 0.4818, 'grad_norm': 0.7192523710454518, 'learning_rate': 9.997863945696028e-06, 'epoch': 0.04} + 4%|▍ | 477/12188 [1:01:08<24:03:01, 7.39s/it] 4%|▍ | 478/12188 [1:01:18<26:39:47, 8.20s/it] {'loss': 0.4588, 'grad_norm': 0.7951320941996108, 'learning_rate': 9.997824934640467e-06, 'epoch': 0.04} + 4%|▍ | 478/12188 [1:01:18<26:39:47, 8.20s/it] 4%|▍ | 479/12188 [1:01:25<25:17:30, 7.78s/it] {'loss': 0.4971, 'grad_norm': 0.7104438699926404, 'learning_rate': 9.99778557064654e-06, 'epoch': 0.04} + 4%|▍ | 479/12188 [1:01:25<25:17:30, 7.78s/it] 4%|▍ | 480/12188 [1:01:32<24:51:52, 7.65s/it] {'loss': 0.4486, 'grad_norm': 0.7182926253126805, 'learning_rate': 9.997745853717027e-06, 'epoch': 0.04} + 4%|▍ | 480/12188 [1:01:32<24:51:52, 7.65s/it] 4%|▍ | 481/12188 [1:01:42<26:23:59, 8.12s/it] {'loss': 0.446, 'grad_norm': 0.7253063130456066, 'learning_rate': 9.99770578385473e-06, 'epoch': 0.04} + 4%|▍ | 481/12188 [1:01:42<26:23:59, 8.12s/it] 4%|▍ | 482/12188 [1:01:49<26:03:32, 8.01s/it] {'loss': 0.4282, 'grad_norm': 0.7176852772159676, 'learning_rate': 9.99766536106248e-06, 'epoch': 0.04} + 4%|▍ | 482/12188 [1:01:49<26:03:32, 8.01s/it] 4%|▍ | 483/12188 [1:01:56<24:50:56, 7.64s/it] {'loss': 0.4723, 'grad_norm': 0.704270709301284, 'learning_rate': 9.997624585343131e-06, 'epoch': 0.04} + 4%|▍ | 483/12188 [1:01:56<24:50:56, 7.64s/it] 4%|▍ | 484/12188 [1:02:03<24:17:22, 7.47s/it] {'loss': 0.5358, 'grad_norm': 0.731241569059469, 'learning_rate': 9.997583456699564e-06, 'epoch': 0.04} + 4%|▍ | 484/12188 [1:02:03<24:17:22, 7.47s/it] 4%|▍ | 485/12188 [1:02:10<23:38:41, 7.27s/it] {'loss': 0.4621, 'grad_norm': 0.7624455583420627, 'learning_rate': 9.997541975134682e-06, 'epoch': 0.04} + 4%|▍ | 485/12188 [1:02:10<23:38:41, 7.27s/it] 4%|▍ | 486/12188 [1:02:17<23:30:33, 7.23s/it] {'loss': 0.4722, 'grad_norm': 0.7092929275275877, 'learning_rate': 9.997500140651415e-06, 'epoch': 0.04} + 4%|▍ | 486/12188 [1:02:17<23:30:33, 7.23s/it] 4%|▍ | 487/12188 [1:02:25<23:40:33, 7.28s/it] {'loss': 0.4766, 'grad_norm': 0.698531382187613, 'learning_rate': 9.997457953252718e-06, 'epoch': 0.04} + 4%|▍ | 487/12188 [1:02:25<23:40:33, 7.28s/it] 4%|▍ | 488/12188 [1:02:31<23:12:02, 7.14s/it] {'loss': 0.4153, 'grad_norm': 0.6919703903092729, 'learning_rate': 9.997415412941568e-06, 'epoch': 0.04} + 4%|▍ | 488/12188 [1:02:31<23:12:02, 7.14s/it] 4%|▍ | 489/12188 [1:02:39<23:25:46, 7.21s/it] {'loss': 0.4678, 'grad_norm': 0.7150938850897955, 'learning_rate': 9.997372519720972e-06, 'epoch': 0.04} + 4%|▍ | 489/12188 [1:02:39<23:25:46, 7.21s/it] 4%|▍ | 490/12188 [1:02:47<24:06:49, 7.42s/it] {'loss': 0.4458, 'grad_norm': 0.7149162155857924, 'learning_rate': 9.997329273593957e-06, 'epoch': 0.04} + 4%|▍ | 490/12188 [1:02:47<24:06:49, 7.42s/it] 4%|▍ | 491/12188 [1:02:53<23:20:30, 7.18s/it] {'loss': 0.4338, 'grad_norm': 0.730249746093433, 'learning_rate': 9.997285674563577e-06, 'epoch': 0.04} + 4%|▍ | 491/12188 [1:02:53<23:20:30, 7.18s/it] 4%|▍ | 492/12188 [1:03:00<23:17:15, 7.17s/it] {'loss': 0.4437, 'grad_norm': 0.7081732480745071, 'learning_rate': 9.997241722632912e-06, 'epoch': 0.04} + 4%|▍ | 492/12188 [1:03:00<23:17:15, 7.17s/it] 4%|▍ | 493/12188 [1:03:08<24:02:05, 7.40s/it] {'loss': 0.4151, 'grad_norm': 0.6914755232958761, 'learning_rate': 9.997197417805064e-06, 'epoch': 0.04} + 4%|▍ | 493/12188 [1:03:08<24:02:05, 7.40s/it] 4%|▍ | 494/12188 [1:03:15<23:28:46, 7.23s/it] {'loss': 0.4264, 'grad_norm': 0.7576900550207197, 'learning_rate': 9.997152760083165e-06, 'epoch': 0.04} + 4%|▍ | 494/12188 [1:03:15<23:28:46, 7.23s/it] 4%|▍ | 495/12188 [1:03:22<23:02:49, 7.10s/it] {'loss': 0.452, 'grad_norm': 0.7075658489323291, 'learning_rate': 9.997107749470365e-06, 'epoch': 0.04} + 4%|▍ | 495/12188 [1:03:22<23:02:49, 7.10s/it] 4%|▍ | 496/12188 [1:03:29<22:43:23, 7.00s/it] {'loss': 0.4725, 'grad_norm': 0.6762066391470457, 'learning_rate': 9.997062385969845e-06, 'epoch': 0.04} + 4%|▍ | 496/12188 [1:03:29<22:43:23, 7.00s/it] 4%|▍ | 497/12188 [1:03:38<24:33:29, 7.56s/it] {'loss': 0.4102, 'grad_norm': 0.6494608258866502, 'learning_rate': 9.997016669584808e-06, 'epoch': 0.04} + 4%|▍ | 497/12188 [1:03:38<24:33:29, 7.56s/it] 4%|▍ | 498/12188 [1:03:47<26:02:02, 8.02s/it] {'loss': 0.4727, 'grad_norm': 0.6559713877689529, 'learning_rate': 9.996970600318481e-06, 'epoch': 0.04} + 4%|▍ | 498/12188 [1:03:47<26:02:02, 8.02s/it] 4%|▍ | 499/12188 [1:03:54<25:04:48, 7.72s/it] {'loss': 0.4341, 'grad_norm': 0.7685267669463628, 'learning_rate': 9.996924178174119e-06, 'epoch': 0.04} + 4%|▍ | 499/12188 [1:03:54<25:04:48, 7.72s/it] 4%|▍ | 500/12188 [1:04:00<24:01:55, 7.40s/it] {'loss': 0.4553, 'grad_norm': 0.6566814642735063, 'learning_rate': 9.996877403155e-06, 'epoch': 0.04} + 4%|▍ | 500/12188 [1:04:00<24:01:55, 7.40s/it] 4%|▍ | 501/12188 [1:04:08<24:13:42, 7.46s/it] {'loss': 0.4826, 'grad_norm': 0.7156057846306229, 'learning_rate': 9.996830275264426e-06, 'epoch': 0.04} + 4%|▍ | 501/12188 [1:04:08<24:13:42, 7.46s/it] 4%|▍ | 502/12188 [1:04:15<23:33:16, 7.26s/it] {'loss': 0.4471, 'grad_norm': 0.6825683349608124, 'learning_rate': 9.996782794505727e-06, 'epoch': 0.04} + 4%|▍ | 502/12188 [1:04:15<23:33:16, 7.26s/it] 4%|▍ | 503/12188 [1:04:22<23:10:05, 7.14s/it] {'loss': 0.4812, 'grad_norm': 0.7377768936658535, 'learning_rate': 9.996734960882254e-06, 'epoch': 0.04} + 4%|▍ | 503/12188 [1:04:22<23:10:05, 7.14s/it] 4%|▍ | 504/12188 [1:04:29<23:09:39, 7.14s/it] {'loss': 0.4646, 'grad_norm': 0.7078741000048431, 'learning_rate': 9.996686774397389e-06, 'epoch': 0.04} + 4%|▍ | 504/12188 [1:04:29<23:09:39, 7.14s/it] 4%|▍ | 505/12188 [1:04:36<23:08:12, 7.13s/it] {'loss': 0.4482, 'grad_norm': 0.6916688747392733, 'learning_rate': 9.996638235054527e-06, 'epoch': 0.04} + 4%|▍ | 505/12188 [1:04:36<23:08:12, 7.13s/it] 4%|▍ | 506/12188 [1:04:43<22:56:03, 7.07s/it] {'loss': 0.4381, 'grad_norm': 0.7093691406773603, 'learning_rate': 9.996589342857104e-06, 'epoch': 0.04} + 4%|▍ | 506/12188 [1:04:43<22:56:03, 7.07s/it] 4%|▍ | 507/12188 [1:04:49<22:25:22, 6.91s/it] {'loss': 0.4375, 'grad_norm': 0.7107470230584804, 'learning_rate': 9.996540097808567e-06, 'epoch': 0.04} + 4%|▍ | 507/12188 [1:04:49<22:25:22, 6.91s/it] 4%|▍ | 508/12188 [1:04:56<22:20:43, 6.89s/it] {'loss': 0.4992, 'grad_norm': 0.6324066038387123, 'learning_rate': 9.996490499912399e-06, 'epoch': 0.04} + 4%|▍ | 508/12188 [1:04:56<22:20:43, 6.89s/it] 4%|▍ | 509/12188 [1:05:04<23:31:13, 7.25s/it] {'loss': 0.4735, 'grad_norm': 0.7072477675604203, 'learning_rate': 9.996440549172096e-06, 'epoch': 0.04} + 4%|▍ | 509/12188 [1:05:04<23:31:13, 7.25s/it] 4%|▍ | 510/12188 [1:05:11<23:11:18, 7.15s/it] {'loss': 0.4492, 'grad_norm': 0.7177934794210498, 'learning_rate': 9.99639024559119e-06, 'epoch': 0.04} + 4%|▍ | 510/12188 [1:05:11<23:11:18, 7.15s/it] 4%|▍ | 511/12188 [1:05:18<22:52:06, 7.05s/it] {'loss': 0.4718, 'grad_norm': 0.7839993301929503, 'learning_rate': 9.996339589173232e-06, 'epoch': 0.04} + 4%|▍ | 511/12188 [1:05:18<22:52:06, 7.05s/it] 4%|▍ | 512/12188 [1:05:25<22:52:20, 7.05s/it] {'loss': 0.4375, 'grad_norm': 0.6883700832680933, 'learning_rate': 9.9962885799218e-06, 'epoch': 0.04} + 4%|▍ | 512/12188 [1:05:25<22:52:20, 7.05s/it] 4%|▍ | 513/12188 [1:05:33<23:19:23, 7.19s/it] {'loss': 0.4539, 'grad_norm': 0.7538134608751539, 'learning_rate': 9.996237217840496e-06, 'epoch': 0.04} + 4%|▍ | 513/12188 [1:05:33<23:19:23, 7.19s/it] 4%|▍ | 514/12188 [1:05:39<22:48:49, 7.04s/it] {'loss': 0.5041, 'grad_norm': 0.7843009784283251, 'learning_rate': 9.996185502932946e-06, 'epoch': 0.04} + 4%|▍ | 514/12188 [1:05:39<22:48:49, 7.04s/it] 4%|▍ | 515/12188 [1:05:46<22:26:44, 6.92s/it] {'loss': 0.4285, 'grad_norm': 0.710109448349278, 'learning_rate': 9.996133435202804e-06, 'epoch': 0.04} + 4%|▍ | 515/12188 [1:05:46<22:26:44, 6.92s/it] 4%|▍ | 516/12188 [1:05:53<22:35:25, 6.97s/it] {'loss': 0.4305, 'grad_norm': 0.6872200777377337, 'learning_rate': 9.996081014653743e-06, 'epoch': 0.04} + 4%|▍ | 516/12188 [1:05:53<22:35:25, 6.97s/it] 4%|▍ | 517/12188 [1:06:00<23:07:11, 7.13s/it] {'loss': 0.4794, 'grad_norm': 0.7222067037926857, 'learning_rate': 9.99602824128947e-06, 'epoch': 0.04} + 4%|▍ | 517/12188 [1:06:00<23:07:11, 7.13s/it] 4%|▍ | 518/12188 [1:06:08<23:08:26, 7.14s/it] {'loss': 0.4832, 'grad_norm': 0.7659243964102268, 'learning_rate': 9.995975115113708e-06, 'epoch': 0.04} + 4%|▍ | 518/12188 [1:06:08<23:08:26, 7.14s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f7889842ac0> +[Try #0] Failed to fetch sample 4606592 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f7889842ac0> +Problematic sample: {'image': '20240823_021250_before_screenshot_sub0.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'register'"}, {'from': 'gpt', 'value': '\nclick(x=0.285, y=0.1195)\n'}]} + 4%|▍ | 519/12188 [1:06:15<23:24:43, 7.22s/it] {'loss': 0.4511, 'grad_norm': 0.7327686631577982, 'learning_rate': 9.99592163613021e-06, 'epoch': 0.04} + 4%|▍ | 519/12188 [1:06:15<23:24:43, 7.22s/it] 4%|▍ | 520/12188 [1:06:22<23:13:05, 7.16s/it] {'loss': 0.4697, 'grad_norm': 0.6703624799860225, 'learning_rate': 9.995867804342754e-06, 'epoch': 0.04} + 4%|▍ | 520/12188 [1:06:22<23:13:05, 7.16s/it] 4%|▍ | 521/12188 [1:06:30<23:48:06, 7.34s/it] {'loss': 0.4516, 'grad_norm': 0.721676156420074, 'learning_rate': 9.99581361975514e-06, 'epoch': 0.04} + 4%|▍ | 521/12188 [1:06:30<23:48:06, 7.34s/it] 4%|▍ | 522/12188 [1:06:37<23:19:23, 7.20s/it] {'loss': 0.417, 'grad_norm': 0.6630846745996025, 'learning_rate': 9.995759082371192e-06, 'epoch': 0.04} + 4%|▍ | 522/12188 [1:06:37<23:19:23, 7.20s/it] 4%|▍ | 523/12188 [1:06:44<23:37:36, 7.29s/it] {'loss': 0.4347, 'grad_norm': 0.7097672736014867, 'learning_rate': 9.995704192194767e-06, 'epoch': 0.04} + 4%|▍ | 523/12188 [1:06:44<23:37:36, 7.29s/it] 4%|▍ | 524/12188 [1:06:51<23:28:33, 7.25s/it] {'loss': 0.4381, 'grad_norm': 0.6922360012576984, 'learning_rate': 9.995648949229737e-06, 'epoch': 0.04} + 4%|▍ | 524/12188 [1:06:51<23:28:33, 7.25s/it] 4%|▍ | 525/12188 [1:06:59<23:38:34, 7.30s/it] {'loss': 0.4401, 'grad_norm': 0.6634569626058372, 'learning_rate': 9.995593353480004e-06, 'epoch': 0.04} + 4%|▍ | 525/12188 [1:06:59<23:38:34, 7.30s/it] 4%|▍ | 526/12188 [1:07:06<23:40:38, 7.31s/it] {'loss': 0.4383, 'grad_norm': 0.7200049412486922, 'learning_rate': 9.995537404949493e-06, 'epoch': 0.04} + 4%|▍ | 526/12188 [1:07:06<23:40:38, 7.31s/it] 4%|▍ | 527/12188 [1:07:13<23:37:31, 7.29s/it] {'loss': 0.4618, 'grad_norm': 0.6980183502849567, 'learning_rate': 9.99548110364216e-06, 'epoch': 0.04} + 4%|▍ | 527/12188 [1:07:13<23:37:31, 7.29s/it] 4%|▍ | 528/12188 [1:07:21<23:39:09, 7.30s/it] {'loss': 0.4553, 'grad_norm': 0.7411025243528779, 'learning_rate': 9.995424449561974e-06, 'epoch': 0.04} + 4%|▍ | 528/12188 [1:07:21<23:39:09, 7.30s/it] 4%|▍ | 529/12188 [1:07:28<23:20:25, 7.21s/it] {'loss': 0.456, 'grad_norm': 0.6987726191321368, 'learning_rate': 9.995367442712941e-06, 'epoch': 0.04} + 4%|▍ | 529/12188 [1:07:28<23:20:25, 7.21s/it] 4%|▍ | 530/12188 [1:07:35<23:07:08, 7.14s/it] {'loss': 0.4667, 'grad_norm': 0.6758170889261959, 'learning_rate': 9.995310083099085e-06, 'epoch': 0.04} + 4%|▍ | 530/12188 [1:07:35<23:07:08, 7.14s/it] 4%|▍ | 531/12188 [1:07:41<22:36:19, 6.98s/it] {'loss': 0.5104, 'grad_norm': 0.7657406549249167, 'learning_rate': 9.995252370724455e-06, 'epoch': 0.04} + 4%|▍ | 531/12188 [1:07:41<22:36:19, 6.98s/it] 4%|▍ | 532/12188 [1:07:48<22:08:19, 6.84s/it] {'loss': 0.3916, 'grad_norm': 0.6294043636795663, 'learning_rate': 9.99519430559313e-06, 'epoch': 0.04} + 4%|▍ | 532/12188 [1:07:48<22:08:19, 6.84s/it] 4%|▍ | 533/12188 [1:07:54<21:50:08, 6.74s/it] {'loss': 0.4489, 'grad_norm': 0.6853343515399332, 'learning_rate': 9.995135887709207e-06, 'epoch': 0.04} + 4%|▍ | 533/12188 [1:07:54<21:50:08, 6.74s/it] 4%|▍ | 534/12188 [1:08:02<22:52:40, 7.07s/it] {'loss': 0.4515, 'grad_norm': 0.6990569414613095, 'learning_rate': 9.995077117076812e-06, 'epoch': 0.04} + 4%|▍ | 534/12188 [1:08:02<22:52:40, 7.07s/it] 4%|▍ | 535/12188 [1:08:09<22:40:45, 7.01s/it] {'loss': 0.4444, 'grad_norm': 0.7231041042173018, 'learning_rate': 9.995017993700099e-06, 'epoch': 0.04} + 4%|▍ | 535/12188 [1:08:09<22:40:45, 7.01s/it] 4%|▍ | 536/12188 [1:08:16<22:21:14, 6.91s/it] {'loss': 0.4881, 'grad_norm': 0.7278484004519378, 'learning_rate': 9.994958517583238e-06, 'epoch': 0.04} + 4%|▍ | 536/12188 [1:08:16<22:21:14, 6.91s/it] 4%|▍ | 537/12188 [1:08:23<22:27:01, 6.94s/it] {'loss': 0.4653, 'grad_norm': 0.706974439532456, 'learning_rate': 9.994898688730432e-06, 'epoch': 0.04} + 4%|▍ | 537/12188 [1:08:23<22:27:01, 6.94s/it] 4%|▍ | 538/12188 [1:08:29<22:15:23, 6.88s/it] {'loss': 0.493, 'grad_norm': 0.753641900564539, 'learning_rate': 9.994838507145907e-06, 'epoch': 0.04} + 4%|▍ | 538/12188 [1:08:29<22:15:23, 6.88s/it] 4%|▍ | 539/12188 [1:08:37<22:31:19, 6.96s/it] {'loss': 0.4719, 'grad_norm': 0.6452820472005864, 'learning_rate': 9.994777972833911e-06, 'epoch': 0.04} + 4%|▍ | 539/12188 [1:08:37<22:31:19, 6.96s/it] 4%|▍ | 540/12188 [1:08:45<24:26:27, 7.55s/it] {'loss': 0.3934, 'grad_norm': 0.6210280686229739, 'learning_rate': 9.994717085798718e-06, 'epoch': 0.04} + 4%|▍ | 540/12188 [1:08:45<24:26:27, 7.55s/it] 4%|▍ | 541/12188 [1:08:52<23:34:51, 7.29s/it] {'loss': 0.4375, 'grad_norm': 0.7127122239825074, 'learning_rate': 9.99465584604463e-06, 'epoch': 0.04} + 4%|▍ | 541/12188 [1:08:52<23:34:51, 7.29s/it] 4%|▍ | 542/12188 [1:08:59<23:28:48, 7.26s/it] {'loss': 0.4679, 'grad_norm': 0.6866351515332538, 'learning_rate': 9.994594253575972e-06, 'epoch': 0.04} + 4%|▍ | 542/12188 [1:08:59<23:28:48, 7.26s/it] 4%|▍ | 543/12188 [1:09:07<23:28:04, 7.25s/it] {'loss': 0.4325, 'grad_norm': 0.7834169598279863, 'learning_rate': 9.99453230839709e-06, 'epoch': 0.04} + 4%|▍ | 543/12188 [1:09:07<23:28:04, 7.25s/it] 4%|▍ | 544/12188 [1:09:14<23:11:04, 7.17s/it] {'loss': 0.4925, 'grad_norm': 0.859796079968609, 'learning_rate': 9.99447001051236e-06, 'epoch': 0.04} + 4%|▍ | 544/12188 [1:09:14<23:11:04, 7.17s/it] 4%|▍ | 545/12188 [1:09:20<22:33:38, 6.98s/it] {'loss': 0.4632, 'grad_norm': 0.7033137539868488, 'learning_rate': 9.994407359926184e-06, 'epoch': 0.04} + 4%|▍ | 545/12188 [1:09:20<22:33:38, 6.98s/it] 4%|▍ | 546/12188 [1:09:27<22:38:02, 7.00s/it] {'loss': 0.4495, 'grad_norm': 0.6838865216195213, 'learning_rate': 9.994344356642982e-06, 'epoch': 0.04} + 4%|▍ | 546/12188 [1:09:27<22:38:02, 7.00s/it] 4%|▍ | 547/12188 [1:09:34<22:42:52, 7.02s/it] {'loss': 0.4408, 'grad_norm': 0.7027823168201869, 'learning_rate': 9.99428100066721e-06, 'epoch': 0.04} + 4%|▍ | 547/12188 [1:09:34<22:42:52, 7.02s/it] 4%|▍ | 548/12188 [1:09:41<22:31:30, 6.97s/it] {'loss': 0.4763, 'grad_norm': 0.668474461707662, 'learning_rate': 9.994217292003334e-06, 'epoch': 0.04} + 4%|▍ | 548/12188 [1:09:41<22:31:30, 6.97s/it] 5%|▍ | 549/12188 [1:09:48<22:10:19, 6.86s/it] {'loss': 0.4669, 'grad_norm': 0.740202838495107, 'learning_rate': 9.994153230655857e-06, 'epoch': 0.05} + 5%|▍ | 549/12188 [1:09:48<22:10:19, 6.86s/it] 5%|▍ | 550/12188 [1:09:57<24:24:26, 7.55s/it] {'loss': 0.4467, 'grad_norm': 0.6340188489242965, 'learning_rate': 9.994088816629303e-06, 'epoch': 0.05} + 5%|▍ | 550/12188 [1:09:57<24:24:26, 7.55s/it] 5%|▍ | 551/12188 [1:10:04<23:50:01, 7.37s/it] {'loss': 0.4393, 'grad_norm': 0.7530366377950458, 'learning_rate': 9.994024049928222e-06, 'epoch': 0.05} + 5%|▍ | 551/12188 [1:10:04<23:50:01, 7.37s/it] 5%|▍ | 552/12188 [1:10:13<25:58:39, 8.04s/it] {'loss': 0.4162, 'grad_norm': 0.6181649478173531, 'learning_rate': 9.993958930557184e-06, 'epoch': 0.05} + 5%|▍ | 552/12188 [1:10:13<25:58:39, 8.04s/it] 5%|▍ | 553/12188 [1:10:20<24:49:26, 7.68s/it] {'loss': 0.4586, 'grad_norm': 0.6734838772803318, 'learning_rate': 9.993893458520791e-06, 'epoch': 0.05} + 5%|▍ | 553/12188 [1:10:20<24:49:26, 7.68s/it] 5%|▍ | 554/12188 [1:10:27<24:18:05, 7.52s/it] {'loss': 0.455, 'grad_norm': 0.6868737860674475, 'learning_rate': 9.993827633823665e-06, 'epoch': 0.05} + 5%|▍ | 554/12188 [1:10:27<24:18:05, 7.52s/it] 5%|▍ | 555/12188 [1:10:36<25:28:26, 7.88s/it] {'loss': 0.4833, 'grad_norm': 0.7524520698554804, 'learning_rate': 9.993761456470454e-06, 'epoch': 0.05} + 5%|▍ | 555/12188 [1:10:36<25:28:26, 7.88s/it] 5%|▍ | 556/12188 [1:10:43<24:14:41, 7.50s/it] {'loss': 0.4339, 'grad_norm': 0.6954175807615828, 'learning_rate': 9.993694926465833e-06, 'epoch': 0.05} + 5%|▍ | 556/12188 [1:10:43<24:14:41, 7.50s/it] 5%|▍ | 557/12188 [1:10:50<23:36:30, 7.31s/it] {'loss': 0.4553, 'grad_norm': 0.6897275480699461, 'learning_rate': 9.993628043814499e-06, 'epoch': 0.05} + 5%|▍ | 557/12188 [1:10:50<23:36:30, 7.31s/it] 5%|▍ | 558/12188 [1:10:56<22:55:52, 7.10s/it] {'loss': 0.4647, 'grad_norm': 0.7063664026617026, 'learning_rate': 9.993560808521174e-06, 'epoch': 0.05} + 5%|▍ | 558/12188 [1:10:56<22:55:52, 7.10s/it] 5%|▍ | 559/12188 [1:11:03<22:49:22, 7.07s/it] {'loss': 0.4605, 'grad_norm': 0.7381091711503153, 'learning_rate': 9.99349322059061e-06, 'epoch': 0.05} + 5%|▍ | 559/12188 [1:11:03<22:49:22, 7.07s/it] 5%|▍ | 560/12188 [1:11:11<23:17:00, 7.21s/it] {'loss': 0.441, 'grad_norm': 0.7139668062011871, 'learning_rate': 9.993425280027576e-06, 'epoch': 0.05} + 5%|▍ | 560/12188 [1:11:11<23:17:00, 7.21s/it] 5%|▍ | 561/12188 [1:11:18<23:07:43, 7.16s/it] {'loss': 0.4407, 'grad_norm': 0.6638210871415727, 'learning_rate': 9.993356986836871e-06, 'epoch': 0.05} + 5%|▍ | 561/12188 [1:11:18<23:07:43, 7.16s/it] 5%|▍ | 562/12188 [1:11:24<22:38:42, 7.01s/it] {'loss': 0.4585, 'grad_norm': 0.6525424575055189, 'learning_rate': 9.993288341023317e-06, 'epoch': 0.05} + 5%|▍ | 562/12188 [1:11:24<22:38:42, 7.01s/it] 5%|▍ | 563/12188 [1:11:33<23:54:03, 7.40s/it] {'loss': 0.4252, 'grad_norm': 0.6742347788315354, 'learning_rate': 9.993219342591764e-06, 'epoch': 0.05} + 5%|▍ | 563/12188 [1:11:33<23:54:03, 7.40s/it] 5%|▍ | 564/12188 [1:11:40<23:40:19, 7.33s/it] {'loss': 0.4361, 'grad_norm': 0.693888837412908, 'learning_rate': 9.993149991547085e-06, 'epoch': 0.05} + 5%|▍ | 564/12188 [1:11:40<23:40:19, 7.33s/it] 5%|▍ | 565/12188 [1:11:47<23:39:52, 7.33s/it] {'loss': 0.4401, 'grad_norm': 0.7246773886954004, 'learning_rate': 9.993080287894172e-06, 'epoch': 0.05} + 5%|▍ | 565/12188 [1:11:47<23:39:52, 7.33s/it] 5%|▍ | 566/12188 [1:11:56<25:29:16, 7.90s/it] {'loss': 0.4414, 'grad_norm': 0.662869896661773, 'learning_rate': 9.993010231637954e-06, 'epoch': 0.05} + 5%|▍ | 566/12188 [1:11:56<25:29:16, 7.90s/it] 5%|▍ | 567/12188 [1:12:03<24:14:49, 7.51s/it] {'loss': 0.4304, 'grad_norm': 0.644278877153484, 'learning_rate': 9.992939822783374e-06, 'epoch': 0.05} + 5%|▍ | 567/12188 [1:12:03<24:14:49, 7.51s/it] 5%|▍ | 568/12188 [1:12:10<23:56:45, 7.42s/it] {'loss': 0.4656, 'grad_norm': 0.7107919870019649, 'learning_rate': 9.992869061335406e-06, 'epoch': 0.05} + 5%|▍ | 568/12188 [1:12:10<23:56:45, 7.42s/it] 5%|▍ | 569/12188 [1:12:17<23:32:01, 7.29s/it] {'loss': 0.4581, 'grad_norm': 0.7295416812611188, 'learning_rate': 9.992797947299046e-06, 'epoch': 0.05} + 5%|▍ | 569/12188 [1:12:17<23:32:01, 7.29s/it] 5%|▍ | 570/12188 [1:12:24<23:11:53, 7.19s/it] {'loss': 0.468, 'grad_norm': 0.6945077019281005, 'learning_rate': 9.992726480679318e-06, 'epoch': 0.05} + 5%|▍ | 570/12188 [1:12:24<23:11:53, 7.19s/it] 5%|▍ | 571/12188 [1:12:31<23:17:56, 7.22s/it] {'loss': 0.4237, 'grad_norm': 0.7060106582479795, 'learning_rate': 9.992654661481265e-06, 'epoch': 0.05} + 5%|▍ | 571/12188 [1:12:32<23:17:56, 7.22s/it] 5%|▍ | 572/12188 [1:12:39<23:13:05, 7.20s/it] {'loss': 0.4667, 'grad_norm': 0.7453770147494599, 'learning_rate': 9.992582489709963e-06, 'epoch': 0.05} + 5%|▍ | 572/12188 [1:12:39<23:13:05, 7.20s/it] 5%|▍ | 573/12188 [1:12:48<25:35:43, 7.93s/it] {'loss': 0.455, 'grad_norm': 0.6927326292995923, 'learning_rate': 9.992509965370507e-06, 'epoch': 0.05} + 5%|▍ | 573/12188 [1:12:48<25:35:43, 7.93s/it] 5%|▍ | 574/12188 [1:12:55<24:36:07, 7.63s/it] {'loss': 0.4195, 'grad_norm': 0.6709403457146476, 'learning_rate': 9.992437088468016e-06, 'epoch': 0.05} + 5%|▍ | 574/12188 [1:12:55<24:36:07, 7.63s/it] 5%|▍ | 575/12188 [1:13:02<24:17:07, 7.53s/it] {'loss': 0.4536, 'grad_norm': 0.7094911587840689, 'learning_rate': 9.992363859007642e-06, 'epoch': 0.05} + 5%|▍ | 575/12188 [1:13:02<24:17:07, 7.53s/it] 5%|▍ | 576/12188 [1:13:11<25:00:53, 7.76s/it] {'loss': 0.4519, 'grad_norm': 0.7971641616069425, 'learning_rate': 9.992290276994551e-06, 'epoch': 0.05} + 5%|▍ | 576/12188 [1:13:11<25:00:53, 7.76s/it] 5%|▍ | 577/12188 [1:13:18<24:04:45, 7.47s/it] {'loss': 0.4667, 'grad_norm': 0.7041060371667365, 'learning_rate': 9.992216342433943e-06, 'epoch': 0.05} + 5%|▍ | 577/12188 [1:13:18<24:04:45, 7.47s/it] 5%|▍ | 578/12188 [1:13:24<23:27:17, 7.27s/it] {'loss': 0.4768, 'grad_norm': 0.6953357617765562, 'learning_rate': 9.992142055331035e-06, 'epoch': 0.05} + 5%|▍ | 578/12188 [1:13:24<23:27:17, 7.27s/it] 5%|▍ | 579/12188 [1:13:32<23:26:44, 7.27s/it] {'loss': 0.4236, 'grad_norm': 0.6928049706538878, 'learning_rate': 9.992067415691077e-06, 'epoch': 0.05} + 5%|▍ | 579/12188 [1:13:32<23:26:44, 7.27s/it] 5%|▍ | 580/12188 [1:13:39<23:55:51, 7.42s/it] {'loss': 0.4589, 'grad_norm': 0.7587579542428657, 'learning_rate': 9.991992423519338e-06, 'epoch': 0.05} + 5%|▍ | 580/12188 [1:13:39<23:55:51, 7.42s/it] 5%|▍ | 581/12188 [1:13:48<25:25:50, 7.89s/it] {'loss': 0.4558, 'grad_norm': 0.7475497620384385, 'learning_rate': 9.991917078821113e-06, 'epoch': 0.05} + 5%|▍ | 581/12188 [1:13:48<25:25:50, 7.89s/it] 5%|▍ | 582/12188 [1:13:56<25:03:03, 7.77s/it] {'loss': 0.4632, 'grad_norm': 0.7949899318799087, 'learning_rate': 9.991841381601728e-06, 'epoch': 0.05} + 5%|▍ | 582/12188 [1:13:56<25:03:03, 7.77s/it] 5%|▍ | 583/12188 [1:14:05<26:34:48, 8.25s/it] {'loss': 0.4902, 'grad_norm': 0.6809040490606169, 'learning_rate': 9.99176533186652e-06, 'epoch': 0.05} + 5%|▍ | 583/12188 [1:14:05<26:34:48, 8.25s/it] 5%|▍ | 584/12188 [1:14:12<24:59:55, 7.76s/it] {'loss': 0.4302, 'grad_norm': 0.6979975772212924, 'learning_rate': 9.991688929620869e-06, 'epoch': 0.05} + 5%|▍ | 584/12188 [1:14:12<24:59:55, 7.76s/it][2025-08-16 22:24:48,295] [WARNING] [stage3.py:2118:step] 1 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time + 5%|▍ | 585/12188 [1:14:22<27:17:17, 8.47s/it] {'loss': 0.445, 'grad_norm': 0.7085283940553014, 'learning_rate': 9.991612174870163e-06, 'epoch': 0.05} + 5%|▍ | 585/12188 [1:14:22<27:17:17, 8.47s/it] 5%|▍ | 586/12188 [1:14:29<25:41:27, 7.97s/it] {'loss': 0.4221, 'grad_norm': 0.6865328729503544, 'learning_rate': 9.991535067619824e-06, 'epoch': 0.05} + 5%|▍ | 586/12188 [1:14:29<25:41:27, 7.97s/it] 5%|▍ | 587/12188 [1:14:36<24:29:09, 7.60s/it] {'loss': 0.4329, 'grad_norm': 0.7027661578641745, 'learning_rate': 9.991457607875298e-06, 'epoch': 0.05} + 5%|▍ | 587/12188 [1:14:36<24:29:09, 7.60s/it] 5%|▍ | 588/12188 [1:14:42<23:32:48, 7.31s/it] {'loss': 0.418, 'grad_norm': 0.7038563262782168, 'learning_rate': 9.991379795642057e-06, 'epoch': 0.05} + 5%|▍ | 588/12188 [1:14:42<23:32:48, 7.31s/it] 5%|▍ | 589/12188 [1:14:49<23:05:51, 7.17s/it] {'loss': 0.4249, 'grad_norm': 0.7123180975594244, 'learning_rate': 9.991301630925592e-06, 'epoch': 0.05} + 5%|▍ | 589/12188 [1:14:49<23:05:51, 7.17s/it] 5%|▍ | 590/12188 [1:14:56<23:18:56, 7.24s/it] {'loss': 0.4181, 'grad_norm': 0.6603136226905367, 'learning_rate': 9.991223113731427e-06, 'epoch': 0.05} + 5%|▍ | 590/12188 [1:14:56<23:18:56, 7.24s/it] 5%|▍ | 591/12188 [1:15:05<24:39:54, 7.66s/it] {'loss': 0.4483, 'grad_norm': 0.7058062451971719, 'learning_rate': 9.991144244065104e-06, 'epoch': 0.05} + 5%|▍ | 591/12188 [1:15:05<24:39:54, 7.66s/it] 5%|▍ | 592/12188 [1:15:13<24:28:47, 7.60s/it] {'loss': 0.4556, 'grad_norm': 0.7727232297860002, 'learning_rate': 9.991065021932193e-06, 'epoch': 0.05} + 5%|▍ | 592/12188 [1:15:13<24:28:47, 7.60s/it] 5%|▍ | 593/12188 [1:15:20<23:53:28, 7.42s/it] {'loss': 0.4601, 'grad_norm': 0.7713043644835836, 'learning_rate': 9.990985447338288e-06, 'epoch': 0.05} + 5%|▍ | 593/12188 [1:15:20<23:53:28, 7.42s/it] 5%|▍ | 594/12188 [1:15:26<23:03:31, 7.16s/it] {'loss': 0.466, 'grad_norm': 0.7830172006085198, 'learning_rate': 9.99090552028901e-06, 'epoch': 0.05} + 5%|▍ | 594/12188 [1:15:26<23:03:31, 7.16s/it] 5%|▍ | 595/12188 [1:15:33<22:27:38, 6.97s/it] {'loss': 0.4262, 'grad_norm': 0.7260927684934873, 'learning_rate': 9.990825240790002e-06, 'epoch': 0.05} + 5%|▍ | 595/12188 [1:15:33<22:27:38, 6.97s/it] 5%|▍ | 596/12188 [1:15:39<22:16:16, 6.92s/it] {'loss': 0.457, 'grad_norm': 0.7579980410982813, 'learning_rate': 9.990744608846936e-06, 'epoch': 0.05} + 5%|▍ | 596/12188 [1:15:39<22:16:16, 6.92s/it] 5%|▍ | 597/12188 [1:15:47<22:46:41, 7.07s/it] {'loss': 0.4256, 'grad_norm': 0.7333740114144691, 'learning_rate': 9.990663624465504e-06, 'epoch': 0.05} + 5%|▍ | 597/12188 [1:15:47<22:46:41, 7.07s/it] 5%|▍ | 598/12188 [1:15:53<22:12:34, 6.90s/it] {'loss': 0.4655, 'grad_norm': 0.6932880433987787, 'learning_rate': 9.990582287651423e-06, 'epoch': 0.05} + 5%|▍ | 598/12188 [1:15:53<22:12:34, 6.90s/it] 5%|▍ | 599/12188 [1:16:00<22:12:59, 6.90s/it] {'loss': 0.4624, 'grad_norm': 0.7302920833272335, 'learning_rate': 9.99050059841044e-06, 'epoch': 0.05} + 5%|▍ | 599/12188 [1:16:00<22:12:59, 6.90s/it] 5%|▍ | 600/12188 [1:16:08<22:52:52, 7.11s/it] {'loss': 0.4612, 'grad_norm': 0.7322994154107735, 'learning_rate': 9.990418556748323e-06, 'epoch': 0.05} + 5%|▍ | 600/12188 [1:16:08<22:52:52, 7.11s/it] 5%|▍ | 601/12188 [1:16:15<22:55:57, 7.13s/it] {'loss': 0.4543, 'grad_norm': 0.723805484599338, 'learning_rate': 9.990336162670863e-06, 'epoch': 0.05} + 5%|▍ | 601/12188 [1:16:15<22:55:57, 7.13s/it] 5%|▍ | 602/12188 [1:16:22<23:07:30, 7.19s/it] {'loss': 0.4632, 'grad_norm': 0.7083508741651389, 'learning_rate': 9.990253416183882e-06, 'epoch': 0.05} + 5%|▍ | 602/12188 [1:16:22<23:07:30, 7.19s/it] 5%|▍ | 603/12188 [1:16:29<22:58:53, 7.14s/it] {'loss': 0.4458, 'grad_norm': 0.6493145592385119, 'learning_rate': 9.990170317293224e-06, 'epoch': 0.05} + 5%|▍ | 603/12188 [1:16:29<22:58:53, 7.14s/it] 5%|▍ | 604/12188 [1:16:36<22:47:21, 7.08s/it] {'loss': 0.4775, 'grad_norm': 0.7114681999241342, 'learning_rate': 9.990086866004753e-06, 'epoch': 0.05} + 5%|▍ | 604/12188 [1:16:36<22:47:21, 7.08s/it] 5%|▍ | 605/12188 [1:16:43<22:17:13, 6.93s/it] {'loss': 0.4476, 'grad_norm': 0.6424219480407392, 'learning_rate': 9.990003062324367e-06, 'epoch': 0.05} + 5%|▍ | 605/12188 [1:16:43<22:17:13, 6.93s/it] 5%|▍ | 606/12188 [1:16:50<22:52:22, 7.11s/it] {'loss': 0.4644, 'grad_norm': 0.7602770197051834, 'learning_rate': 9.989918906257979e-06, 'epoch': 0.05} + 5%|▍ | 606/12188 [1:16:50<22:52:22, 7.11s/it] 5%|▍ | 607/12188 [1:16:57<22:39:16, 7.04s/it] {'loss': 0.4429, 'grad_norm': 0.7408866090717591, 'learning_rate': 9.989834397811536e-06, 'epoch': 0.05} + 5%|▍ | 607/12188 [1:16:57<22:39:16, 7.04s/it] 5%|▍ | 608/12188 [1:17:04<22:33:58, 7.02s/it] {'loss': 0.404, 'grad_norm': 0.6918586784424133, 'learning_rate': 9.989749536991005e-06, 'epoch': 0.05} + 5%|▍ | 608/12188 [1:17:04<22:33:58, 7.02s/it] 5%|▍ | 609/12188 [1:17:13<24:36:25, 7.65s/it] {'loss': 0.4535, 'grad_norm': 0.7595382656960332, 'learning_rate': 9.989664323802378e-06, 'epoch': 0.05} + 5%|▍ | 609/12188 [1:17:13<24:36:25, 7.65s/it] 5%|▌ | 610/12188 [1:17:20<23:52:48, 7.43s/it] {'loss': 0.4142, 'grad_norm': 0.7524547885802731, 'learning_rate': 9.989578758251673e-06, 'epoch': 0.05} + 5%|▌ | 610/12188 [1:17:20<23:52:48, 7.43s/it] 5%|▌ | 611/12188 [1:17:28<24:25:23, 7.59s/it] {'loss': 0.4095, 'grad_norm': 0.6483969395237272, 'learning_rate': 9.989492840344934e-06, 'epoch': 0.05} + 5%|▌ | 611/12188 [1:17:28<24:25:23, 7.59s/it] 5%|▌ | 612/12188 [1:17:35<23:47:55, 7.40s/it] {'loss': 0.4926, 'grad_norm': 0.7086713342832945, 'learning_rate': 9.989406570088225e-06, 'epoch': 0.05} + 5%|▌ | 612/12188 [1:17:35<23:47:55, 7.40s/it] 5%|▌ | 613/12188 [1:17:42<23:33:25, 7.33s/it] {'loss': 0.4453, 'grad_norm': 0.7025032620937127, 'learning_rate': 9.989319947487641e-06, 'epoch': 0.05} + 5%|▌ | 613/12188 [1:17:42<23:33:25, 7.33s/it] 5%|▌ | 614/12188 [1:17:53<26:32:58, 8.26s/it] {'loss': 0.436, 'grad_norm': 0.6766595841801735, 'learning_rate': 9.989232972549296e-06, 'epoch': 0.05} + 5%|▌ | 614/12188 [1:17:53<26:32:58, 8.26s/it] 5%|▌ | 615/12188 [1:17:59<24:58:49, 7.77s/it] {'loss': 0.4244, 'grad_norm': 0.6779705494842774, 'learning_rate': 9.989145645279337e-06, 'epoch': 0.05} + 5%|▌ | 615/12188 [1:17:59<24:58:49, 7.77s/it] 5%|▌ | 616/12188 [1:18:07<24:46:18, 7.71s/it] {'loss': 0.4388, 'grad_norm': 0.6459569642168315, 'learning_rate': 9.989057965683926e-06, 'epoch': 0.05} + 5%|▌ | 616/12188 [1:18:07<24:46:18, 7.71s/it] 5%|▌ | 617/12188 [1:18:14<24:07:49, 7.51s/it] {'loss': 0.4486, 'grad_norm': 0.6416203750432178, 'learning_rate': 9.988969933769259e-06, 'epoch': 0.05} + 5%|▌ | 617/12188 [1:18:14<24:07:49, 7.51s/it] 5%|▌ | 618/12188 [1:18:21<23:48:16, 7.41s/it] {'loss': 0.4598, 'grad_norm': 0.6820101077787526, 'learning_rate': 9.98888154954155e-06, 'epoch': 0.05} + 5%|▌ | 618/12188 [1:18:21<23:48:16, 7.41s/it] 5%|▌ | 619/12188 [1:18:28<23:21:43, 7.27s/it] {'loss': 0.4295, 'grad_norm': 0.7878438515212305, 'learning_rate': 9.988792813007042e-06, 'epoch': 0.05} + 5%|▌ | 619/12188 [1:18:28<23:21:43, 7.27s/it] 5%|▌ | 620/12188 [1:18:35<22:55:20, 7.13s/it] {'loss': 0.459, 'grad_norm': 0.7037435249883632, 'learning_rate': 9.988703724172e-06, 'epoch': 0.05} + 5%|▌ | 620/12188 [1:18:35<22:55:20, 7.13s/it] 5%|▌ | 621/12188 [1:18:42<22:57:44, 7.15s/it] {'loss': 0.4247, 'grad_norm': 0.7377998874700837, 'learning_rate': 9.988614283042715e-06, 'epoch': 0.05} + 5%|▌ | 621/12188 [1:18:42<22:57:44, 7.15s/it] 5%|▌ | 622/12188 [1:18:52<25:32:05, 7.95s/it] {'loss': 0.4392, 'grad_norm': 0.6787425617693291, 'learning_rate': 9.988524489625505e-06, 'epoch': 0.05} + 5%|▌ | 622/12188 [1:18:52<25:32:05, 7.95s/it] 5%|▌ | 623/12188 [1:18:58<24:05:33, 7.50s/it] {'loss': 0.4127, 'grad_norm': 0.6915539089894299, 'learning_rate': 9.98843434392671e-06, 'epoch': 0.05} + 5%|▌ | 623/12188 [1:18:58<24:05:33, 7.50s/it] 5%|▌ | 624/12188 [1:19:05<23:36:12, 7.35s/it] {'loss': 0.426, 'grad_norm': 0.6327535605777087, 'learning_rate': 9.988343845952697e-06, 'epoch': 0.05} + 5%|▌ | 624/12188 [1:19:05<23:36:12, 7.35s/it] 5%|▌ | 625/12188 [1:19:13<23:29:42, 7.31s/it] {'loss': 0.4446, 'grad_norm': 0.6870731267810483, 'learning_rate': 9.988252995709855e-06, 'epoch': 0.05} + 5%|▌ | 625/12188 [1:19:13<23:29:42, 7.31s/it] 5%|▌ | 626/12188 [1:19:20<23:09:50, 7.21s/it] {'loss': 0.4818, 'grad_norm': 0.6956723479089101, 'learning_rate': 9.9881617932046e-06, 'epoch': 0.05} + 5%|▌ | 626/12188 [1:19:20<23:09:50, 7.21s/it] 5%|▌ | 627/12188 [1:19:26<22:35:36, 7.04s/it] {'loss': 0.4645, 'grad_norm': 0.7156039341480762, 'learning_rate': 9.988070238443374e-06, 'epoch': 0.05} + 5%|▌ | 627/12188 [1:19:26<22:35:36, 7.04s/it] 5%|▌ | 628/12188 [1:19:33<22:40:56, 7.06s/it] {'loss': 0.4351, 'grad_norm': 0.6456941083799821, 'learning_rate': 9.987978331432643e-06, 'epoch': 0.05} + 5%|▌ | 628/12188 [1:19:33<22:40:56, 7.06s/it] 5%|▌ | 629/12188 [1:19:42<23:58:45, 7.47s/it] {'loss': 0.4534, 'grad_norm': 0.7098513394944423, 'learning_rate': 9.987886072178896e-06, 'epoch': 0.05} + 5%|▌ | 629/12188 [1:19:42<23:58:45, 7.47s/it] 5%|▌ | 630/12188 [1:19:49<23:29:25, 7.32s/it] {'loss': 0.4643, 'grad_norm': 0.6822386545259626, 'learning_rate': 9.987793460688647e-06, 'epoch': 0.05} + 5%|▌ | 630/12188 [1:19:49<23:29:25, 7.32s/it] 5%|▌ | 631/12188 [1:19:55<22:43:47, 7.08s/it] {'loss': 0.4099, 'grad_norm': 0.68402902621502, 'learning_rate': 9.987700496968438e-06, 'epoch': 0.05} + 5%|▌ | 631/12188 [1:19:55<22:43:47, 7.08s/it] 5%|▌ | 632/12188 [1:20:02<22:34:38, 7.03s/it] {'loss': 0.4236, 'grad_norm': 0.646062415245863, 'learning_rate': 9.987607181024832e-06, 'epoch': 0.05} + 5%|▌ | 632/12188 [1:20:02<22:34:38, 7.03s/it] 5%|▌ | 633/12188 [1:20:09<22:45:38, 7.09s/it] {'loss': 0.4475, 'grad_norm': 0.7314396879814337, 'learning_rate': 9.98751351286442e-06, 'epoch': 0.05} + 5%|▌ | 633/12188 [1:20:09<22:45:38, 7.09s/it] 5%|▌ | 634/12188 [1:20:16<22:32:28, 7.02s/it] {'loss': 0.4173, 'grad_norm': 0.6561020453971673, 'learning_rate': 9.987419492493818e-06, 'epoch': 0.05} + 5%|▌ | 634/12188 [1:20:16<22:32:28, 7.02s/it] 5%|▌ | 635/12188 [1:20:23<22:17:47, 6.95s/it] {'loss': 0.4194, 'grad_norm': 0.6540807601222245, 'learning_rate': 9.987325119919664e-06, 'epoch': 0.05} + 5%|▌ | 635/12188 [1:20:23<22:17:47, 6.95s/it] 5%|▌ | 636/12188 [1:20:30<22:01:17, 6.86s/it] {'loss': 0.44, 'grad_norm': 0.7027635707121694, 'learning_rate': 9.987230395148624e-06, 'epoch': 0.05} + 5%|▌ | 636/12188 [1:20:30<22:01:17, 6.86s/it] 5%|▌ | 637/12188 [1:20:36<21:49:05, 6.80s/it] {'loss': 0.4238, 'grad_norm': 0.7142219738381628, 'learning_rate': 9.987135318187385e-06, 'epoch': 0.05} + 5%|▌ | 637/12188 [1:20:36<21:49:05, 6.80s/it] 5%|▌ | 638/12188 [1:20:43<22:04:15, 6.88s/it] {'loss': 0.4616, 'grad_norm': 0.7424527752180415, 'learning_rate': 9.98703988904266e-06, 'epoch': 0.05} + 5%|▌ | 638/12188 [1:20:43<22:04:15, 6.88s/it] 5%|▌ | 639/12188 [1:20:50<21:55:20, 6.83s/it] {'loss': 0.4753, 'grad_norm': 0.7776487531633278, 'learning_rate': 9.986944107721194e-06, 'epoch': 0.05} + 5%|▌ | 639/12188 [1:20:50<21:55:20, 6.83s/it] 5%|▌ | 640/12188 [1:20:57<21:41:47, 6.76s/it] {'loss': 0.4089, 'grad_norm': 0.6908363946190132, 'learning_rate': 9.986847974229745e-06, 'epoch': 0.05} + 5%|▌ | 640/12188 [1:20:57<21:41:47, 6.76s/it] 5%|▌ | 641/12188 [1:21:04<22:13:44, 6.93s/it] {'loss': 0.4944, 'grad_norm': 0.7043532209893897, 'learning_rate': 9.986751488575106e-06, 'epoch': 0.05} + 5%|▌ | 641/12188 [1:21:04<22:13:44, 6.93s/it] 5%|▌ | 642/12188 [1:21:11<22:14:26, 6.93s/it] {'loss': 0.4505, 'grad_norm': 0.7321317571846115, 'learning_rate': 9.986654650764086e-06, 'epoch': 0.05} + 5%|▌ | 642/12188 [1:21:11<22:14:26, 6.93s/it][2025-08-16 22:31:46,815] [WARNING] [stage3.py:2118:step] 1 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time + 5%|▌ | 643/12188 [1:21:21<24:41:39, 7.70s/it] {'loss': 0.4499, 'grad_norm': 0.6529288915412493, 'learning_rate': 9.986557460803527e-06, 'epoch': 0.05} + 5%|▌ | 643/12188 [1:21:21<24:41:39, 7.70s/it] 5%|▌ | 644/12188 [1:21:27<23:56:36, 7.47s/it] {'loss': 0.4547, 'grad_norm': 0.6453874812953793, 'learning_rate': 9.986459918700293e-06, 'epoch': 0.05} + 5%|▌ | 644/12188 [1:21:27<23:56:36, 7.47s/it] 5%|▌ | 645/12188 [1:21:35<24:03:39, 7.50s/it] {'loss': 0.4424, 'grad_norm': 0.6861533265573238, 'learning_rate': 9.98636202446127e-06, 'epoch': 0.05} + 5%|▌ | 645/12188 [1:21:35<24:03:39, 7.50s/it] 5%|▌ | 646/12188 [1:21:42<23:13:42, 7.25s/it] {'loss': 0.4829, 'grad_norm': 0.705464440165486, 'learning_rate': 9.986263778093373e-06, 'epoch': 0.05} + 5%|▌ | 646/12188 [1:21:42<23:13:42, 7.25s/it] 5%|▌ | 647/12188 [1:21:51<25:27:02, 7.94s/it] {'loss': 0.4646, 'grad_norm': 0.8118832790191072, 'learning_rate': 9.986165179603538e-06, 'epoch': 0.05} + 5%|▌ | 647/12188 [1:21:51<25:27:02, 7.94s/it] 5%|▌ | 648/12188 [1:21:58<24:09:53, 7.54s/it] {'loss': 0.4385, 'grad_norm': 0.6981390745350371, 'learning_rate': 9.98606622899873e-06, 'epoch': 0.05} + 5%|▌ | 648/12188 [1:21:58<24:09:53, 7.54s/it] 5%|▌ | 649/12188 [1:22:04<23:11:19, 7.23s/it] {'loss': 0.4294, 'grad_norm': 0.764402786484223, 'learning_rate': 9.985966926285935e-06, 'epoch': 0.05} + 5%|▌ | 649/12188 [1:22:04<23:11:19, 7.23s/it] 5%|▌ | 650/12188 [1:22:12<23:18:43, 7.27s/it] {'loss': 0.4996, 'grad_norm': 0.7151047594318614, 'learning_rate': 9.985867271472166e-06, 'epoch': 0.05} + 5%|▌ | 650/12188 [1:22:12<23:18:43, 7.27s/it] 5%|▌ | 651/12188 [1:22:19<23:22:53, 7.30s/it] {'loss': 0.4161, 'grad_norm': 0.6655314453332335, 'learning_rate': 9.985767264564461e-06, 'epoch': 0.05} + 5%|▌ | 651/12188 [1:22:19<23:22:53, 7.30s/it] 5%|▌ | 652/12188 [1:22:26<23:04:13, 7.20s/it] {'loss': 0.4506, 'grad_norm': 0.6590476819004433, 'learning_rate': 9.985666905569882e-06, 'epoch': 0.05} + 5%|▌ | 652/12188 [1:22:26<23:04:13, 7.20s/it] 5%|▌ | 653/12188 [1:22:33<22:46:02, 7.11s/it] {'loss': 0.4394, 'grad_norm': 0.7214419447852418, 'learning_rate': 9.985566194495516e-06, 'epoch': 0.05} + 5%|▌ | 653/12188 [1:22:33<22:46:02, 7.11s/it] 5%|▌ | 654/12188 [1:22:40<22:35:01, 7.05s/it] {'loss': 0.4294, 'grad_norm': 0.653003124772566, 'learning_rate': 9.985465131348475e-06, 'epoch': 0.05} + 5%|▌ | 654/12188 [1:22:40<22:35:01, 7.05s/it] 5%|▌ | 655/12188 [1:22:48<23:14:53, 7.26s/it] {'loss': 0.4116, 'grad_norm': 0.6576654232270568, 'learning_rate': 9.985363716135896e-06, 'epoch': 0.05} + 5%|▌ | 655/12188 [1:22:48<23:14:53, 7.26s/it] 5%|▌ | 656/12188 [1:22:55<23:41:16, 7.39s/it] {'loss': 0.4514, 'grad_norm': 0.6868513334775568, 'learning_rate': 9.985261948864941e-06, 'epoch': 0.05} + 5%|▌ | 656/12188 [1:22:55<23:41:16, 7.39s/it] 5%|▌ | 657/12188 [1:23:03<24:09:23, 7.54s/it] {'loss': 0.4398, 'grad_norm': 0.7034376887946833, 'learning_rate': 9.985159829542798e-06, 'epoch': 0.05} + 5%|▌ | 657/12188 [1:23:03<24:09:23, 7.54s/it] 5%|▌ | 658/12188 [1:23:10<23:48:12, 7.43s/it] {'loss': 0.3851, 'grad_norm': 0.7063276033071738, 'learning_rate': 9.985057358176675e-06, 'epoch': 0.05} + 5%|▌ | 658/12188 [1:23:10<23:48:12, 7.43s/it] 5%|▌ | 659/12188 [1:23:18<23:34:11, 7.36s/it] {'loss': 0.4661, 'grad_norm': 0.6738112518845624, 'learning_rate': 9.984954534773812e-06, 'epoch': 0.05} + 5%|▌ | 659/12188 [1:23:18<23:34:11, 7.36s/it] 5%|▌ | 660/12188 [1:23:25<23:34:53, 7.36s/it] {'loss': 0.4591, 'grad_norm': 0.7561419868890678, 'learning_rate': 9.984851359341469e-06, 'epoch': 0.05} + 5%|▌ | 660/12188 [1:23:25<23:34:53, 7.36s/it] 5%|▌ | 661/12188 [1:23:35<25:45:26, 8.04s/it] {'loss': 0.4568, 'grad_norm': 0.6850450369343807, 'learning_rate': 9.98474783188693e-06, 'epoch': 0.05} + 5%|▌ | 661/12188 [1:23:35<25:45:26, 8.04s/it] 5%|▌ | 662/12188 [1:23:42<24:44:30, 7.73s/it] {'loss': 0.4772, 'grad_norm': 0.7081006507074291, 'learning_rate': 9.98464395241751e-06, 'epoch': 0.05} + 5%|▌ | 662/12188 [1:23:42<24:44:30, 7.73s/it] 5%|▌ | 663/12188 [1:23:49<24:00:11, 7.50s/it] {'loss': 0.4562, 'grad_norm': 0.6898729857188778, 'learning_rate': 9.98453972094054e-06, 'epoch': 0.05} + 5%|▌ | 663/12188 [1:23:49<24:00:11, 7.50s/it] 5%|▌ | 664/12188 [1:23:56<24:05:12, 7.52s/it] {'loss': 0.4227, 'grad_norm': 0.6316791515209615, 'learning_rate': 9.984435137463386e-06, 'epoch': 0.05} + 5%|▌ | 664/12188 [1:23:56<24:05:12, 7.52s/it] 5%|▌ | 665/12188 [1:24:03<23:44:59, 7.42s/it] {'loss': 0.4053, 'grad_norm': 0.6355303085714438, 'learning_rate': 9.984330201993428e-06, 'epoch': 0.05} + 5%|▌ | 665/12188 [1:24:03<23:44:59, 7.42s/it] 5%|▌ | 666/12188 [1:24:11<23:39:43, 7.39s/it] {'loss': 0.4353, 'grad_norm': 0.6740317890759678, 'learning_rate': 9.98422491453808e-06, 'epoch': 0.05} + 5%|▌ | 666/12188 [1:24:11<23:39:43, 7.39s/it] 5%|▌ | 667/12188 [1:24:18<23:53:27, 7.47s/it] {'loss': 0.4351, 'grad_norm': 0.6656941607213188, 'learning_rate': 9.984119275104778e-06, 'epoch': 0.05} + 5%|▌ | 667/12188 [1:24:18<23:53:27, 7.47s/it] 5%|▌ | 668/12188 [1:24:27<25:17:42, 7.90s/it] {'loss': 0.4512, 'grad_norm': 0.7524910923546507, 'learning_rate': 9.984013283700979e-06, 'epoch': 0.05} + 5%|▌ | 668/12188 [1:24:27<25:17:42, 7.90s/it] 5%|▌ | 669/12188 [1:24:35<25:24:27, 7.94s/it] {'loss': 0.4145, 'grad_norm': 0.6727451923963934, 'learning_rate': 9.98390694033417e-06, 'epoch': 0.05} + 5%|▌ | 669/12188 [1:24:35<25:24:27, 7.94s/it] 5%|▌ | 670/12188 [1:24:42<24:10:33, 7.56s/it] {'loss': 0.4755, 'grad_norm': 0.7223381738399713, 'learning_rate': 9.983800245011858e-06, 'epoch': 0.05} + 5%|▌ | 670/12188 [1:24:42<24:10:33, 7.56s/it] 6%|▌ | 671/12188 [1:24:49<23:23:32, 7.31s/it] {'loss': 0.4573, 'grad_norm': 0.7358819047761049, 'learning_rate': 9.983693197741581e-06, 'epoch': 0.06} + 6%|▌ | 671/12188 [1:24:49<23:23:32, 7.31s/it] 6%|▌ | 672/12188 [1:24:58<24:59:48, 7.81s/it] {'loss': 0.4016, 'grad_norm': 0.7191257984876378, 'learning_rate': 9.983585798530898e-06, 'epoch': 0.06} + 6%|▌ | 672/12188 [1:24:58<24:59:48, 7.81s/it] 6%|▌ | 673/12188 [1:25:05<24:42:44, 7.73s/it] {'loss': 0.4873, 'grad_norm': 0.6577727538401741, 'learning_rate': 9.983478047387392e-06, 'epoch': 0.06} + 6%|▌ | 673/12188 [1:25:05<24:42:44, 7.73s/it] 6%|▌ | 674/12188 [1:25:12<23:43:47, 7.42s/it] {'loss': 0.4393, 'grad_norm': 0.6772138522715679, 'learning_rate': 9.983369944318673e-06, 'epoch': 0.06} + 6%|▌ | 674/12188 [1:25:12<23:43:47, 7.42s/it] 6%|▌ | 675/12188 [1:25:19<23:29:49, 7.35s/it] {'loss': 0.4327, 'grad_norm': 0.6852212196493609, 'learning_rate': 9.983261489332375e-06, 'epoch': 0.06} + 6%|▌ | 675/12188 [1:25:19<23:29:49, 7.35s/it] 6%|▌ | 676/12188 [1:25:28<24:37:43, 7.70s/it] {'loss': 0.476, 'grad_norm': 0.7127118245753811, 'learning_rate': 9.983152682436156e-06, 'epoch': 0.06} + 6%|▌ | 676/12188 [1:25:28<24:37:43, 7.70s/it] 6%|▌ | 677/12188 [1:25:34<23:46:47, 7.44s/it] {'loss': 0.4611, 'grad_norm': 0.7967448536590878, 'learning_rate': 9.983043523637703e-06, 'epoch': 0.06} + 6%|▌ | 677/12188 [1:25:34<23:46:47, 7.44s/it] 6%|▌ | 678/12188 [1:25:41<23:27:13, 7.34s/it] {'loss': 0.4768, 'grad_norm': 0.773059351793155, 'learning_rate': 9.98293401294472e-06, 'epoch': 0.06} + 6%|▌ | 678/12188 [1:25:41<23:27:13, 7.34s/it] 6%|▌ | 679/12188 [1:25:49<23:16:04, 7.28s/it] {'loss': 0.4544, 'grad_norm': 0.6807979706816126, 'learning_rate': 9.982824150364943e-06, 'epoch': 0.06} + 6%|▌ | 679/12188 [1:25:49<23:16:04, 7.28s/it] 6%|▌ | 680/12188 [1:25:55<22:52:44, 7.16s/it] {'loss': 0.4793, 'grad_norm': 0.6893915945536586, 'learning_rate': 9.982713935906132e-06, 'epoch': 0.06} + 6%|▌ | 680/12188 [1:25:55<22:52:44, 7.16s/it] 6%|▌ | 681/12188 [1:26:03<22:53:02, 7.16s/it] {'loss': 0.4464, 'grad_norm': 0.6392432448165112, 'learning_rate': 9.982603369576065e-06, 'epoch': 0.06} + 6%|▌ | 681/12188 [1:26:03<22:53:02, 7.16s/it] 6%|▌ | 682/12188 [1:26:10<22:53:59, 7.16s/it] {'loss': 0.4139, 'grad_norm': 0.6625419182332756, 'learning_rate': 9.982492451382554e-06, 'epoch': 0.06} + 6%|▌ | 682/12188 [1:26:10<22:53:59, 7.16s/it] 6%|▌ | 683/12188 [1:26:17<22:32:58, 7.06s/it] {'loss': 0.4636, 'grad_norm': 0.6811076581172595, 'learning_rate': 9.982381181333433e-06, 'epoch': 0.06} + 6%|▌ | 683/12188 [1:26:17<22:32:58, 7.06s/it] 6%|▌ | 684/12188 [1:26:24<22:25:16, 7.02s/it] {'loss': 0.3796, 'grad_norm': 0.644709537808952, 'learning_rate': 9.982269559436557e-06, 'epoch': 0.06} + 6%|▌ | 684/12188 [1:26:24<22:25:16, 7.02s/it] 6%|▌ | 685/12188 [1:26:30<22:17:54, 6.98s/it] {'loss': 0.3892, 'grad_norm': 0.6925131944653402, 'learning_rate': 9.982157585699808e-06, 'epoch': 0.06} + 6%|▌ | 685/12188 [1:26:30<22:17:54, 6.98s/it] 6%|▌ | 686/12188 [1:26:37<22:04:08, 6.91s/it] {'loss': 0.3965, 'grad_norm': 0.6267594752148631, 'learning_rate': 9.982045260131096e-06, 'epoch': 0.06} + 6%|▌ | 686/12188 [1:26:37<22:04:08, 6.91s/it] 6%|▌ | 687/12188 [1:26:44<21:53:46, 6.85s/it] {'loss': 0.4728, 'grad_norm': 0.7083275003275775, 'learning_rate': 9.981932582738352e-06, 'epoch': 0.06} + 6%|▌ | 687/12188 [1:26:44<21:53:46, 6.85s/it] 6%|▌ | 688/12188 [1:26:51<21:49:44, 6.83s/it] {'loss': 0.4177, 'grad_norm': 0.6775895818256653, 'learning_rate': 9.981819553529533e-06, 'epoch': 0.06} + 6%|▌ | 688/12188 [1:26:51<21:49:44, 6.83s/it] 6%|▌ | 689/12188 [1:26:58<22:14:38, 6.96s/it] {'loss': 0.4701, 'grad_norm': 0.7095397771413141, 'learning_rate': 9.98170617251262e-06, 'epoch': 0.06} + 6%|▌ | 689/12188 [1:26:58<22:14:38, 6.96s/it] 6%|▌ | 690/12188 [1:27:05<21:58:07, 6.88s/it] {'loss': 0.4434, 'grad_norm': 0.7026091176344283, 'learning_rate': 9.981592439695622e-06, 'epoch': 0.06} + 6%|▌ | 690/12188 [1:27:05<21:58:07, 6.88s/it] 6%|▌ | 691/12188 [1:27:13<22:56:17, 7.18s/it] {'loss': 0.444, 'grad_norm': 0.6823833914164218, 'learning_rate': 9.98147835508657e-06, 'epoch': 0.06} + 6%|▌ | 691/12188 [1:27:13<22:56:17, 7.18s/it] 6%|▌ | 692/12188 [1:27:19<22:35:29, 7.07s/it] {'loss': 0.4396, 'grad_norm': 0.6316488009161602, 'learning_rate': 9.981363918693518e-06, 'epoch': 0.06} + 6%|▌ | 692/12188 [1:27:19<22:35:29, 7.07s/it] 6%|▌ | 693/12188 [1:27:26<22:23:19, 7.01s/it] {'loss': 0.4641, 'grad_norm': 0.800062822143366, 'learning_rate': 9.981249130524551e-06, 'epoch': 0.06} + 6%|▌ | 693/12188 [1:27:26<22:23:19, 7.01s/it] 6%|▌ | 694/12188 [1:27:33<21:51:44, 6.85s/it] {'loss': 0.4618, 'grad_norm': 0.6631359653992758, 'learning_rate': 9.981133990587773e-06, 'epoch': 0.06} + 6%|▌ | 694/12188 [1:27:33<21:51:44, 6.85s/it] 6%|▌ | 695/12188 [1:27:40<22:11:35, 6.95s/it] {'loss': 0.448, 'grad_norm': 0.702374822599577, 'learning_rate': 9.981018498891314e-06, 'epoch': 0.06} + 6%|▌ | 695/12188 [1:27:40<22:11:35, 6.95s/it] 6%|▌ | 696/12188 [1:27:47<22:20:37, 7.00s/it] {'loss': 0.3994, 'grad_norm': 0.6494507164843467, 'learning_rate': 9.980902655443332e-06, 'epoch': 0.06} + 6%|▌ | 696/12188 [1:27:47<22:20:37, 7.00s/it] 6%|▌ | 697/12188 [1:27:54<22:05:59, 6.92s/it] {'loss': 0.471, 'grad_norm': 0.6749110269107862, 'learning_rate': 9.980786460252007e-06, 'epoch': 0.06} + 6%|▌ | 697/12188 [1:27:54<22:05:59, 6.92s/it] 6%|▌ | 698/12188 [1:28:04<25:01:02, 7.84s/it] {'loss': 0.4364, 'grad_norm': 0.7157541993889127, 'learning_rate': 9.980669913325545e-06, 'epoch': 0.06} + 6%|▌ | 698/12188 [1:28:04<25:01:02, 7.84s/it] 6%|▌ | 699/12188 [1:28:11<24:20:01, 7.62s/it] {'loss': 0.4148, 'grad_norm': 0.627808033699411, 'learning_rate': 9.980553014672175e-06, 'epoch': 0.06} + 6%|▌ | 699/12188 [1:28:11<24:20:01, 7.62s/it] 6%|▌ | 700/12188 [1:28:19<24:29:47, 7.68s/it] {'loss': 0.4673, 'grad_norm': 0.6538797207534921, 'learning_rate': 9.980435764300154e-06, 'epoch': 0.06} + 6%|▌ | 700/12188 [1:28:19<24:29:47, 7.68s/it] 6%|▌ | 701/12188 [1:28:25<23:31:28, 7.37s/it] {'loss': 0.4464, 'grad_norm': 0.6957043648521234, 'learning_rate': 9.980318162217759e-06, 'epoch': 0.06} + 6%|▌ | 701/12188 [1:28:25<23:31:28, 7.37s/it] 6%|▌ | 702/12188 [1:28:32<23:14:36, 7.29s/it] {'loss': 0.4401, 'grad_norm': 0.6578536128527104, 'learning_rate': 9.9802002084333e-06, 'epoch': 0.06} + 6%|▌ | 702/12188 [1:28:32<23:14:36, 7.29s/it] 6%|▌ | 703/12188 [1:28:39<22:50:42, 7.16s/it] {'loss': 0.4278, 'grad_norm': 0.7163350057026545, 'learning_rate': 9.980081902955102e-06, 'epoch': 0.06} + 6%|▌ | 703/12188 [1:28:39<22:50:42, 7.16s/it] 6%|▌ | 704/12188 [1:28:46<22:27:50, 7.04s/it] {'loss': 0.412, 'grad_norm': 0.7027717164373313, 'learning_rate': 9.979963245791519e-06, 'epoch': 0.06} + 6%|▌ | 704/12188 [1:28:46<22:27:50, 7.04s/it] 6%|▌ | 705/12188 [1:28:53<22:00:34, 6.90s/it] {'loss': 0.4644, 'grad_norm': 0.6812319484730992, 'learning_rate': 9.979844236950936e-06, 'epoch': 0.06} + 6%|▌ | 705/12188 [1:28:53<22:00:34, 6.90s/it] 6%|▌ | 706/12188 [1:28:59<22:00:00, 6.90s/it] {'loss': 0.4413, 'grad_norm': 0.788791247577929, 'learning_rate': 9.979724876441751e-06, 'epoch': 0.06} + 6%|▌ | 706/12188 [1:28:59<22:00:00, 6.90s/it] 6%|▌ | 707/12188 [1:29:06<21:57:51, 6.89s/it] {'loss': 0.4693, 'grad_norm': 0.7049188175294242, 'learning_rate': 9.979605164272396e-06, 'epoch': 0.06} + 6%|▌ | 707/12188 [1:29:06<21:57:51, 6.89s/it] 6%|▌ | 708/12188 [1:29:14<23:01:24, 7.22s/it] {'loss': 0.4888, 'grad_norm': 0.7013926995201676, 'learning_rate': 9.979485100451327e-06, 'epoch': 0.06} + 6%|▌ | 708/12188 [1:29:14<23:01:24, 7.22s/it] 6%|▌ | 709/12188 [1:29:22<23:40:06, 7.42s/it] {'loss': 0.4537, 'grad_norm': 0.6601199182502906, 'learning_rate': 9.979364684987018e-06, 'epoch': 0.06} + 6%|▌ | 709/12188 [1:29:22<23:40:06, 7.42s/it] 6%|▌ | 710/12188 [1:29:29<22:40:22, 7.11s/it] {'loss': 0.4211, 'grad_norm': 0.7274908349236442, 'learning_rate': 9.979243917887977e-06, 'epoch': 0.06} + 6%|▌ | 710/12188 [1:29:29<22:40:22, 7.11s/it] 6%|▌ | 711/12188 [1:29:35<22:22:25, 7.02s/it] {'loss': 0.4437, 'grad_norm': 0.7123192772247481, 'learning_rate': 9.979122799162729e-06, 'epoch': 0.06} + 6%|▌ | 711/12188 [1:29:35<22:22:25, 7.02s/it] 6%|▌ | 712/12188 [1:29:43<23:02:23, 7.23s/it] {'loss': 0.4191, 'grad_norm': 0.6746404604710475, 'learning_rate': 9.979001328819828e-06, 'epoch': 0.06} + 6%|▌ | 712/12188 [1:29:43<23:02:23, 7.23s/it] 6%|▌ | 713/12188 [1:29:51<23:18:06, 7.31s/it] {'loss': 0.4431, 'grad_norm': 0.710890037481162, 'learning_rate': 9.978879506867854e-06, 'epoch': 0.06} + 6%|▌ | 713/12188 [1:29:51<23:18:06, 7.31s/it] 6%|▌ | 714/12188 [1:30:01<26:05:45, 8.19s/it] {'loss': 0.4204, 'grad_norm': 0.6666099100651415, 'learning_rate': 9.978757333315406e-06, 'epoch': 0.06} + 6%|▌ | 714/12188 [1:30:01<26:05:45, 8.19s/it] 6%|▌ | 715/12188 [1:30:08<24:43:51, 7.76s/it] {'loss': 0.4428, 'grad_norm': 0.697978952189993, 'learning_rate': 9.978634808171116e-06, 'epoch': 0.06} + 6%|▌ | 715/12188 [1:30:08<24:43:51, 7.76s/it] 6%|▌ | 716/12188 [1:30:15<24:46:43, 7.78s/it] {'loss': 0.4129, 'grad_norm': 0.6509701629221485, 'learning_rate': 9.978511931443636e-06, 'epoch': 0.06} + 6%|▌ | 716/12188 [1:30:15<24:46:43, 7.78s/it] 6%|▌ | 717/12188 [1:30:23<24:25:23, 7.66s/it] {'loss': 0.4998, 'grad_norm': 0.7071397847671119, 'learning_rate': 9.97838870314164e-06, 'epoch': 0.06} + 6%|▌ | 717/12188 [1:30:23<24:25:23, 7.66s/it] 6%|▌ | 718/12188 [1:30:30<23:42:34, 7.44s/it] {'loss': 0.4489, 'grad_norm': 0.7112688537716723, 'learning_rate': 9.978265123273831e-06, 'epoch': 0.06} + 6%|▌ | 718/12188 [1:30:30<23:42:34, 7.44s/it] 6%|▌ | 719/12188 [1:30:37<23:16:07, 7.30s/it] {'loss': 0.5004, 'grad_norm': 0.6841852797025005, 'learning_rate': 9.97814119184894e-06, 'epoch': 0.06} + 6%|▌ | 719/12188 [1:30:37<23:16:07, 7.30s/it] 6%|▌ | 720/12188 [1:30:44<22:48:43, 7.16s/it] {'loss': 0.4212, 'grad_norm': 0.7096267098111431, 'learning_rate': 9.978016908875714e-06, 'epoch': 0.06} + 6%|▌ | 720/12188 [1:30:44<22:48:43, 7.16s/it] 6%|▌ | 721/12188 [1:30:50<22:23:19, 7.03s/it] {'loss': 0.4753, 'grad_norm': 0.7096661506217141, 'learning_rate': 9.977892274362933e-06, 'epoch': 0.06} + 6%|▌ | 721/12188 [1:30:50<22:23:19, 7.03s/it] 6%|▌ | 722/12188 [1:30:57<22:18:23, 7.00s/it] {'loss': 0.416, 'grad_norm': 0.6769399770882867, 'learning_rate': 9.977767288319396e-06, 'epoch': 0.06} + 6%|▌ | 722/12188 [1:30:57<22:18:23, 7.00s/it] 6%|▌ | 723/12188 [1:31:04<21:51:20, 6.86s/it] {'loss': 0.4574, 'grad_norm': 0.6800702883065445, 'learning_rate': 9.97764195075393e-06, 'epoch': 0.06} + 6%|▌ | 723/12188 [1:31:04<21:51:20, 6.86s/it] 6%|▌ | 724/12188 [1:31:11<22:29:49, 7.06s/it] {'loss': 0.4208, 'grad_norm': 0.6914055545089807, 'learning_rate': 9.977516261675389e-06, 'epoch': 0.06} + 6%|▌ | 724/12188 [1:31:11<22:29:49, 7.06s/it] 6%|▌ | 725/12188 [1:31:19<23:00:09, 7.22s/it] {'loss': 0.4323, 'grad_norm': 0.6876058207583066, 'learning_rate': 9.977390221092645e-06, 'epoch': 0.06} + 6%|▌ | 725/12188 [1:31:19<23:00:09, 7.22s/it] 6%|▌ | 726/12188 [1:31:26<23:04:58, 7.25s/it] {'loss': 0.4923, 'grad_norm': 0.6873223520748643, 'learning_rate': 9.977263829014599e-06, 'epoch': 0.06} + 6%|▌ | 726/12188 [1:31:26<23:04:58, 7.25s/it] 6%|▌ | 727/12188 [1:31:34<23:38:53, 7.43s/it] {'loss': 0.4875, 'grad_norm': 0.7408039917168159, 'learning_rate': 9.97713708545018e-06, 'epoch': 0.06} + 6%|▌ | 727/12188 [1:31:34<23:38:53, 7.43s/it] 6%|▌ | 728/12188 [1:31:41<23:28:07, 7.37s/it] {'loss': 0.4908, 'grad_norm': 0.7329918634377439, 'learning_rate': 9.977009990408336e-06, 'epoch': 0.06} + 6%|▌ | 728/12188 [1:31:41<23:28:07, 7.37s/it] 6%|▌ | 729/12188 [1:31:48<22:48:52, 7.17s/it] {'loss': 0.4563, 'grad_norm': 0.698749954429685, 'learning_rate': 9.976882543898043e-06, 'epoch': 0.06} + 6%|▌ | 729/12188 [1:31:48<22:48:52, 7.17s/it] 6%|▌ | 730/12188 [1:31:54<22:11:47, 6.97s/it] {'loss': 0.4155, 'grad_norm': 1.365501991450519, 'learning_rate': 9.976754745928301e-06, 'epoch': 0.06} + 6%|▌ | 730/12188 [1:31:54<22:11:47, 6.97s/it] 6%|▌ | 731/12188 [1:32:02<22:34:47, 7.09s/it] {'loss': 0.4124, 'grad_norm': 0.7138149946173228, 'learning_rate': 9.976626596508133e-06, 'epoch': 0.06} + 6%|▌ | 731/12188 [1:32:02<22:34:47, 7.09s/it] 6%|▌ | 732/12188 [1:32:09<22:21:15, 7.02s/it] {'loss': 0.4202, 'grad_norm': 0.689960220384126, 'learning_rate': 9.976498095646592e-06, 'epoch': 0.06} + 6%|▌ | 732/12188 [1:32:09<22:21:15, 7.02s/it] 6%|▌ | 733/12188 [1:32:16<22:12:23, 6.98s/it] {'loss': 0.4494, 'grad_norm': 0.6883236046015051, 'learning_rate': 9.97636924335275e-06, 'epoch': 0.06} + 6%|▌ | 733/12188 [1:32:16<22:12:23, 6.98s/it] 6%|▌ | 734/12188 [1:32:23<22:38:54, 7.12s/it] {'loss': 0.4205, 'grad_norm': 0.7124473723343269, 'learning_rate': 9.976240039635708e-06, 'epoch': 0.06} + 6%|▌ | 734/12188 [1:32:23<22:38:54, 7.12s/it] 6%|▌ | 735/12188 [1:32:31<23:02:12, 7.24s/it] {'loss': 0.4706, 'grad_norm': 0.7004946345465758, 'learning_rate': 9.976110484504587e-06, 'epoch': 0.06} + 6%|▌ | 735/12188 [1:32:31<23:02:12, 7.24s/it] 6%|▌ | 736/12188 [1:32:41<26:08:25, 8.22s/it] {'loss': 0.4368, 'grad_norm': 0.7411236802469312, 'learning_rate': 9.97598057796854e-06, 'epoch': 0.06} + 6%|▌ | 736/12188 [1:32:41<26:08:25, 8.22s/it] 6%|▌ | 737/12188 [1:32:48<24:59:37, 7.86s/it] {'loss': 0.4196, 'grad_norm': 0.7287422583409956, 'learning_rate': 9.975850320036739e-06, 'epoch': 0.06} + 6%|▌ | 737/12188 [1:32:48<24:59:37, 7.86s/it] 6%|▌ | 738/12188 [1:32:56<24:49:13, 7.80s/it] {'loss': 0.3967, 'grad_norm': 0.763393132690018, 'learning_rate': 9.975719710718384e-06, 'epoch': 0.06} + 6%|▌ | 738/12188 [1:32:56<24:49:13, 7.80s/it] 6%|▌ | 739/12188 [1:33:04<24:55:50, 7.84s/it] {'loss': 0.4315, 'grad_norm': 0.7430463384461315, 'learning_rate': 9.975588750022696e-06, 'epoch': 0.06} + 6%|▌ | 739/12188 [1:33:04<24:55:50, 7.84s/it] 6%|▌ | 740/12188 [1:33:11<24:32:45, 7.72s/it] {'loss': 0.4663, 'grad_norm': 0.6601706037088947, 'learning_rate': 9.975457437958924e-06, 'epoch': 0.06} + 6%|▌ | 740/12188 [1:33:11<24:32:45, 7.72s/it] 6%|▌ | 741/12188 [1:33:18<23:47:16, 7.48s/it] {'loss': 0.4436, 'grad_norm': 0.7148333770866786, 'learning_rate': 9.975325774536342e-06, 'epoch': 0.06} + 6%|▌ | 741/12188 [1:33:18<23:47:16, 7.48s/it] 6%|▌ | 742/12188 [1:33:25<23:15:14, 7.31s/it] {'loss': 0.4568, 'grad_norm': 1.206851605937381, 'learning_rate': 9.975193759764245e-06, 'epoch': 0.06} + 6%|▌ | 742/12188 [1:33:25<23:15:14, 7.31s/it] 6%|▌ | 743/12188 [1:33:32<22:33:39, 7.10s/it] {'loss': 0.411, 'grad_norm': 0.7106445755470513, 'learning_rate': 9.975061393651961e-06, 'epoch': 0.06} + 6%|▌ | 743/12188 [1:33:32<22:33:39, 7.10s/it] 6%|▌ | 744/12188 [1:33:38<22:04:35, 6.94s/it] {'loss': 0.4777, 'grad_norm': 0.7015856967347889, 'learning_rate': 9.974928676208835e-06, 'epoch': 0.06} + 6%|▌ | 744/12188 [1:33:38<22:04:35, 6.94s/it] 6%|▌ | 745/12188 [1:33:48<25:15:16, 7.95s/it] {'loss': 0.3988, 'grad_norm': 0.7359090101404293, 'learning_rate': 9.974795607444236e-06, 'epoch': 0.06} + 6%|▌ | 745/12188 [1:33:48<25:15:16, 7.95s/it] 6%|▌ | 746/12188 [1:33:55<24:15:09, 7.63s/it] {'loss': 0.4337, 'grad_norm': 0.6918746545964252, 'learning_rate': 9.974662187367565e-06, 'epoch': 0.06} + 6%|▌ | 746/12188 [1:33:55<24:15:09, 7.63s/it] 6%|▌ | 747/12188 [1:34:03<24:25:24, 7.69s/it] {'loss': 0.4263, 'grad_norm': 0.6899487522747162, 'learning_rate': 9.974528415988242e-06, 'epoch': 0.06} + 6%|▌ | 747/12188 [1:34:03<24:25:24, 7.69s/it] 6%|▌ | 748/12188 [1:34:10<23:16:25, 7.32s/it] {'loss': 0.4624, 'grad_norm': 0.8229182671995571, 'learning_rate': 9.974394293315715e-06, 'epoch': 0.06} + 6%|▌ | 748/12188 [1:34:10<23:16:25, 7.32s/it] 6%|▌ | 749/12188 [1:34:17<23:08:08, 7.28s/it] {'loss': 0.4167, 'grad_norm': 0.7915810023953279, 'learning_rate': 9.974259819359456e-06, 'epoch': 0.06} + 6%|▌ | 749/12188 [1:34:17<23:08:08, 7.28s/it] 6%|▌ | 750/12188 [1:34:24<22:43:36, 7.15s/it] {'loss': 0.4427, 'grad_norm': 0.7307976403936562, 'learning_rate': 9.974124994128959e-06, 'epoch': 0.06} + 6%|▌ | 750/12188 [1:34:24<22:43:36, 7.15s/it] 6%|▌ | 751/12188 [1:34:30<22:16:08, 7.01s/it] {'loss': 0.4464, 'grad_norm': 0.8840178619546158, 'learning_rate': 9.973989817633746e-06, 'epoch': 0.06} + 6%|▌ | 751/12188 [1:34:30<22:16:08, 7.01s/it] 6%|▌ | 752/12188 [1:34:39<23:34:38, 7.42s/it] {'loss': 0.423, 'grad_norm': 0.7681184741000145, 'learning_rate': 9.973854289883364e-06, 'epoch': 0.06} + 6%|▌ | 752/12188 [1:34:39<23:34:38, 7.42s/it] 6%|▌ | 753/12188 [1:34:46<23:05:22, 7.27s/it] {'loss': 0.4433, 'grad_norm': 0.7844139231878867, 'learning_rate': 9.973718410887383e-06, 'epoch': 0.06} + 6%|▌ | 753/12188 [1:34:46<23:05:22, 7.27s/it]Invalidate trace cache @ step 2: expected module 1, but got module 364 + 6%|▌ | 754/12188 [1:34:52<22:17:05, 7.02s/it] {'loss': 0.8212, 'grad_norm': 1.2242308802650388, 'learning_rate': 9.9735821806554e-06, 'epoch': 0.06} + 6%|▌ | 754/12188 [1:34:52<22:17:05, 7.02s/it] 6%|▌ | 755/12188 [1:34:59<22:06:17, 6.96s/it] {'loss': 0.4426, 'grad_norm': 0.6870410981631561, 'learning_rate': 9.973445599197034e-06, 'epoch': 0.06} + 6%|▌ | 755/12188 [1:34:59<22:06:17, 6.96s/it] 6%|▌ | 756/12188 [1:35:07<22:52:13, 7.20s/it] {'loss': 0.4414, 'grad_norm': 0.7081273307998821, 'learning_rate': 9.973308666521929e-06, 'epoch': 0.06} + 6%|▌ | 756/12188 [1:35:07<22:52:13, 7.20s/it] 6%|▌ | 757/12188 [1:35:15<23:45:17, 7.48s/it] {'loss': 0.4665, 'grad_norm': 1.3391986335129844, 'learning_rate': 9.973171382639757e-06, 'epoch': 0.06} + 6%|▌ | 757/12188 [1:35:15<23:45:17, 7.48s/it] 6%|▌ | 758/12188 [1:35:22<23:02:52, 7.26s/it] {'loss': 0.4076, 'grad_norm': 0.6881160323772392, 'learning_rate': 9.97303374756021e-06, 'epoch': 0.06} + 6%|▌ | 758/12188 [1:35:22<23:02:52, 7.26s/it] 6%|▌ | 759/12188 [1:35:29<22:48:19, 7.18s/it] {'loss': 0.4302, 'grad_norm': 1.0908976755791904, 'learning_rate': 9.972895761293012e-06, 'epoch': 0.06} + 6%|▌ | 759/12188 [1:35:29<22:48:19, 7.18s/it] 6%|▌ | 760/12188 [1:35:35<22:23:12, 7.05s/it] {'loss': 0.4751, 'grad_norm': 1.0851294031523933, 'learning_rate': 9.972757423847904e-06, 'epoch': 0.06} + 6%|▌ | 760/12188 [1:35:35<22:23:12, 7.05s/it] 6%|▌ | 761/12188 [1:35:42<22:30:09, 7.09s/it] {'loss': 0.3922, 'grad_norm': 0.9666763642516082, 'learning_rate': 9.972618735234655e-06, 'epoch': 0.06} + 6%|▌ | 761/12188 [1:35:42<22:30:09, 7.09s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f01c8be6700> +[Try #0] Failed to fetch sample 4829954 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f01c8be6700> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Log in'"}, {'from': 'gpt', 'value': '\nclick(x=0.9655, y=0.12)\n'}]} + 6%|▋ | 762/12188 [1:35:49<22:14:40, 7.01s/it] {'loss': 0.466, 'grad_norm': 0.7221454177423865, 'learning_rate': 9.972479695463061e-06, 'epoch': 0.06} + 6%|▋ | 762/12188 [1:35:49<22:14:40, 7.01s/it] 6%|▋ | 763/12188 [1:35:56<21:44:51, 6.85s/it] {'loss': 0.4435, 'grad_norm': 0.7506869139308555, 'learning_rate': 9.972340304542941e-06, 'epoch': 0.06} + 6%|▋ | 763/12188 [1:35:56<21:44:51, 6.85s/it] 6%|▋ | 764/12188 [1:36:05<24:05:11, 7.59s/it] {'loss': 0.4597, 'grad_norm': 0.704660536991921, 'learning_rate': 9.972200562484135e-06, 'epoch': 0.06} + 6%|▋ | 764/12188 [1:36:05<24:05:11, 7.59s/it] 6%|▋ | 765/12188 [1:36:12<23:17:52, 7.34s/it] {'loss': 0.4261, 'grad_norm': 1.2498355443195492, 'learning_rate': 9.972060469296517e-06, 'epoch': 0.06} + 6%|▋ | 765/12188 [1:36:12<23:17:52, 7.34s/it] 6%|▋ | 766/12188 [1:36:19<22:57:08, 7.23s/it] {'loss': 0.4334, 'grad_norm': 0.9689363988185616, 'learning_rate': 9.971920024989974e-06, 'epoch': 0.06} + 6%|▋ | 766/12188 [1:36:19<22:57:08, 7.23s/it] 6%|▋ | 767/12188 [1:36:26<23:16:11, 7.33s/it] {'loss': 0.4686, 'grad_norm': 0.7085182329360842, 'learning_rate': 9.971779229574427e-06, 'epoch': 0.06} + 6%|▋ | 767/12188 [1:36:26<23:16:11, 7.33s/it] 6%|▋ | 768/12188 [1:36:35<24:23:52, 7.69s/it] {'loss': 0.4762, 'grad_norm': 0.7602390024304373, 'learning_rate': 9.97163808305982e-06, 'epoch': 0.06} + 6%|▋ | 768/12188 [1:36:35<24:23:52, 7.69s/it] 6%|▋ | 769/12188 [1:36:45<26:51:11, 8.47s/it] {'loss': 0.4486, 'grad_norm': 0.7386600804349428, 'learning_rate': 9.971496585456119e-06, 'epoch': 0.06} + 6%|▋ | 769/12188 [1:36:45<26:51:11, 8.47s/it] 6%|▋ | 770/12188 [1:36:52<25:41:15, 8.10s/it] {'loss': 0.4035, 'grad_norm': 0.9861261741806931, 'learning_rate': 9.971354736773315e-06, 'epoch': 0.06} + 6%|▋ | 770/12188 [1:36:52<25:41:15, 8.10s/it] 6%|▋ | 771/12188 [1:37:00<24:59:22, 7.88s/it] {'loss': 0.4162, 'grad_norm': 0.8760235492009005, 'learning_rate': 9.971212537021428e-06, 'epoch': 0.06} + 6%|▋ | 771/12188 [1:37:00<24:59:22, 7.88s/it] 6%|▋ | 772/12188 [1:37:07<23:58:52, 7.56s/it] {'loss': 0.4445, 'grad_norm': 0.7876566257276348, 'learning_rate': 9.971069986210498e-06, 'epoch': 0.06} + 6%|▋ | 772/12188 [1:37:07<23:58:52, 7.56s/it] 6%|▋ | 773/12188 [1:37:14<23:29:21, 7.41s/it] {'loss': 0.4428, 'grad_norm': 0.7655248219598937, 'learning_rate': 9.970927084350592e-06, 'epoch': 0.06} + 6%|▋ | 773/12188 [1:37:14<23:29:21, 7.41s/it] 6%|▋ | 774/12188 [1:37:24<26:06:39, 8.24s/it] {'loss': 0.4379, 'grad_norm': 0.7809007178992573, 'learning_rate': 9.970783831451803e-06, 'epoch': 0.06} + 6%|▋ | 774/12188 [1:37:24<26:06:39, 8.24s/it] 6%|▋ | 775/12188 [1:37:31<24:55:45, 7.86s/it] {'loss': 0.4298, 'grad_norm': 1.5540063561559874, 'learning_rate': 9.970640227524244e-06, 'epoch': 0.06} + 6%|▋ | 775/12188 [1:37:31<24:55:45, 7.86s/it] 6%|▋ | 776/12188 [1:37:38<23:57:28, 7.56s/it] {'loss': 0.4446, 'grad_norm': 0.7521697594060276, 'learning_rate': 9.970496272578058e-06, 'epoch': 0.06} + 6%|▋ | 776/12188 [1:37:38<23:57:28, 7.56s/it] 6%|▋ | 777/12188 [1:37:44<23:02:52, 7.27s/it] {'loss': 0.4388, 'grad_norm': 0.7875072055248622, 'learning_rate': 9.970351966623413e-06, 'epoch': 0.06} + 6%|▋ | 777/12188 [1:37:44<23:02:52, 7.27s/it] 6%|▋ | 778/12188 [1:37:51<22:43:33, 7.17s/it] {'loss': 0.4379, 'grad_norm': 0.8167199238283892, 'learning_rate': 9.970207309670496e-06, 'epoch': 0.06} + 6%|▋ | 778/12188 [1:37:51<22:43:33, 7.17s/it] 6%|▋ | 779/12188 [1:37:58<22:34:45, 7.12s/it] {'loss': 0.4094, 'grad_norm': 0.6097628144553788, 'learning_rate': 9.970062301729525e-06, 'epoch': 0.06} + 6%|▋ | 779/12188 [1:37:58<22:34:45, 7.12s/it] 6%|▋ | 780/12188 [1:38:06<22:48:58, 7.20s/it] {'loss': 0.4334, 'grad_norm': 0.8465271046831818, 'learning_rate': 9.969916942810738e-06, 'epoch': 0.06} + 6%|▋ | 780/12188 [1:38:06<22:48:58, 7.20s/it] 6%|▋ | 781/12188 [1:38:13<23:04:08, 7.28s/it] {'loss': 0.4444, 'grad_norm': 1.122569986165872, 'learning_rate': 9.969771232924404e-06, 'epoch': 0.06} + 6%|▋ | 781/12188 [1:38:13<23:04:08, 7.28s/it] 6%|▋ | 782/12188 [1:38:21<23:38:37, 7.46s/it] {'loss': 0.4448, 'grad_norm': 0.712845475178253, 'learning_rate': 9.969625172080807e-06, 'epoch': 0.06} + 6%|▋ | 782/12188 [1:38:21<23:38:37, 7.46s/it] 6%|▋ | 783/12188 [1:38:28<23:02:54, 7.28s/it] {'loss': 0.4189, 'grad_norm': 0.8946826208160347, 'learning_rate': 9.969478760290265e-06, 'epoch': 0.06} + 6%|▋ | 783/12188 [1:38:28<23:02:54, 7.28s/it] 6%|▋ | 784/12188 [1:38:35<23:05:14, 7.29s/it] {'loss': 0.4371, 'grad_norm': 0.839586905995174, 'learning_rate': 9.969331997563119e-06, 'epoch': 0.06} + 6%|▋ | 784/12188 [1:38:35<23:05:14, 7.29s/it] 6%|▋ | 785/12188 [1:38:43<23:13:17, 7.33s/it] {'loss': 0.4427, 'grad_norm': 0.7322923888964609, 'learning_rate': 9.96918488390973e-06, 'epoch': 0.06} + 6%|▋ | 785/12188 [1:38:43<23:13:17, 7.33s/it] 6%|▋ | 786/12188 [1:38:49<22:42:11, 7.17s/it] {'loss': 0.435, 'grad_norm': 0.9671674751601352, 'learning_rate': 9.969037419340488e-06, 'epoch': 0.06} + 6%|▋ | 786/12188 [1:38:49<22:42:11, 7.17s/it] 6%|▋ | 787/12188 [1:38:56<22:30:20, 7.11s/it] {'loss': 0.4875, 'grad_norm': 0.8274759991754652, 'learning_rate': 9.968889603865807e-06, 'epoch': 0.06} + 6%|▋ | 787/12188 [1:38:56<22:30:20, 7.11s/it] 6%|▋ | 788/12188 [1:39:08<26:27:11, 8.35s/it] {'loss': 0.4168, 'grad_norm': 0.700627593526248, 'learning_rate': 9.968741437496126e-06, 'epoch': 0.06} + 6%|▋ | 788/12188 [1:39:08<26:27:11, 8.35s/it] 6%|▋ | 789/12188 [1:39:16<26:23:43, 8.34s/it] {'loss': 0.3956, 'grad_norm': 1.0800517278059598, 'learning_rate': 9.968592920241906e-06, 'epoch': 0.06} + 6%|▋ | 789/12188 [1:39:16<26:23:43, 8.34s/it] 6%|▋ | 790/12188 [1:39:23<24:47:11, 7.83s/it] {'loss': 0.4102, 'grad_norm': 0.7268277124759637, 'learning_rate': 9.968444052113637e-06, 'epoch': 0.06} + 6%|▋ | 790/12188 [1:39:23<24:47:11, 7.83s/it] 6%|▋ | 791/12188 [1:39:30<24:22:15, 7.70s/it] {'loss': 0.4927, 'grad_norm': 0.8215679318681983, 'learning_rate': 9.968294833121831e-06, 'epoch': 0.06} + 6%|▋ | 791/12188 [1:39:30<24:22:15, 7.70s/it] 6%|▋ | 792/12188 [1:39:37<23:43:21, 7.49s/it] {'loss': 0.4404, 'grad_norm': 0.7982039392106447, 'learning_rate': 9.968145263277029e-06, 'epoch': 0.06} + 6%|▋ | 792/12188 [1:39:37<23:43:21, 7.49s/it] 7%|▋ | 793/12188 [1:39:44<23:04:35, 7.29s/it] {'loss': 0.418, 'grad_norm': 0.7445289226029976, 'learning_rate': 9.967995342589787e-06, 'epoch': 0.07} + 7%|▋ | 793/12188 [1:39:44<23:04:35, 7.29s/it] 7%|▋ | 794/12188 [1:39:51<22:36:03, 7.14s/it] {'loss': 0.4253, 'grad_norm': 1.2928868651018985, 'learning_rate': 9.967845071070697e-06, 'epoch': 0.07} + 7%|▋ | 794/12188 [1:39:51<22:36:03, 7.14s/it] 7%|▋ | 795/12188 [1:39:57<22:13:38, 7.02s/it] {'loss': 0.4508, 'grad_norm': 0.8193165161970162, 'learning_rate': 9.967694448730369e-06, 'epoch': 0.07} + 7%|▋ | 795/12188 [1:39:57<22:13:38, 7.02s/it] 7%|▋ | 796/12188 [1:40:04<21:44:40, 6.87s/it] {'loss': 0.4454, 'grad_norm': 1.0708209673453732, 'learning_rate': 9.967543475579442e-06, 'epoch': 0.07} + 7%|▋ | 796/12188 [1:40:04<21:44:40, 6.87s/it] 7%|▋ | 797/12188 [1:40:11<21:48:12, 6.89s/it] {'loss': 0.4569, 'grad_norm': 0.7758366615047031, 'learning_rate': 9.967392151628574e-06, 'epoch': 0.07} + 7%|▋ | 797/12188 [1:40:11<21:48:12, 6.89s/it] 7%|▋ | 798/12188 [1:40:18<22:07:59, 7.00s/it] {'loss': 0.4478, 'grad_norm': 1.5408654914684596, 'learning_rate': 9.967240476888453e-06, 'epoch': 0.07} + 7%|▋ | 798/12188 [1:40:18<22:07:59, 7.00s/it] 7%|▋ | 799/12188 [1:40:26<22:44:23, 7.19s/it] {'loss': 0.4511, 'grad_norm': 0.8709431320997821, 'learning_rate': 9.96708845136979e-06, 'epoch': 0.07} + 7%|▋ | 799/12188 [1:40:26<22:44:23, 7.19s/it] 7%|▋ | 800/12188 [1:40:33<23:02:08, 7.28s/it] {'loss': 0.4253, 'grad_norm': 0.7224564717546419, 'learning_rate': 9.966936075083323e-06, 'epoch': 0.07} + 7%|▋ | 800/12188 [1:40:33<23:02:08, 7.28s/it] 7%|▋ | 801/12188 [1:40:40<22:28:47, 7.11s/it] {'loss': 0.4567, 'grad_norm': 0.9614689114125544, 'learning_rate': 9.966783348039808e-06, 'epoch': 0.07} + 7%|▋ | 801/12188 [1:40:40<22:28:47, 7.11s/it] 7%|▋ | 802/12188 [1:40:47<22:34:34, 7.14s/it] {'loss': 0.4244, 'grad_norm': 0.7498416461943958, 'learning_rate': 9.966630270250033e-06, 'epoch': 0.07} + 7%|▋ | 802/12188 [1:40:47<22:34:34, 7.14s/it] 7%|▋ | 803/12188 [1:40:54<22:26:38, 7.10s/it] {'loss': 0.4164, 'grad_norm': 2.8828206407894488, 'learning_rate': 9.966476841724808e-06, 'epoch': 0.07} + 7%|▋ | 803/12188 [1:40:54<22:26:38, 7.10s/it] 7%|▋ | 804/12188 [1:41:01<22:45:37, 7.20s/it] {'loss': 0.4477, 'grad_norm': 1.5114530211697998, 'learning_rate': 9.96632306247497e-06, 'epoch': 0.07} + 7%|▋ | 804/12188 [1:41:01<22:45:37, 7.20s/it] 7%|▋ | 805/12188 [1:41:10<24:11:46, 7.65s/it] {'loss': 0.4696, 'grad_norm': 1.4176317734140607, 'learning_rate': 9.966168932511372e-06, 'epoch': 0.07} + 7%|▋ | 805/12188 [1:41:10<24:11:46, 7.65s/it] 7%|▋ | 806/12188 [1:41:17<23:09:23, 7.32s/it] {'loss': 0.4472, 'grad_norm': 1.3902368893682386, 'learning_rate': 9.966014451844905e-06, 'epoch': 0.07} + 7%|▋ | 806/12188 [1:41:17<23:09:23, 7.32s/it] 7%|▋ | 807/12188 [1:41:23<22:30:17, 7.12s/it] {'loss': 0.4354, 'grad_norm': 1.2815829654197612, 'learning_rate': 9.965859620486476e-06, 'epoch': 0.07} + 7%|▋ | 807/12188 [1:41:23<22:30:17, 7.12s/it] 7%|▋ | 808/12188 [1:41:30<22:17:27, 7.05s/it] {'loss': 0.4239, 'grad_norm': 1.0073065909922674, 'learning_rate': 9.96570443844702e-06, 'epoch': 0.07} + 7%|▋ | 808/12188 [1:41:30<22:17:27, 7.05s/it] 7%|▋ | 809/12188 [1:41:38<22:57:09, 7.26s/it] {'loss': 0.4482, 'grad_norm': 2.2286609973890084, 'learning_rate': 9.965548905737492e-06, 'epoch': 0.07} + 7%|▋ | 809/12188 [1:41:38<22:57:09, 7.26s/it] 7%|▋ | 810/12188 [1:41:45<22:21:17, 7.07s/it] {'loss': 0.4165, 'grad_norm': 1.0475889602091626, 'learning_rate': 9.965393022368878e-06, 'epoch': 0.07} + 7%|▋ | 810/12188 [1:41:45<22:21:17, 7.07s/it] 7%|▋ | 811/12188 [1:41:51<22:03:56, 6.98s/it] {'loss': 0.4315, 'grad_norm': 3.526510655375477, 'learning_rate': 9.96523678835219e-06, 'epoch': 0.07} + 7%|▋ | 811/12188 [1:41:51<22:03:56, 6.98s/it] 7%|▋ | 812/12188 [1:42:01<24:50:23, 7.86s/it] {'loss': 0.4419, 'grad_norm': 1.1650784318387941, 'learning_rate': 9.965080203698453e-06, 'epoch': 0.07} + 7%|▋ | 812/12188 [1:42:01<24:50:23, 7.86s/it] 7%|▋ | 813/12188 [1:42:08<24:02:40, 7.61s/it] {'loss': 0.4243, 'grad_norm': 1.2897660053375555, 'learning_rate': 9.964923268418732e-06, 'epoch': 0.07} + 7%|▋ | 813/12188 [1:42:08<24:02:40, 7.61s/it] 7%|▋ | 814/12188 [1:42:16<23:44:41, 7.52s/it] {'loss': 0.3793, 'grad_norm': 1.3471857865843913, 'learning_rate': 9.964765982524105e-06, 'epoch': 0.07} + 7%|▋ | 814/12188 [1:42:16<23:44:41, 7.52s/it] 7%|▋ | 815/12188 [1:42:24<24:16:59, 7.69s/it] {'loss': 0.4237, 'grad_norm': 0.7971019248886824, 'learning_rate': 9.964608346025681e-06, 'epoch': 0.07} + 7%|▋ | 815/12188 [1:42:24<24:16:59, 7.69s/it] 7%|▋ | 816/12188 [1:42:31<24:09:54, 7.65s/it] {'loss': 0.4356, 'grad_norm': 0.8009107995151056, 'learning_rate': 9.964450358934593e-06, 'epoch': 0.07} + 7%|▋ | 816/12188 [1:42:31<24:09:54, 7.65s/it] 7%|▋ | 817/12188 [1:42:39<23:47:20, 7.53s/it] {'loss': 0.4389, 'grad_norm': 2.554239154437295, 'learning_rate': 9.964292021261996e-06, 'epoch': 0.07} + 7%|▋ | 817/12188 [1:42:39<23:47:20, 7.53s/it] 7%|▋ | 818/12188 [1:42:48<25:29:06, 8.07s/it] {'loss': 0.4122, 'grad_norm': 0.6872421786623378, 'learning_rate': 9.96413333301907e-06, 'epoch': 0.07} + 7%|▋ | 818/12188 [1:42:48<25:29:06, 8.07s/it] 7%|▋ | 819/12188 [1:42:58<27:29:02, 8.70s/it] {'loss': 0.3744, 'grad_norm': 0.8203768788619519, 'learning_rate': 9.963974294217027e-06, 'epoch': 0.07} + 7%|▋ | 819/12188 [1:42:58<27:29:02, 8.70s/it] 7%|▋ | 820/12188 [1:43:05<25:52:28, 8.19s/it] {'loss': 0.4436, 'grad_norm': 0.7307300269137603, 'learning_rate': 9.963814904867092e-06, 'epoch': 0.07} + 7%|▋ | 820/12188 [1:43:05<25:52:28, 8.19s/it] 7%|▋ | 821/12188 [1:43:13<25:32:29, 8.09s/it] {'loss': 0.4175, 'grad_norm': 0.7441668812092661, 'learning_rate': 9.963655164980527e-06, 'epoch': 0.07} + 7%|▋ | 821/12188 [1:43:13<25:32:29, 8.09s/it] 7%|▋ | 822/12188 [1:43:20<24:54:19, 7.89s/it] {'loss': 0.4084, 'grad_norm': 0.7816769697993257, 'learning_rate': 9.963495074568605e-06, 'epoch': 0.07} + 7%|▋ | 822/12188 [1:43:20<24:54:19, 7.89s/it] 7%|▋ | 823/12188 [1:43:27<23:47:10, 7.53s/it] {'loss': 0.3974, 'grad_norm': 1.2765553223043258, 'learning_rate': 9.963334633642637e-06, 'epoch': 0.07} + 7%|▋ | 823/12188 [1:43:27<23:47:10, 7.53s/it] 7%|▋ | 824/12188 [1:43:34<23:23:58, 7.41s/it] {'loss': 0.4698, 'grad_norm': 0.8527544314673186, 'learning_rate': 9.96317384221395e-06, 'epoch': 0.07} + 7%|▋ | 824/12188 [1:43:34<23:23:58, 7.41s/it] 7%|▋ | 825/12188 [1:43:42<23:58:09, 7.59s/it] {'loss': 0.4739, 'grad_norm': 1.100337221522185, 'learning_rate': 9.963012700293902e-06, 'epoch': 0.07} + 7%|▋ | 825/12188 [1:43:42<23:58:09, 7.59s/it] 7%|▋ | 826/12188 [1:43:49<23:16:52, 7.38s/it] {'loss': 0.4414, 'grad_norm': 1.0121221309547375, 'learning_rate': 9.962851207893871e-06, 'epoch': 0.07} + 7%|▋ | 826/12188 [1:43:49<23:16:52, 7.38s/it] 7%|▋ | 827/12188 [1:43:58<24:18:50, 7.70s/it] {'loss': 0.5147, 'grad_norm': 0.8276096036747058, 'learning_rate': 9.962689365025259e-06, 'epoch': 0.07} + 7%|▋ | 827/12188 [1:43:58<24:18:50, 7.70s/it] 7%|▋ | 828/12188 [1:44:05<24:31:56, 7.77s/it] {'loss': 0.4067, 'grad_norm': 1.1336871801781099, 'learning_rate': 9.962527171699498e-06, 'epoch': 0.07} + 7%|▋ | 828/12188 [1:44:05<24:31:56, 7.77s/it] 7%|▋ | 829/12188 [1:44:12<23:40:23, 7.50s/it] {'loss': 0.4279, 'grad_norm': 0.7388575057541553, 'learning_rate': 9.962364627928043e-06, 'epoch': 0.07} + 7%|▋ | 829/12188 [1:44:12<23:40:23, 7.50s/it] 7%|▋ | 830/12188 [1:44:19<23:13:52, 7.36s/it] {'loss': 0.4247, 'grad_norm': 0.8305770274498132, 'learning_rate': 9.962201733722369e-06, 'epoch': 0.07} + 7%|▋ | 830/12188 [1:44:19<23:13:52, 7.36s/it] 7%|▋ | 831/12188 [1:44:27<23:13:42, 7.36s/it] {'loss': 0.3671, 'grad_norm': 0.7195198782142942, 'learning_rate': 9.96203848909398e-06, 'epoch': 0.07} + 7%|▋ | 831/12188 [1:44:27<23:13:42, 7.36s/it] 7%|▋ | 832/12188 [1:44:35<23:40:21, 7.50s/it] {'loss': 0.4114, 'grad_norm': 0.7518704800600448, 'learning_rate': 9.961874894054407e-06, 'epoch': 0.07} + 7%|▋ | 832/12188 [1:44:35<23:40:21, 7.50s/it] 7%|▋ | 833/12188 [1:44:42<23:44:43, 7.53s/it] {'loss': 0.4067, 'grad_norm': 1.1253583843187456, 'learning_rate': 9.9617109486152e-06, 'epoch': 0.07} + 7%|▋ | 833/12188 [1:44:42<23:44:43, 7.53s/it] 7%|▋ | 834/12188 [1:44:49<22:54:08, 7.26s/it] {'loss': 0.4579, 'grad_norm': 1.0157503255728841, 'learning_rate': 9.961546652787937e-06, 'epoch': 0.07} + 7%|▋ | 834/12188 [1:44:49<22:54:08, 7.26s/it] 7%|▋ | 835/12188 [1:44:56<22:42:29, 7.20s/it] {'loss': 0.4966, 'grad_norm': 0.7638527294071445, 'learning_rate': 9.96138200658422e-06, 'epoch': 0.07} + 7%|▋ | 835/12188 [1:44:56<22:42:29, 7.20s/it] 7%|▋ | 836/12188 [1:45:03<22:37:35, 7.18s/it] {'loss': 0.3933, 'grad_norm': 0.7524118615609422, 'learning_rate': 9.961217010015679e-06, 'epoch': 0.07} + 7%|▋ | 836/12188 [1:45:03<22:37:35, 7.18s/it] 7%|▋ | 837/12188 [1:45:10<22:37:34, 7.18s/it] {'loss': 0.4711, 'grad_norm': 0.7171987527369611, 'learning_rate': 9.961051663093962e-06, 'epoch': 0.07} + 7%|▋ | 837/12188 [1:45:10<22:37:34, 7.18s/it] 7%|▋ | 838/12188 [1:45:19<23:52:40, 7.57s/it] {'loss': 0.4051, 'grad_norm': 0.6954227123865457, 'learning_rate': 9.960885965830748e-06, 'epoch': 0.07} + 7%|▋ | 838/12188 [1:45:19<23:52:40, 7.57s/it] 7%|▋ | 839/12188 [1:45:25<23:04:54, 7.32s/it] {'loss': 0.3936, 'grad_norm': 0.7677645402953993, 'learning_rate': 9.960719918237735e-06, 'epoch': 0.07} + 7%|▋ | 839/12188 [1:45:25<23:04:54, 7.32s/it] 7%|▋ | 840/12188 [1:45:32<22:47:27, 7.23s/it] {'loss': 0.4283, 'grad_norm': 1.249771091919413, 'learning_rate': 9.960553520326655e-06, 'epoch': 0.07} + 7%|▋ | 840/12188 [1:45:32<22:47:27, 7.23s/it] 7%|▋ | 841/12188 [1:45:39<22:35:05, 7.17s/it] {'loss': 0.4499, 'grad_norm': 1.086126015497951, 'learning_rate': 9.960386772109253e-06, 'epoch': 0.07} + 7%|▋ | 841/12188 [1:45:39<22:35:05, 7.17s/it] 7%|▋ | 842/12188 [1:45:48<23:36:42, 7.49s/it] {'loss': 0.4545, 'grad_norm': 1.3049367413643127, 'learning_rate': 9.960219673597307e-06, 'epoch': 0.07} + 7%|▋ | 842/12188 [1:45:48<23:36:42, 7.49s/it] 7%|▋ | 843/12188 [1:45:54<22:52:13, 7.26s/it] {'loss': 0.4245, 'grad_norm': 1.14774218072718, 'learning_rate': 9.960052224802617e-06, 'epoch': 0.07} + 7%|▋ | 843/12188 [1:45:54<22:52:13, 7.26s/it] 7%|▋ | 844/12188 [1:46:01<22:39:24, 7.19s/it] {'loss': 0.4504, 'grad_norm': 0.9836341158386622, 'learning_rate': 9.959884425737008e-06, 'epoch': 0.07} + 7%|▋ | 844/12188 [1:46:01<22:39:24, 7.19s/it] 7%|▋ | 845/12188 [1:46:08<22:33:41, 7.16s/it] {'loss': 0.4561, 'grad_norm': 0.8924807008365208, 'learning_rate': 9.959716276412327e-06, 'epoch': 0.07} + 7%|▋ | 845/12188 [1:46:08<22:33:41, 7.16s/it] 7%|▋ | 846/12188 [1:46:18<25:12:25, 8.00s/it] {'loss': 0.4868, 'grad_norm': 0.6936910880640923, 'learning_rate': 9.959547776840456e-06, 'epoch': 0.07} + 7%|▋ | 846/12188 [1:46:18<25:12:25, 8.00s/it] 7%|▋ | 847/12188 [1:46:25<24:12:29, 7.68s/it] {'loss': 0.4219, 'grad_norm': 0.8373245285095935, 'learning_rate': 9.959378927033286e-06, 'epoch': 0.07} + 7%|▋ | 847/12188 [1:46:25<24:12:29, 7.68s/it] 7%|▋ | 848/12188 [1:46:32<23:21:24, 7.41s/it] {'loss': 0.4314, 'grad_norm': 1.2304606499509505, 'learning_rate': 9.959209727002743e-06, 'epoch': 0.07} + 7%|▋ | 848/12188 [1:46:32<23:21:24, 7.41s/it] 7%|▋ | 849/12188 [1:46:39<23:07:26, 7.34s/it] {'loss': 0.4004, 'grad_norm': 0.6850824073037841, 'learning_rate': 9.959040176760779e-06, 'epoch': 0.07} + 7%|▋ | 849/12188 [1:46:39<23:07:26, 7.34s/it] 7%|▋ | 850/12188 [1:46:47<23:03:23, 7.32s/it] {'loss': 0.4571, 'grad_norm': 0.8407589705078783, 'learning_rate': 9.958870276319364e-06, 'epoch': 0.07} + 7%|▋ | 850/12188 [1:46:47<23:03:23, 7.32s/it] 7%|▋ | 851/12188 [1:46:54<23:07:51, 7.35s/it] {'loss': 0.4501, 'grad_norm': 0.7932644766471253, 'learning_rate': 9.958700025690496e-06, 'epoch': 0.07} + 7%|▋ | 851/12188 [1:46:54<23:07:51, 7.35s/it] 7%|▋ | 852/12188 [1:47:01<22:21:41, 7.10s/it] {'loss': 0.4562, 'grad_norm': 0.7388254719841849, 'learning_rate': 9.9585294248862e-06, 'epoch': 0.07} + 7%|▋ | 852/12188 [1:47:01<22:21:41, 7.10s/it] 7%|▋ | 853/12188 [1:47:08<22:54:11, 7.27s/it] {'loss': 0.4252, 'grad_norm': 1.1298381484092663, 'learning_rate': 9.958358473918523e-06, 'epoch': 0.07} + 7%|▋ | 853/12188 [1:47:08<22:54:11, 7.27s/it] 7%|▋ | 854/12188 [1:47:15<22:45:51, 7.23s/it] {'loss': 0.4128, 'grad_norm': 0.7007826742491075, 'learning_rate': 9.958187172799536e-06, 'epoch': 0.07} + 7%|▋ | 854/12188 [1:47:15<22:45:51, 7.23s/it] 7%|▋ | 855/12188 [1:47:23<22:57:53, 7.29s/it] {'loss': 0.4343, 'grad_norm': 1.1858887404203176, 'learning_rate': 9.958015521541336e-06, 'epoch': 0.07} + 7%|▋ | 855/12188 [1:47:23<22:57:53, 7.29s/it] 7%|▋ | 856/12188 [1:47:30<22:48:52, 7.25s/it] {'loss': 0.4373, 'grad_norm': 0.980151521865545, 'learning_rate': 9.957843520156047e-06, 'epoch': 0.07} + 7%|▋ | 856/12188 [1:47:30<22:48:52, 7.25s/it] 7%|▋ | 857/12188 [1:47:38<23:09:04, 7.36s/it] {'loss': 0.4744, 'grad_norm': 0.7310750944470014, 'learning_rate': 9.957671168655814e-06, 'epoch': 0.07} + 7%|▋ | 857/12188 [1:47:38<23:09:04, 7.36s/it] 7%|▋ | 858/12188 [1:47:45<23:07:22, 7.35s/it] {'loss': 0.4078, 'grad_norm': 0.7178513395996499, 'learning_rate': 9.957498467052807e-06, 'epoch': 0.07} + 7%|▋ | 858/12188 [1:47:45<23:07:22, 7.35s/it] 7%|▋ | 859/12188 [1:47:51<22:22:50, 7.11s/it] {'loss': 0.4015, 'grad_norm': 0.7815808397247829, 'learning_rate': 9.957325415359225e-06, 'epoch': 0.07} + 7%|▋ | 859/12188 [1:47:51<22:22:50, 7.11s/it] 7%|▋ | 860/12188 [1:47:59<22:23:33, 7.12s/it] {'loss': 0.4721, 'grad_norm': 0.8509615027554758, 'learning_rate': 9.957152013587287e-06, 'epoch': 0.07} + 7%|▋ | 860/12188 [1:47:59<22:23:33, 7.12s/it] 7%|▋ | 861/12188 [1:48:06<22:42:20, 7.22s/it] {'loss': 0.423, 'grad_norm': 0.7713550403946904, 'learning_rate': 9.956978261749235e-06, 'epoch': 0.07} + 7%|▋ | 861/12188 [1:48:06<22:42:20, 7.22s/it] 7%|▋ | 862/12188 [1:48:13<22:46:32, 7.24s/it] {'loss': 0.4462, 'grad_norm': 0.7062199270119384, 'learning_rate': 9.956804159857344e-06, 'epoch': 0.07} + 7%|▋ | 862/12188 [1:48:13<22:46:32, 7.24s/it] 7%|▋ | 863/12188 [1:48:20<22:07:54, 7.04s/it] {'loss': 0.3844, 'grad_norm': 0.947921175058583, 'learning_rate': 9.956629707923906e-06, 'epoch': 0.07} + 7%|▋ | 863/12188 [1:48:20<22:07:54, 7.04s/it] 7%|▋ | 864/12188 [1:48:27<22:03:01, 7.01s/it] {'loss': 0.5079, 'grad_norm': 0.7237267006884891, 'learning_rate': 9.95645490596124e-06, 'epoch': 0.07} + 7%|▋ | 864/12188 [1:48:27<22:03:01, 7.01s/it] 7%|▋ | 865/12188 [1:48:35<22:50:40, 7.26s/it] {'loss': 0.4189, 'grad_norm': 0.655085148042254, 'learning_rate': 9.956279753981696e-06, 'epoch': 0.07} + 7%|▋ | 865/12188 [1:48:35<22:50:40, 7.26s/it] 7%|▋ | 866/12188 [1:48:42<23:18:23, 7.41s/it] {'loss': 0.4175, 'grad_norm': 0.691980230640483, 'learning_rate': 9.956104251997635e-06, 'epoch': 0.07} + 7%|▋ | 866/12188 [1:48:42<23:18:23, 7.41s/it] 7%|▋ | 867/12188 [1:48:52<24:53:26, 7.92s/it] {'loss': 0.4222, 'grad_norm': 0.6986008512965722, 'learning_rate': 9.955928400021457e-06, 'epoch': 0.07} + 7%|▋ | 867/12188 [1:48:52<24:53:26, 7.92s/it] 7%|▋ | 868/12188 [1:48:59<24:43:53, 7.87s/it] {'loss': 0.4445, 'grad_norm': 0.783787321772155, 'learning_rate': 9.955752198065577e-06, 'epoch': 0.07} + 7%|▋ | 868/12188 [1:48:59<24:43:53, 7.87s/it] 7%|▋ | 869/12188 [1:49:06<23:51:01, 7.59s/it] {'loss': 0.4414, 'grad_norm': 0.9199050845754191, 'learning_rate': 9.95557564614244e-06, 'epoch': 0.07} + 7%|▋ | 869/12188 [1:49:06<23:51:01, 7.59s/it] 7%|▋ | 870/12188 [1:49:14<23:35:14, 7.50s/it] {'loss': 0.4309, 'grad_norm': 1.0221417999723401, 'learning_rate': 9.955398744264509e-06, 'epoch': 0.07} + 7%|▋ | 870/12188 [1:49:14<23:35:14, 7.50s/it] 7%|▋ | 871/12188 [1:49:20<22:56:45, 7.30s/it] {'loss': 0.454, 'grad_norm': 0.7355849414671902, 'learning_rate': 9.955221492444283e-06, 'epoch': 0.07} + 7%|▋ | 871/12188 [1:49:20<22:56:45, 7.30s/it] 7%|▋ | 872/12188 [1:49:27<22:22:27, 7.12s/it] {'loss': 0.3999, 'grad_norm': 0.7046928827529818, 'learning_rate': 9.955043890694275e-06, 'epoch': 0.07} + 7%|▋ | 872/12188 [1:49:27<22:22:27, 7.12s/it] 7%|▋ | 873/12188 [1:49:37<24:47:16, 7.89s/it] {'loss': 0.413, 'grad_norm': 0.768934199908008, 'learning_rate': 9.954865939027028e-06, 'epoch': 0.07} + 7%|▋ | 873/12188 [1:49:37<24:47:16, 7.89s/it] 7%|▋ | 874/12188 [1:49:44<24:20:04, 7.74s/it] {'loss': 0.4065, 'grad_norm': 0.6883138267404174, 'learning_rate': 9.95468763745511e-06, 'epoch': 0.07} + 7%|▋ | 874/12188 [1:49:44<24:20:04, 7.74s/it] 7%|▋ | 875/12188 [1:49:51<23:40:36, 7.53s/it] {'loss': 0.4163, 'grad_norm': 0.7019350962955648, 'learning_rate': 9.95450898599111e-06, 'epoch': 0.07} + 7%|▋ | 875/12188 [1:49:51<23:40:36, 7.53s/it] 7%|▋ | 876/12188 [1:49:58<22:49:42, 7.27s/it] {'loss': 0.3995, 'grad_norm': 0.7219099885019596, 'learning_rate': 9.954329984647647e-06, 'epoch': 0.07} + 7%|▋ | 876/12188 [1:49:58<22:49:42, 7.27s/it] 7%|▋ | 877/12188 [1:50:04<22:11:34, 7.06s/it] {'loss': 0.4461, 'grad_norm': 0.7740303830137161, 'learning_rate': 9.95415063343736e-06, 'epoch': 0.07} + 7%|▋ | 877/12188 [1:50:04<22:11:34, 7.06s/it] 7%|▋ | 878/12188 [1:50:12<22:51:28, 7.28s/it] {'loss': 0.4504, 'grad_norm': 0.7506149195203048, 'learning_rate': 9.953970932372915e-06, 'epoch': 0.07} + 7%|▋ | 878/12188 [1:50:12<22:51:28, 7.28s/it] 7%|▋ | 879/12188 [1:50:21<24:33:39, 7.82s/it] {'loss': 0.4351, 'grad_norm': 0.6841146588360545, 'learning_rate': 9.953790881467002e-06, 'epoch': 0.07} + 7%|▋ | 879/12188 [1:50:21<24:33:39, 7.82s/it] 7%|▋ | 880/12188 [1:50:28<23:38:41, 7.53s/it] {'loss': 0.395, 'grad_norm': 0.6955057364106116, 'learning_rate': 9.953610480732334e-06, 'epoch': 0.07} + 7%|▋ | 880/12188 [1:50:28<23:38:41, 7.53s/it] 7%|▋ | 881/12188 [1:50:35<23:20:44, 7.43s/it] {'loss': 0.4523, 'grad_norm': 0.8472989469116042, 'learning_rate': 9.953429730181653e-06, 'epoch': 0.07} + 7%|▋ | 881/12188 [1:50:35<23:20:44, 7.43s/it] 7%|▋ | 882/12188 [1:50:43<23:12:54, 7.39s/it] {'loss': 0.4304, 'grad_norm': 0.7076974953858242, 'learning_rate': 9.953248629827726e-06, 'epoch': 0.07} + 7%|▋ | 882/12188 [1:50:43<23:12:54, 7.39s/it] 7%|▋ | 883/12188 [1:50:50<23:09:29, 7.37s/it] {'loss': 0.4311, 'grad_norm': 0.7262891227030667, 'learning_rate': 9.953067179683336e-06, 'epoch': 0.07} + 7%|▋ | 883/12188 [1:50:50<23:09:29, 7.37s/it] 7%|▋ | 884/12188 [1:50:57<22:23:56, 7.13s/it] {'loss': 0.476, 'grad_norm': 1.2932081893528897, 'learning_rate': 9.9528853797613e-06, 'epoch': 0.07} + 7%|▋ | 884/12188 [1:50:57<22:23:56, 7.13s/it] 7%|▋ | 885/12188 [1:51:04<22:25:08, 7.14s/it] {'loss': 0.4224, 'grad_norm': 0.6963747973106224, 'learning_rate': 9.952703230074454e-06, 'epoch': 0.07} + 7%|▋ | 885/12188 [1:51:04<22:25:08, 7.14s/it] 7%|▋ | 886/12188 [1:51:12<23:41:32, 7.55s/it] {'loss': 0.474, 'grad_norm': 0.8955994430951031, 'learning_rate': 9.952520730635665e-06, 'epoch': 0.07} + 7%|▋ | 886/12188 [1:51:12<23:41:32, 7.55s/it] 7%|▋ | 887/12188 [1:51:19<23:03:27, 7.35s/it] {'loss': 0.4233, 'grad_norm': 0.8853102345254197, 'learning_rate': 9.952337881457819e-06, 'epoch': 0.07} + 7%|▋ | 887/12188 [1:51:19<23:03:27, 7.35s/it] 7%|▋ | 888/12188 [1:51:26<22:43:02, 7.24s/it] {'loss': 0.4335, 'grad_norm': 0.8841222377669077, 'learning_rate': 9.952154682553828e-06, 'epoch': 0.07} + 7%|▋ | 888/12188 [1:51:26<22:43:02, 7.24s/it] 7%|▋ | 889/12188 [1:51:33<22:26:01, 7.15s/it] {'loss': 0.4481, 'grad_norm': 0.7363315961335914, 'learning_rate': 9.951971133936629e-06, 'epoch': 0.07} + 7%|▋ | 889/12188 [1:51:33<22:26:01, 7.15s/it] 7%|▋ | 890/12188 [1:51:40<22:08:19, 7.05s/it] {'loss': 0.4283, 'grad_norm': 0.6886394101879773, 'learning_rate': 9.951787235619183e-06, 'epoch': 0.07} + 7%|▋ | 890/12188 [1:51:40<22:08:19, 7.05s/it] 7%|▋ | 891/12188 [1:51:46<21:38:08, 6.89s/it] {'loss': 0.4201, 'grad_norm': 0.7342238704539885, 'learning_rate': 9.951602987614482e-06, 'epoch': 0.07} + 7%|▋ | 891/12188 [1:51:46<21:38:08, 6.89s/it] 7%|▋ | 892/12188 [1:51:53<21:43:34, 6.92s/it] {'loss': 0.4421, 'grad_norm': 0.7172350343042992, 'learning_rate': 9.951418389935529e-06, 'epoch': 0.07} + 7%|▋ | 892/12188 [1:51:53<21:43:34, 6.92s/it] 7%|▋ | 893/12188 [1:52:00<21:44:14, 6.93s/it] {'loss': 0.3911, 'grad_norm': 0.6431648471462383, 'learning_rate': 9.951233442595366e-06, 'epoch': 0.07} + 7%|▋ | 893/12188 [1:52:00<21:44:14, 6.93s/it] 7%|▋ | 894/12188 [1:52:08<22:37:29, 7.21s/it] {'loss': 0.4236, 'grad_norm': 0.6478437120304857, 'learning_rate': 9.951048145607054e-06, 'epoch': 0.07} + 7%|▋ | 894/12188 [1:52:08<22:37:29, 7.21s/it] 7%|▋ | 895/12188 [1:52:15<22:10:32, 7.07s/it] {'loss': 0.3994, 'grad_norm': 0.7802761489429634, 'learning_rate': 9.950862498983676e-06, 'epoch': 0.07} + 7%|▋ | 895/12188 [1:52:15<22:10:32, 7.07s/it] 7%|▋ | 896/12188 [1:52:22<21:53:53, 6.98s/it] {'loss': 0.4134, 'grad_norm': 0.7527347537586383, 'learning_rate': 9.95067650273834e-06, 'epoch': 0.07} + 7%|▋ | 896/12188 [1:52:22<21:53:53, 6.98s/it] 7%|▋ | 897/12188 [1:52:29<22:08:19, 7.06s/it] {'loss': 0.4113, 'grad_norm': 0.6890214690531933, 'learning_rate': 9.950490156884186e-06, 'epoch': 0.07} + 7%|▋ | 897/12188 [1:52:29<22:08:19, 7.06s/it] 7%|▋ | 898/12188 [1:52:38<23:44:25, 7.57s/it] {'loss': 0.4317, 'grad_norm': 0.6838558579117131, 'learning_rate': 9.95030346143437e-06, 'epoch': 0.07} + 7%|▋ | 898/12188 [1:52:38<23:44:25, 7.57s/it] 7%|▋ | 899/12188 [1:52:47<25:02:30, 7.99s/it] {'loss': 0.4778, 'grad_norm': 0.678739890794481, 'learning_rate': 9.950116416402076e-06, 'epoch': 0.07} + 7%|▋ | 899/12188 [1:52:47<25:02:30, 7.99s/it] 7%|▋ | 900/12188 [1:52:54<24:21:46, 7.77s/it] {'loss': 0.4229, 'grad_norm': 0.6806451820430354, 'learning_rate': 9.949929021800514e-06, 'epoch': 0.07} + 7%|▋ | 900/12188 [1:52:54<24:21:46, 7.77s/it] 7%|▋ | 901/12188 [1:53:01<23:25:10, 7.47s/it] {'loss': 0.4232, 'grad_norm': 0.6097848204732135, 'learning_rate': 9.949741277642917e-06, 'epoch': 0.07} + 7%|▋ | 901/12188 [1:53:01<23:25:10, 7.47s/it] 7%|▋ | 902/12188 [1:53:09<24:30:52, 7.82s/it] {'loss': 0.4055, 'grad_norm': 0.6592703303046097, 'learning_rate': 9.949553183942544e-06, 'epoch': 0.07} + 7%|▋ | 902/12188 [1:53:09<24:30:52, 7.82s/it] 7%|▋ | 903/12188 [1:53:18<25:40:47, 8.19s/it] {'loss': 0.4332, 'grad_norm': 0.7327266300162744, 'learning_rate': 9.949364740712675e-06, 'epoch': 0.07} + 7%|▋ | 903/12188 [1:53:18<25:40:47, 8.19s/it] 7%|▋ | 904/12188 [1:53:26<24:55:00, 7.95s/it] {'loss': 0.4174, 'grad_norm': 0.6950655072551598, 'learning_rate': 9.949175947966623e-06, 'epoch': 0.07} + 7%|▋ | 904/12188 [1:53:26<24:55:00, 7.95s/it] 7%|▋ | 905/12188 [1:53:33<23:50:03, 7.60s/it] {'loss': 0.4152, 'grad_norm': 0.689736571938754, 'learning_rate': 9.948986805717715e-06, 'epoch': 0.07} + 7%|▋ | 905/12188 [1:53:33<23:50:03, 7.60s/it] 7%|▋ | 906/12188 [1:53:40<23:33:20, 7.52s/it] {'loss': 0.4393, 'grad_norm': 0.6697604040175108, 'learning_rate': 9.94879731397931e-06, 'epoch': 0.07} + 7%|▋ | 906/12188 [1:53:40<23:33:20, 7.52s/it] 7%|▋ | 907/12188 [1:53:48<23:43:21, 7.57s/it] {'loss': 0.3938, 'grad_norm': 0.6336772643620869, 'learning_rate': 9.948607472764791e-06, 'epoch': 0.07} + 7%|▋ | 907/12188 [1:53:48<23:43:21, 7.57s/it] 7%|▋ | 908/12188 [1:53:55<23:23:48, 7.47s/it] {'loss': 0.4419, 'grad_norm': 0.6930726943638261, 'learning_rate': 9.948417282087561e-06, 'epoch': 0.07} + 7%|▋ | 908/12188 [1:53:55<23:23:48, 7.47s/it] 7%|▋ | 909/12188 [1:54:02<23:00:42, 7.34s/it] {'loss': 0.4478, 'grad_norm': 0.8882878295304286, 'learning_rate': 9.948226741961053e-06, 'epoch': 0.07} + 7%|▋ | 909/12188 [1:54:02<23:00:42, 7.34s/it] 7%|▋ | 910/12188 [1:54:11<24:48:43, 7.92s/it] {'loss': 0.421, 'grad_norm': 0.7635954101999421, 'learning_rate': 9.948035852398722e-06, 'epoch': 0.07} + 7%|▋ | 910/12188 [1:54:11<24:48:43, 7.92s/it] 7%|▋ | 911/12188 [1:54:18<24:13:47, 7.73s/it] {'loss': 0.3993, 'grad_norm': 0.6956740243472812, 'learning_rate': 9.94784461341405e-06, 'epoch': 0.07} + 7%|▋ | 911/12188 [1:54:18<24:13:47, 7.73s/it] 7%|▋ | 912/12188 [1:54:27<24:36:36, 7.86s/it] {'loss': 0.42, 'grad_norm': 0.6943207412592616, 'learning_rate': 9.947653025020541e-06, 'epoch': 0.07} + 7%|▋ | 912/12188 [1:54:27<24:36:36, 7.86s/it] 7%|▋ | 913/12188 [1:54:34<24:10:32, 7.72s/it] {'loss': 0.4854, 'grad_norm': 0.7912731434703135, 'learning_rate': 9.947461087231725e-06, 'epoch': 0.07} + 7%|▋ | 913/12188 [1:54:34<24:10:32, 7.72s/it] 7%|▋ | 914/12188 [1:54:41<23:21:54, 7.46s/it] {'loss': 0.4219, 'grad_norm': 0.7816489574607006, 'learning_rate': 9.947268800061153e-06, 'epoch': 0.07} + 7%|▋ | 914/12188 [1:54:41<23:21:54, 7.46s/it] 8%|▊ | 915/12188 [1:54:48<22:52:51, 7.31s/it] {'loss': 0.4564, 'grad_norm': 0.7449548075945912, 'learning_rate': 9.947076163522408e-06, 'epoch': 0.08} + 8%|▊ | 915/12188 [1:54:48<22:52:51, 7.31s/it] 8%|▊ | 916/12188 [1:54:55<22:45:27, 7.27s/it] {'loss': 0.4533, 'grad_norm': 0.7893298602374255, 'learning_rate': 9.946883177629094e-06, 'epoch': 0.08} + 8%|▊ | 916/12188 [1:54:55<22:45:27, 7.27s/it] 8%|▊ | 917/12188 [1:55:02<22:39:13, 7.24s/it] {'loss': 0.4207, 'grad_norm': 0.6835664232885097, 'learning_rate': 9.946689842394836e-06, 'epoch': 0.08} + 8%|▊ | 917/12188 [1:55:02<22:39:13, 7.24s/it] 8%|▊ | 918/12188 [1:55:09<22:05:44, 7.06s/it] {'loss': 0.4659, 'grad_norm': 0.7261451142975969, 'learning_rate': 9.94649615783329e-06, 'epoch': 0.08} + 8%|▊ | 918/12188 [1:55:09<22:05:44, 7.06s/it] 8%|▊ | 919/12188 [1:55:16<22:30:02, 7.19s/it] {'loss': 0.4225, 'grad_norm': 0.7026996002248749, 'learning_rate': 9.94630212395813e-06, 'epoch': 0.08} + 8%|▊ | 919/12188 [1:55:16<22:30:02, 7.19s/it] 8%|▊ | 920/12188 [1:55:23<22:17:19, 7.12s/it] {'loss': 0.4411, 'grad_norm': 0.6932871787626236, 'learning_rate': 9.946107740783063e-06, 'epoch': 0.08} + 8%|▊ | 920/12188 [1:55:23<22:17:19, 7.12s/it] 8%|▊ | 921/12188 [1:55:31<22:36:25, 7.22s/it] {'loss': 0.4409, 'grad_norm': 0.7353183343163795, 'learning_rate': 9.945913008321814e-06, 'epoch': 0.08} + 8%|▊ | 921/12188 [1:55:31<22:36:25, 7.22s/it] 8%|▊ | 922/12188 [1:55:38<22:21:30, 7.14s/it] {'loss': 0.4366, 'grad_norm': 0.871076373362069, 'learning_rate': 9.945717926588133e-06, 'epoch': 0.08} + 8%|▊ | 922/12188 [1:55:38<22:21:30, 7.14s/it] 8%|▊ | 923/12188 [1:55:44<22:03:32, 7.05s/it] {'loss': 0.432, 'grad_norm': 0.6988609904423111, 'learning_rate': 9.9455224955958e-06, 'epoch': 0.08} + 8%|▊ | 923/12188 [1:55:44<22:03:32, 7.05s/it] 8%|▊ | 924/12188 [1:55:54<24:42:46, 7.90s/it] {'loss': 0.4406, 'grad_norm': 0.6545253604519848, 'learning_rate': 9.945326715358612e-06, 'epoch': 0.08} + 8%|▊ | 924/12188 [1:55:54<24:42:46, 7.90s/it] 8%|▊ | 925/12188 [1:56:01<24:00:07, 7.67s/it] {'loss': 0.4461, 'grad_norm': 0.7083846834587697, 'learning_rate': 9.945130585890399e-06, 'epoch': 0.08} + 8%|▊ | 925/12188 [1:56:01<24:00:07, 7.67s/it] 8%|▊ | 926/12188 [1:56:08<23:18:31, 7.45s/it] {'loss': 0.4361, 'grad_norm': 1.2898628039675772, 'learning_rate': 9.944934107205006e-06, 'epoch': 0.08} + 8%|▊ | 926/12188 [1:56:08<23:18:31, 7.45s/it] 8%|▊ | 927/12188 [1:56:16<23:34:36, 7.54s/it] {'loss': 0.4573, 'grad_norm': 0.697107161232331, 'learning_rate': 9.944737279316312e-06, 'epoch': 0.08} + 8%|▊ | 927/12188 [1:56:16<23:34:36, 7.54s/it] 8%|▊ | 928/12188 [1:56:23<22:42:56, 7.26s/it] {'loss': 0.3838, 'grad_norm': 0.7189401834494062, 'learning_rate': 9.944540102238217e-06, 'epoch': 0.08} + 8%|▊ | 928/12188 [1:56:23<22:42:56, 7.26s/it] 8%|▊ | 929/12188 [1:56:30<22:45:54, 7.28s/it] {'loss': 0.4183, 'grad_norm': 0.7842953603209286, 'learning_rate': 9.944342575984643e-06, 'epoch': 0.08} + 8%|▊ | 929/12188 [1:56:30<22:45:54, 7.28s/it] 8%|▊ | 930/12188 [1:56:37<22:43:30, 7.27s/it] {'loss': 0.3732, 'grad_norm': 0.7738473268711619, 'learning_rate': 9.94414470056954e-06, 'epoch': 0.08} + 8%|▊ | 930/12188 [1:56:37<22:43:30, 7.27s/it] 8%|▊ | 931/12188 [1:56:44<22:05:14, 7.06s/it] {'loss': 0.4223, 'grad_norm': 0.7210879835269495, 'learning_rate': 9.943946476006882e-06, 'epoch': 0.08} + 8%|▊ | 931/12188 [1:56:44<22:05:14, 7.06s/it] 8%|▊ | 932/12188 [1:56:51<21:55:31, 7.01s/it] {'loss': 0.3744, 'grad_norm': 0.6551856613828901, 'learning_rate': 9.943747902310666e-06, 'epoch': 0.08} + 8%|▊ | 932/12188 [1:56:51<21:55:31, 7.01s/it] 8%|▊ | 933/12188 [1:56:58<21:45:33, 6.96s/it] {'loss': 0.4319, 'grad_norm': 0.6885321509046528, 'learning_rate': 9.943548979494918e-06, 'epoch': 0.08} + 8%|▊ | 933/12188 [1:56:58<21:45:33, 6.96s/it] 8%|▊ | 934/12188 [1:57:04<21:36:27, 6.91s/it] {'loss': 0.4151, 'grad_norm': 0.7713390769070645, 'learning_rate': 9.943349707573681e-06, 'epoch': 0.08} + 8%|▊ | 934/12188 [1:57:04<21:36:27, 6.91s/it] 8%|▊ | 935/12188 [1:57:14<23:59:04, 7.67s/it] {'loss': 0.4686, 'grad_norm': 0.7630425826260916, 'learning_rate': 9.943150086561031e-06, 'epoch': 0.08} + 8%|▊ | 935/12188 [1:57:14<23:59:04, 7.67s/it] 8%|▊ | 936/12188 [1:57:21<23:53:50, 7.65s/it] {'loss': 0.4256, 'grad_norm': 0.8875227056794636, 'learning_rate': 9.942950116471063e-06, 'epoch': 0.08} + 8%|▊ | 936/12188 [1:57:21<23:53:50, 7.65s/it] 8%|▊ | 937/12188 [1:57:28<23:11:21, 7.42s/it] {'loss': 0.4506, 'grad_norm': 0.9609378210550329, 'learning_rate': 9.9427497973179e-06, 'epoch': 0.08} + 8%|▊ | 937/12188 [1:57:28<23:11:21, 7.42s/it] 8%|▊ | 938/12188 [1:57:37<24:41:24, 7.90s/it] {'loss': 0.5057, 'grad_norm': 0.7510572080829931, 'learning_rate': 9.942549129115686e-06, 'epoch': 0.08} + 8%|▊ | 938/12188 [1:57:37<24:41:24, 7.90s/it] 8%|▊ | 939/12188 [1:57:45<24:11:17, 7.74s/it] {'loss': 0.3878, 'grad_norm': 0.6752516762993702, 'learning_rate': 9.942348111878594e-06, 'epoch': 0.08} + 8%|▊ | 939/12188 [1:57:45<24:11:17, 7.74s/it] 8%|▊ | 940/12188 [1:57:52<23:55:33, 7.66s/it] {'loss': 0.4667, 'grad_norm': 1.1233156209568713, 'learning_rate': 9.94214674562082e-06, 'epoch': 0.08} + 8%|▊ | 940/12188 [1:57:52<23:55:33, 7.66s/it] 8%|▊ | 941/12188 [1:58:00<23:55:49, 7.66s/it] {'loss': 0.4223, 'grad_norm': 0.7449681283722189, 'learning_rate': 9.941945030356584e-06, 'epoch': 0.08} + 8%|▊ | 941/12188 [1:58:00<23:55:49, 7.66s/it] 8%|▊ | 942/12188 [1:58:09<24:54:39, 7.97s/it] {'loss': 0.4223, 'grad_norm': 0.6985191627577464, 'learning_rate': 9.941742966100128e-06, 'epoch': 0.08} + 8%|▊ | 942/12188 [1:58:09<24:54:39, 7.97s/it] 8%|▊ | 943/12188 [1:58:17<25:02:44, 8.02s/it] {'loss': 0.4131, 'grad_norm': 0.6914461864723972, 'learning_rate': 9.941540552865722e-06, 'epoch': 0.08} + 8%|▊ | 943/12188 [1:58:17<25:02:44, 8.02s/it] 8%|▊ | 944/12188 [1:58:24<24:11:37, 7.75s/it] {'loss': 0.447, 'grad_norm': 0.7544006376179547, 'learning_rate': 9.941337790667662e-06, 'epoch': 0.08} + 8%|▊ | 944/12188 [1:58:24<24:11:37, 7.75s/it] 8%|▊ | 945/12188 [1:58:32<24:46:04, 7.93s/it] {'loss': 0.4277, 'grad_norm': 0.7073026888181757, 'learning_rate': 9.941134679520268e-06, 'epoch': 0.08} + 8%|▊ | 945/12188 [1:58:32<24:46:04, 7.93s/it] 8%|▊ | 946/12188 [1:58:39<24:03:40, 7.71s/it] {'loss': 0.4177, 'grad_norm': 0.643343866459783, 'learning_rate': 9.94093121943788e-06, 'epoch': 0.08} + 8%|▊ | 946/12188 [1:58:39<24:03:40, 7.71s/it] 8%|▊ | 947/12188 [1:58:47<24:07:22, 7.73s/it] {'loss': 0.4658, 'grad_norm': 0.7460106240755561, 'learning_rate': 9.940727410434868e-06, 'epoch': 0.08} + 8%|▊ | 947/12188 [1:58:47<24:07:22, 7.73s/it] 8%|▊ | 948/12188 [1:58:54<23:13:00, 7.44s/it] {'loss': 0.447, 'grad_norm': 0.7725057675577542, 'learning_rate': 9.940523252525623e-06, 'epoch': 0.08} + 8%|▊ | 948/12188 [1:58:54<23:13:00, 7.44s/it] 8%|▊ | 949/12188 [1:59:02<23:59:55, 7.69s/it] {'loss': 0.4519, 'grad_norm': 0.7674120448312616, 'learning_rate': 9.940318745724564e-06, 'epoch': 0.08} + 8%|▊ | 949/12188 [1:59:02<23:59:55, 7.69s/it] 8%|▊ | 950/12188 [1:59:09<23:02:17, 7.38s/it] {'loss': 0.4565, 'grad_norm': 1.2350816033993233, 'learning_rate': 9.940113890046133e-06, 'epoch': 0.08} + 8%|▊ | 950/12188 [1:59:09<23:02:17, 7.38s/it] 8%|▊ | 951/12188 [1:59:15<22:20:42, 7.16s/it] {'loss': 0.3869, 'grad_norm': 0.6770880968202859, 'learning_rate': 9.939908685504796e-06, 'epoch': 0.08} + 8%|▊ | 951/12188 [1:59:15<22:20:42, 7.16s/it] 8%|▊ | 952/12188 [1:59:23<22:38:45, 7.26s/it] {'loss': 0.4283, 'grad_norm': 0.764976404793502, 'learning_rate': 9.939703132115045e-06, 'epoch': 0.08} + 8%|▊ | 952/12188 [1:59:23<22:38:45, 7.26s/it] 8%|▊ | 953/12188 [1:59:30<22:48:04, 7.31s/it] {'loss': 0.4451, 'grad_norm': 0.8101584495342595, 'learning_rate': 9.939497229891393e-06, 'epoch': 0.08} + 8%|▊ | 953/12188 [1:59:30<22:48:04, 7.31s/it] 8%|▊ | 954/12188 [1:59:38<22:47:12, 7.30s/it] {'loss': 0.4162, 'grad_norm': 0.6503397799856954, 'learning_rate': 9.939290978848383e-06, 'epoch': 0.08} + 8%|▊ | 954/12188 [1:59:38<22:47:12, 7.30s/it] 8%|▊ | 955/12188 [1:59:44<22:21:32, 7.17s/it] {'loss': 0.4081, 'grad_norm': 0.6674709624121883, 'learning_rate': 9.939084379000582e-06, 'epoch': 0.08} + 8%|▊ | 955/12188 [1:59:44<22:21:32, 7.17s/it] 8%|▊ | 956/12188 [1:59:52<22:21:38, 7.17s/it] {'loss': 0.4328, 'grad_norm': 0.6719084212635281, 'learning_rate': 9.938877430362573e-06, 'epoch': 0.08} + 8%|▊ | 956/12188 [1:59:52<22:21:38, 7.17s/it] 8%|▊ | 957/12188 [1:59:59<22:25:24, 7.19s/it] {'loss': 0.4426, 'grad_norm': 0.7105399433680906, 'learning_rate': 9.938670132948978e-06, 'epoch': 0.08} + 8%|▊ | 957/12188 [1:59:59<22:25:24, 7.19s/it] 8%|▊ | 958/12188 [2:00:06<22:41:54, 7.28s/it] {'loss': 0.4748, 'grad_norm': 0.7475682885890519, 'learning_rate': 9.938462486774432e-06, 'epoch': 0.08} + 8%|▊ | 958/12188 [2:00:06<22:41:54, 7.28s/it] 8%|▊ | 959/12188 [2:00:14<23:02:14, 7.39s/it] {'loss': 0.4261, 'grad_norm': 0.7357594849524249, 'learning_rate': 9.9382544918536e-06, 'epoch': 0.08} + 8%|▊ | 959/12188 [2:00:14<23:02:14, 7.39s/it] 8%|▊ | 960/12188 [2:00:22<23:59:38, 7.69s/it] {'loss': 0.4195, 'grad_norm': 0.7511356934318226, 'learning_rate': 9.938046148201168e-06, 'epoch': 0.08} + 8%|▊ | 960/12188 [2:00:22<23:59:38, 7.69s/it] 8%|▊ | 961/12188 [2:00:31<24:25:37, 7.83s/it] {'loss': 0.4776, 'grad_norm': 0.8897985060127483, 'learning_rate': 9.937837455831852e-06, 'epoch': 0.08} + 8%|▊ | 961/12188 [2:00:31<24:25:37, 7.83s/it] 8%|▊ | 962/12188 [2:00:39<24:49:05, 7.96s/it] {'loss': 0.4493, 'grad_norm': 0.7313606580044121, 'learning_rate': 9.937628414760389e-06, 'epoch': 0.08} + 8%|▊ | 962/12188 [2:00:39<24:49:05, 7.96s/it] 8%|▊ | 963/12188 [2:00:46<24:25:17, 7.83s/it] {'loss': 0.4163, 'grad_norm': 0.7552914662034828, 'learning_rate': 9.937419025001537e-06, 'epoch': 0.08} + 8%|▊ | 963/12188 [2:00:46<24:25:17, 7.83s/it] 8%|▊ | 964/12188 [2:00:53<23:17:50, 7.47s/it] {'loss': 0.4647, 'grad_norm': 0.6880067089874974, 'learning_rate': 9.937209286570089e-06, 'epoch': 0.08} + 8%|▊ | 964/12188 [2:00:53<23:17:50, 7.47s/it] 8%|▊ | 965/12188 [2:01:00<22:47:41, 7.31s/it] {'loss': 0.4104, 'grad_norm': 0.6681690870529101, 'learning_rate': 9.936999199480854e-06, 'epoch': 0.08} + 8%|▊ | 965/12188 [2:01:00<22:47:41, 7.31s/it] 8%|▊ | 966/12188 [2:01:08<23:11:58, 7.44s/it] {'loss': 0.4234, 'grad_norm': 0.6863014432640218, 'learning_rate': 9.936788763748666e-06, 'epoch': 0.08} + 8%|▊ | 966/12188 [2:01:08<23:11:58, 7.44s/it] 8%|▊ | 967/12188 [2:01:16<24:26:42, 7.84s/it] {'loss': 0.3925, 'grad_norm': 0.6848333446601156, 'learning_rate': 9.936577979388387e-06, 'epoch': 0.08} + 8%|▊ | 967/12188 [2:01:16<24:26:42, 7.84s/it] 8%|▊ | 968/12188 [2:01:24<23:58:46, 7.69s/it] {'loss': 0.4238, 'grad_norm': 0.8358039902304838, 'learning_rate': 9.936366846414902e-06, 'epoch': 0.08} + 8%|▊ | 968/12188 [2:01:24<23:58:46, 7.69s/it] 8%|▊ | 969/12188 [2:01:31<23:29:02, 7.54s/it] {'loss': 0.4067, 'grad_norm': 0.7005072278565283, 'learning_rate': 9.936155364843121e-06, 'epoch': 0.08} + 8%|▊ | 969/12188 [2:01:31<23:29:02, 7.54s/it] 8%|▊ | 970/12188 [2:01:41<25:30:34, 8.19s/it] {'loss': 0.4625, 'grad_norm': 0.7347531023339785, 'learning_rate': 9.93594353468798e-06, 'epoch': 0.08} + 8%|▊ | 970/12188 [2:01:41<25:30:34, 8.19s/it] 8%|▊ | 971/12188 [2:01:48<24:24:51, 7.84s/it] {'loss': 0.468, 'grad_norm': 0.7996366688989736, 'learning_rate': 9.935731355964436e-06, 'epoch': 0.08} + 8%|▊ | 971/12188 [2:01:48<24:24:51, 7.84s/it] 8%|▊ | 972/12188 [2:01:55<23:29:20, 7.54s/it] {'loss': 0.3866, 'grad_norm': 0.7028483575972714, 'learning_rate': 9.935518828687473e-06, 'epoch': 0.08} + 8%|▊ | 972/12188 [2:01:55<23:29:20, 7.54s/it] 8%|▊ | 973/12188 [2:02:02<22:59:22, 7.38s/it] {'loss': 0.4736, 'grad_norm': 0.743354493091255, 'learning_rate': 9.935305952872101e-06, 'epoch': 0.08} + 8%|▊ | 973/12188 [2:02:02<22:59:22, 7.38s/it] 8%|▊ | 974/12188 [2:02:11<24:40:39, 7.92s/it] {'loss': 0.4574, 'grad_norm': 0.9220902604734729, 'learning_rate': 9.93509272853335e-06, 'epoch': 0.08} + 8%|▊ | 974/12188 [2:02:11<24:40:39, 7.92s/it] 8%|▊ | 975/12188 [2:02:18<23:56:52, 7.69s/it] {'loss': 0.4388, 'grad_norm': 1.7502816610519736, 'learning_rate': 9.934879155686282e-06, 'epoch': 0.08} + 8%|▊ | 975/12188 [2:02:18<23:56:52, 7.69s/it] 8%|▊ | 976/12188 [2:02:26<24:04:11, 7.73s/it] {'loss': 0.4415, 'grad_norm': 1.1522665175262483, 'learning_rate': 9.934665234345976e-06, 'epoch': 0.08} + 8%|▊ | 976/12188 [2:02:26<24:04:11, 7.73s/it] 8%|▊ | 977/12188 [2:02:33<23:49:56, 7.65s/it] {'loss': 0.3976, 'grad_norm': 0.7442682661279915, 'learning_rate': 9.934450964527536e-06, 'epoch': 0.08} + 8%|▊ | 977/12188 [2:02:33<23:49:56, 7.65s/it] 8%|▊ | 978/12188 [2:02:40<23:13:57, 7.46s/it] {'loss': 0.3975, 'grad_norm': 0.7429787221366082, 'learning_rate': 9.9342363462461e-06, 'epoch': 0.08} + 8%|▊ | 978/12188 [2:02:40<23:13:57, 7.46s/it] 8%|▊ | 979/12188 [2:02:48<23:26:54, 7.53s/it] {'loss': 0.464, 'grad_norm': 0.7670940570385435, 'learning_rate': 9.93402137951682e-06, 'epoch': 0.08} + 8%|▊ | 979/12188 [2:02:48<23:26:54, 7.53s/it] 8%|▊ | 980/12188 [2:02:56<23:36:53, 7.59s/it] {'loss': 0.3867, 'grad_norm': 0.8120672316617124, 'learning_rate': 9.933806064354876e-06, 'epoch': 0.08} + 8%|▊ | 980/12188 [2:02:56<23:36:53, 7.59s/it] 8%|▊ | 981/12188 [2:03:03<23:08:56, 7.44s/it] {'loss': 0.4426, 'grad_norm': 0.9700209124496794, 'learning_rate': 9.933590400775476e-06, 'epoch': 0.08} + 8%|▊ | 981/12188 [2:03:03<23:08:56, 7.44s/it] 8%|▊ | 982/12188 [2:03:10<22:44:58, 7.31s/it] {'loss': 0.394, 'grad_norm': 0.6219710262227189, 'learning_rate': 9.933374388793848e-06, 'epoch': 0.08} + 8%|▊ | 982/12188 [2:03:10<22:44:58, 7.31s/it] 8%|▊ | 983/12188 [2:03:17<23:08:15, 7.43s/it] {'loss': 0.4112, 'grad_norm': 2.3039172189744472, 'learning_rate': 9.933158028425247e-06, 'epoch': 0.08} + 8%|▊ | 983/12188 [2:03:17<23:08:15, 7.43s/it] 8%|▊ | 984/12188 [2:03:24<22:45:03, 7.31s/it] {'loss': 0.4364, 'grad_norm': 0.6837937934377377, 'learning_rate': 9.93294131968495e-06, 'epoch': 0.08} + 8%|▊ | 984/12188 [2:03:24<22:45:03, 7.31s/it] 8%|▊ | 985/12188 [2:03:31<21:56:07, 7.05s/it] {'loss': 0.4485, 'grad_norm': 0.6711868430532517, 'learning_rate': 9.932724262588265e-06, 'epoch': 0.08} + 8%|▊ | 985/12188 [2:03:31<21:56:07, 7.05s/it] 8%|▊ | 986/12188 [2:03:38<22:05:25, 7.10s/it] {'loss': 0.4223, 'grad_norm': 0.7243522554632826, 'learning_rate': 9.932506857150515e-06, 'epoch': 0.08} + 8%|▊ | 986/12188 [2:03:38<22:05:25, 7.10s/it] 8%|▊ | 987/12188 [2:03:45<22:00:48, 7.08s/it] {'loss': 0.4707, 'grad_norm': 0.7230729965485122, 'learning_rate': 9.932289103387056e-06, 'epoch': 0.08} + 8%|▊ | 987/12188 [2:03:45<22:00:48, 7.08s/it] 8%|▊ | 988/12188 [2:03:52<22:03:44, 7.09s/it] {'loss': 0.4492, 'grad_norm': 0.7315737055394317, 'learning_rate': 9.932071001313265e-06, 'epoch': 0.08} + 8%|▊ | 988/12188 [2:03:52<22:03:44, 7.09s/it] 8%|▊ | 989/12188 [2:03:59<21:47:19, 7.00s/it] {'loss': 0.4759, 'grad_norm': 0.7565897513852394, 'learning_rate': 9.931852550944545e-06, 'epoch': 0.08} + 8%|▊ | 989/12188 [2:03:59<21:47:19, 7.00s/it] 8%|▊ | 990/12188 [2:04:06<21:36:40, 6.95s/it] {'loss': 0.4148, 'grad_norm': 0.7617964081738033, 'learning_rate': 9.931633752296322e-06, 'epoch': 0.08} + 8%|▊ | 990/12188 [2:04:06<21:36:40, 6.95s/it] 8%|▊ | 991/12188 [2:04:13<21:33:29, 6.93s/it] {'loss': 0.4125, 'grad_norm': 0.7014227772353403, 'learning_rate': 9.931414605384044e-06, 'epoch': 0.08} + 8%|▊ | 991/12188 [2:04:13<21:33:29, 6.93s/it] 8%|▊ | 992/12188 [2:04:22<23:41:52, 7.62s/it] {'loss': 0.4204, 'grad_norm': 0.7256471475942593, 'learning_rate': 9.931195110223193e-06, 'epoch': 0.08} + 8%|▊ | 992/12188 [2:04:22<23:41:52, 7.62s/it] 8%|▊ | 993/12188 [2:04:29<23:11:59, 7.46s/it] {'loss': 0.4198, 'grad_norm': 0.7433496824395379, 'learning_rate': 9.930975266829263e-06, 'epoch': 0.08} + 8%|▊ | 993/12188 [2:04:29<23:11:59, 7.46s/it] 8%|▊ | 994/12188 [2:04:37<23:21:53, 7.51s/it] {'loss': 0.4001, 'grad_norm': 0.6375470855244235, 'learning_rate': 9.930755075217783e-06, 'epoch': 0.08} + 8%|▊ | 994/12188 [2:04:37<23:21:53, 7.51s/it] 8%|▊ | 995/12188 [2:04:44<22:57:52, 7.39s/it] {'loss': 0.4286, 'grad_norm': 0.6634490102429524, 'learning_rate': 9.930534535404302e-06, 'epoch': 0.08} + 8%|▊ | 995/12188 [2:04:44<22:57:52, 7.39s/it] 8%|▊ | 996/12188 [2:04:51<22:29:16, 7.23s/it] {'loss': 0.4167, 'grad_norm': 0.6929563429283587, 'learning_rate': 9.930313647404393e-06, 'epoch': 0.08} + 8%|▊ | 996/12188 [2:04:51<22:29:16, 7.23s/it] 8%|▊ | 997/12188 [2:04:59<23:27:17, 7.55s/it] {'loss': 0.4471, 'grad_norm': 0.7061253452778037, 'learning_rate': 9.930092411233654e-06, 'epoch': 0.08} + 8%|▊ | 997/12188 [2:04:59<23:27:17, 7.55s/it] 8%|▊ | 998/12188 [2:05:06<22:36:06, 7.27s/it] {'loss': 0.4654, 'grad_norm': 0.7318166001701418, 'learning_rate': 9.929870826907711e-06, 'epoch': 0.08} + 8%|▊ | 998/12188 [2:05:06<22:36:06, 7.27s/it] 8%|▊ | 999/12188 [2:05:12<22:10:33, 7.13s/it] {'loss': 0.435, 'grad_norm': 0.7116218930335934, 'learning_rate': 9.929648894442212e-06, 'epoch': 0.08} + 8%|▊ | 999/12188 [2:05:12<22:10:33, 7.13s/it] 8%|▊ | 1000/12188 [2:05:19<21:35:08, 6.95s/it] {'loss': 0.4691, 'grad_norm': 0.7413030730369929, 'learning_rate': 9.929426613852827e-06, 'epoch': 0.08} + 8%|▊ | 1000/12188 [2:05:19<21:35:08, 6.95s/it]/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/utils/checkpoint.py:87: UserWarning: None of the inputs have requires_grad=True. Gradients will be None + warnings.warn( + 8%|▊ | 1001/12188 [2:05:44<38:20:51, 12.34s/it] {'loss': 0.4178, 'grad_norm': 0.7109749000969268, 'learning_rate': 9.929203985155252e-06, 'epoch': 0.08} + 8%|▊ | 1001/12188 [2:05:44<38:20:51, 12.34s/it] 8%|▊ | 1002/12188 [2:05:51<33:50:25, 10.89s/it] {'loss': 0.4314, 'grad_norm': 0.7101325678692918, 'learning_rate': 9.928981008365213e-06, 'epoch': 0.08} + 8%|▊ | 1002/12188 [2:05:51<33:50:25, 10.89s/it] 8%|▊ | 1003/12188 [2:05:58<29:55:44, 9.63s/it] {'loss': 0.4718, 'grad_norm': 0.8291479379563896, 'learning_rate': 9.928757683498456e-06, 'epoch': 0.08} + 8%|▊ | 1003/12188 [2:05:58<29:55:44, 9.63s/it] 8%|▊ | 1004/12188 [2:06:08<30:12:18, 9.72s/it] {'loss': 0.4362, 'grad_norm': 0.6782988467240904, 'learning_rate': 9.928534010570747e-06, 'epoch': 0.08} + 8%|▊ | 1004/12188 [2:06:08<30:12:18, 9.72s/it] 8%|▊ | 1005/12188 [2:06:15<27:31:51, 8.86s/it] {'loss': 0.4206, 'grad_norm': 0.6620475997342419, 'learning_rate': 9.928309989597887e-06, 'epoch': 0.08} + 8%|▊ | 1005/12188 [2:06:15<27:31:51, 8.86s/it] 8%|▊ | 1006/12188 [2:06:22<25:32:15, 8.22s/it] {'loss': 0.4226, 'grad_norm': 0.6926867878717202, 'learning_rate': 9.928085620595692e-06, 'epoch': 0.08} + 8%|▊ | 1006/12188 [2:06:22<25:32:15, 8.22s/it] 8%|▊ | 1007/12188 [2:06:29<25:01:56, 8.06s/it] {'loss': 0.4119, 'grad_norm': 0.8360693859680834, 'learning_rate': 9.927860903580007e-06, 'epoch': 0.08} + 8%|▊ | 1007/12188 [2:06:29<25:01:56, 8.06s/it] 8%|▊ | 1008/12188 [2:06:36<23:57:17, 7.71s/it] {'loss': 0.4241, 'grad_norm': 0.6796352438548476, 'learning_rate': 9.927635838566703e-06, 'epoch': 0.08} + 8%|▊ | 1008/12188 [2:06:36<23:57:17, 7.71s/it] 8%|▊ | 1009/12188 [2:06:44<24:24:43, 7.86s/it] {'loss': 0.422, 'grad_norm': 0.7566544162276014, 'learning_rate': 9.927410425571674e-06, 'epoch': 0.08} + 8%|▊ | 1009/12188 [2:06:44<24:24:43, 7.86s/it] 8%|▊ | 1010/12188 [2:06:53<24:55:23, 8.03s/it] {'loss': 0.4216, 'grad_norm': 0.6886997363985676, 'learning_rate': 9.927184664610836e-06, 'epoch': 0.08} + 8%|▊ | 1010/12188 [2:06:53<24:55:23, 8.03s/it] 8%|▊ | 1011/12188 [2:07:02<26:14:18, 8.45s/it] {'loss': 0.3984, 'grad_norm': 0.767230530678925, 'learning_rate': 9.926958555700134e-06, 'epoch': 0.08} + 8%|▊ | 1011/12188 [2:07:02<26:14:18, 8.45s/it] 8%|▊ | 1012/12188 [2:07:09<24:40:56, 7.95s/it] {'loss': 0.4539, 'grad_norm': 0.6937348274003537, 'learning_rate': 9.926732098855533e-06, 'epoch': 0.08} + 8%|▊ | 1012/12188 [2:07:09<24:40:56, 7.95s/it] 8%|▊ | 1013/12188 [2:07:17<24:21:14, 7.85s/it] {'loss': 0.4065, 'grad_norm': 0.6496219761762128, 'learning_rate': 9.92650529409303e-06, 'epoch': 0.08} + 8%|▊ | 1013/12188 [2:07:17<24:21:14, 7.85s/it] 8%|▊ | 1014/12188 [2:07:26<25:59:58, 8.38s/it] {'loss': 0.3938, 'grad_norm': 0.6687353646590134, 'learning_rate': 9.926278141428635e-06, 'epoch': 0.08} + 8%|▊ | 1014/12188 [2:07:26<25:59:58, 8.38s/it] 8%|▊ | 1015/12188 [2:07:37<28:23:29, 9.15s/it] {'loss': 0.4339, 'grad_norm': 0.8926425933928338, 'learning_rate': 9.926050640878394e-06, 'epoch': 0.08} + 8%|▊ | 1015/12188 [2:07:37<28:23:29, 9.15s/it] 8%|▊ | 1016/12188 [2:07:44<26:38:03, 8.58s/it] {'loss': 0.4769, 'grad_norm': 0.742549667992309, 'learning_rate': 9.92582279245837e-06, 'epoch': 0.08} + 8%|▊ | 1016/12188 [2:07:44<26:38:03, 8.58s/it] 8%|▊ | 1017/12188 [2:07:54<27:19:36, 8.81s/it] {'loss': 0.4309, 'grad_norm': 0.7025865279669093, 'learning_rate': 9.925594596184654e-06, 'epoch': 0.08} + 8%|▊ | 1017/12188 [2:07:54<27:19:36, 8.81s/it] 8%|▊ | 1018/12188 [2:08:01<25:52:27, 8.34s/it] {'loss': 0.4454, 'grad_norm': 0.7249100770004988, 'learning_rate': 9.925366052073361e-06, 'epoch': 0.08} + 8%|▊ | 1018/12188 [2:08:01<25:52:27, 8.34s/it] 8%|▊ | 1019/12188 [2:08:08<24:58:21, 8.05s/it] {'loss': 0.411, 'grad_norm': 0.7065978333483551, 'learning_rate': 9.925137160140632e-06, 'epoch': 0.08} + 8%|▊ | 1019/12188 [2:08:08<24:58:21, 8.05s/it] 8%|▊ | 1020/12188 [2:08:15<23:46:10, 7.66s/it] {'loss': 0.435, 'grad_norm': 0.6706066774815876, 'learning_rate': 9.92490792040263e-06, 'epoch': 0.08} + 8%|▊ | 1020/12188 [2:08:15<23:46:10, 7.66s/it] 8%|▊ | 1021/12188 [2:08:22<22:52:29, 7.37s/it] {'loss': 0.448, 'grad_norm': 0.6200450348920604, 'learning_rate': 9.924678332875542e-06, 'epoch': 0.08} + 8%|▊ | 1021/12188 [2:08:22<22:52:29, 7.37s/it] 8%|▊ | 1022/12188 [2:08:31<24:45:54, 7.98s/it] {'loss': 0.4132, 'grad_norm': 0.7084692607020989, 'learning_rate': 9.924448397575582e-06, 'epoch': 0.08} + 8%|▊ | 1022/12188 [2:08:31<24:45:54, 7.98s/it] 8%|▊ | 1023/12188 [2:08:38<23:30:22, 7.58s/it] {'loss': 0.4132, 'grad_norm': 0.6736870831500558, 'learning_rate': 9.924218114518988e-06, 'epoch': 0.08} + 8%|▊ | 1023/12188 [2:08:38<23:30:22, 7.58s/it] 8%|▊ | 1024/12188 [2:08:48<25:48:10, 8.32s/it] {'loss': 0.4262, 'grad_norm': 0.6702252747035212, 'learning_rate': 9.923987483722023e-06, 'epoch': 0.08} + 8%|▊ | 1024/12188 [2:08:48<25:48:10, 8.32s/it] 8%|▊ | 1025/12188 [2:08:55<24:19:53, 7.85s/it] {'loss': 0.4083, 'grad_norm': 0.7831291570433181, 'learning_rate': 9.923756505200973e-06, 'epoch': 0.08} + 8%|▊ | 1025/12188 [2:08:55<24:19:53, 7.85s/it] 8%|▊ | 1026/12188 [2:09:03<24:22:35, 7.86s/it] {'loss': 0.3683, 'grad_norm': 0.6874715800844986, 'learning_rate': 9.923525178972149e-06, 'epoch': 0.08} + 8%|▊ | 1026/12188 [2:09:03<24:22:35, 7.86s/it] 8%|▊ | 1027/12188 [2:09:11<24:32:04, 7.91s/it] {'loss': 0.3953, 'grad_norm': 0.6836167814821877, 'learning_rate': 9.923293505051885e-06, 'epoch': 0.08} + 8%|▊ | 1027/12188 [2:09:11<24:32:04, 7.91s/it] 8%|▊ | 1028/12188 [2:09:19<24:50:15, 8.01s/it] {'loss': 0.409, 'grad_norm': 0.6777278079852787, 'learning_rate': 9.923061483456547e-06, 'epoch': 0.08} + 8%|▊ | 1028/12188 [2:09:19<24:50:15, 8.01s/it] 8%|▊ | 1029/12188 [2:09:26<24:09:26, 7.79s/it] {'loss': 0.4148, 'grad_norm': 0.6862091014435101, 'learning_rate': 9.922829114202513e-06, 'epoch': 0.08} + 8%|▊ | 1029/12188 [2:09:26<24:09:26, 7.79s/it] 8%|▊ | 1030/12188 [2:09:34<24:23:43, 7.87s/it] {'loss': 0.3975, 'grad_norm': 0.7424056241180106, 'learning_rate': 9.922596397306199e-06, 'epoch': 0.08} + 8%|▊ | 1030/12188 [2:09:34<24:23:43, 7.87s/it] 8%|▊ | 1031/12188 [2:09:42<24:30:14, 7.91s/it] {'loss': 0.3885, 'grad_norm': 0.6391184681213998, 'learning_rate': 9.922363332784034e-06, 'epoch': 0.08} + 8%|▊ | 1031/12188 [2:09:42<24:30:14, 7.91s/it] 8%|▊ | 1032/12188 [2:09:49<23:19:32, 7.53s/it] {'loss': 0.4218, 'grad_norm': 0.6992696137506641, 'learning_rate': 9.922129920652481e-06, 'epoch': 0.08} + 8%|▊ | 1032/12188 [2:09:49<23:19:32, 7.53s/it] 8%|▊ | 1033/12188 [2:09:56<23:13:31, 7.50s/it] {'loss': 0.4229, 'grad_norm': 0.6558169128359677, 'learning_rate': 9.92189616092802e-06, 'epoch': 0.08} + 8%|▊ | 1033/12188 [2:09:56<23:13:31, 7.50s/it] 8%|▊ | 1034/12188 [2:10:05<24:03:19, 7.76s/it] {'loss': 0.4245, 'grad_norm': 0.7000458306021213, 'learning_rate': 9.92166205362716e-06, 'epoch': 0.08} + 8%|▊ | 1034/12188 [2:10:05<24:03:19, 7.76s/it] 8%|▊ | 1035/12188 [2:10:12<23:38:42, 7.63s/it] {'loss': 0.4409, 'grad_norm': 0.6711951555984216, 'learning_rate': 9.921427598766432e-06, 'epoch': 0.08} + 8%|▊ | 1035/12188 [2:10:12<23:38:42, 7.63s/it] 9%|▊ | 1036/12188 [2:10:19<23:16:28, 7.51s/it] {'loss': 0.4256, 'grad_norm': 0.6955976846783224, 'learning_rate': 9.921192796362393e-06, 'epoch': 0.08} + 9%|▊ | 1036/12188 [2:10:19<23:16:28, 7.51s/it] 9%|▊ | 1037/12188 [2:10:26<22:48:09, 7.36s/it] {'loss': 0.4265, 'grad_norm': 0.7166870287698903, 'learning_rate': 9.920957646431628e-06, 'epoch': 0.09} + 9%|▊ | 1037/12188 [2:10:26<22:48:09, 7.36s/it] 9%|▊ | 1038/12188 [2:10:33<22:21:38, 7.22s/it] {'loss': 0.4301, 'grad_norm': 0.710983726479255, 'learning_rate': 9.920722148990737e-06, 'epoch': 0.09} + 9%|▊ | 1038/12188 [2:10:33<22:21:38, 7.22s/it]Invalidate trace cache @ step 2: expected module 1, but got module 364 + 9%|▊ | 1039/12188 [2:10:39<21:36:59, 6.98s/it] {'loss': 0.7815, 'grad_norm': 1.0047287352986747, 'learning_rate': 9.920486304056355e-06, 'epoch': 0.09} + 9%|▊ | 1039/12188 [2:10:39<21:36:59, 6.98s/it] 9%|▊ | 1040/12188 [2:10:49<23:59:05, 7.75s/it] {'loss': 0.4381, 'grad_norm': 0.7087288088109175, 'learning_rate': 9.920250111645138e-06, 'epoch': 0.09} + 9%|▊ | 1040/12188 [2:10:49<23:59:05, 7.75s/it] 9%|▊ | 1041/12188 [2:10:56<23:32:10, 7.60s/it] {'loss': 0.4122, 'grad_norm': 0.6701075285327227, 'learning_rate': 9.92001357177376e-06, 'epoch': 0.09} + 9%|▊ | 1041/12188 [2:10:56<23:32:10, 7.60s/it] 9%|▊ | 1042/12188 [2:11:05<24:15:34, 7.84s/it] {'loss': 0.4183, 'grad_norm': 0.724673079598315, 'learning_rate': 9.919776684458931e-06, 'epoch': 0.09} + 9%|▊ | 1042/12188 [2:11:05<24:15:34, 7.84s/it] 9%|▊ | 1043/12188 [2:11:12<23:37:28, 7.63s/it] {'loss': 0.4422, 'grad_norm': 0.6884466272764912, 'learning_rate': 9.919539449717373e-06, 'epoch': 0.09} + 9%|▊ | 1043/12188 [2:11:12<23:37:28, 7.63s/it] 9%|▊ | 1044/12188 [2:11:19<23:22:06, 7.55s/it] {'loss': 0.4538, 'grad_norm': 0.7656571946524281, 'learning_rate': 9.919301867565845e-06, 'epoch': 0.09} + 9%|▊ | 1044/12188 [2:11:19<23:22:06, 7.55s/it] 9%|▊ | 1045/12188 [2:11:27<23:39:09, 7.64s/it] {'loss': 0.4377, 'grad_norm': 0.6480145113354768, 'learning_rate': 9.919063938021123e-06, 'epoch': 0.09} + 9%|▊ | 1045/12188 [2:11:27<23:39:09, 7.64s/it] 9%|▊ | 1046/12188 [2:11:34<23:11:05, 7.49s/it] {'loss': 0.4427, 'grad_norm': 0.6608759176885819, 'learning_rate': 9.918825661100009e-06, 'epoch': 0.09} + 9%|▊ | 1046/12188 [2:11:34<23:11:05, 7.49s/it] 9%|▊ | 1047/12188 [2:11:41<22:53:35, 7.40s/it] {'loss': 0.4193, 'grad_norm': 0.6595823057513124, 'learning_rate': 9.918587036819328e-06, 'epoch': 0.09} + 9%|▊ | 1047/12188 [2:11:41<22:53:35, 7.40s/it] 9%|▊ | 1048/12188 [2:11:48<22:19:19, 7.21s/it] {'loss': 0.4281, 'grad_norm': 0.6554237050530093, 'learning_rate': 9.918348065195934e-06, 'epoch': 0.09} + 9%|▊ | 1048/12188 [2:11:48<22:19:19, 7.21s/it] 9%|▊ | 1049/12188 [2:11:56<22:58:22, 7.42s/it] {'loss': 0.4735, 'grad_norm': 0.7545346461710065, 'learning_rate': 9.9181087462467e-06, 'epoch': 0.09} + 9%|▊ | 1049/12188 [2:11:56<22:58:22, 7.42s/it] 9%|▊ | 1050/12188 [2:12:04<23:39:40, 7.65s/it] {'loss': 0.4053, 'grad_norm': 0.6573118995796838, 'learning_rate': 9.917869079988529e-06, 'epoch': 0.09} + 9%|▊ | 1050/12188 [2:12:04<23:39:40, 7.65s/it] 9%|▊ | 1051/12188 [2:12:11<22:52:54, 7.40s/it] {'loss': 0.3884, 'grad_norm': 0.5527231567433245, 'learning_rate': 9.917629066438346e-06, 'epoch': 0.09} + 9%|▊ | 1051/12188 [2:12:11<22:52:54, 7.40s/it] 9%|▊ | 1052/12188 [2:12:19<22:57:24, 7.42s/it] {'loss': 0.3765, 'grad_norm': 0.6358988613837268, 'learning_rate': 9.917388705613099e-06, 'epoch': 0.09} + 9%|▊ | 1052/12188 [2:12:19<22:57:24, 7.42s/it] 9%|▊ | 1053/12188 [2:12:26<23:15:24, 7.52s/it] {'loss': 0.4373, 'grad_norm': 0.69036936626371, 'learning_rate': 9.917147997529759e-06, 'epoch': 0.09} + 9%|▊ | 1053/12188 [2:12:26<23:15:24, 7.52s/it] 9%|▊ | 1054/12188 [2:12:34<23:03:41, 7.46s/it] {'loss': 0.4477, 'grad_norm': 0.624132952260046, 'learning_rate': 9.916906942205328e-06, 'epoch': 0.09} + 9%|▊ | 1054/12188 [2:12:34<23:03:41, 7.46s/it] 9%|▊ | 1055/12188 [2:12:42<24:14:32, 7.84s/it] {'loss': 0.4189, 'grad_norm': 0.6612300601599008, 'learning_rate': 9.91666553965683e-06, 'epoch': 0.09} + 9%|▊ | 1055/12188 [2:12:42<24:14:32, 7.84s/it] 9%|▊ | 1056/12188 [2:12:51<24:57:08, 8.07s/it] {'loss': 0.4103, 'grad_norm': 0.7113183075168988, 'learning_rate': 9.916423789901312e-06, 'epoch': 0.09} + 9%|▊ | 1056/12188 [2:12:51<24:57:08, 8.07s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f6e96844fe0> +[Try #0] Failed to fetch sample 4821869 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f6e96844fe0> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'en.wiktionary.org'"}, {'from': 'gpt', 'value': '\nclick(x=0.909, y=0.0635)\n'}]} + 9%|▊ | 1057/12188 [2:13:02<27:27:57, 8.88s/it] {'loss': 0.4233, 'grad_norm': 0.6857232483399694, 'learning_rate': 9.916181692955841e-06, 'epoch': 0.09} + 9%|▊ | 1057/12188 [2:13:02<27:27:57, 8.88s/it] 9%|▊ | 1058/12188 [2:13:08<25:28:39, 8.24s/it] {'loss': 0.4307, 'grad_norm': 0.7375707281641389, 'learning_rate': 9.91593924883752e-06, 'epoch': 0.09} + 9%|▊ | 1058/12188 [2:13:08<25:28:39, 8.24s/it] 9%|▊ | 1059/12188 [2:13:16<24:33:59, 7.95s/it] {'loss': 0.3817, 'grad_norm': 0.6752922173214406, 'learning_rate': 9.915696457563469e-06, 'epoch': 0.09} + 9%|▊ | 1059/12188 [2:13:16<24:33:59, 7.95s/it] 9%|▊ | 1060/12188 [2:13:23<23:56:22, 7.74s/it] {'loss': 0.4549, 'grad_norm': 0.6564900849142147, 'learning_rate': 9.915453319150829e-06, 'epoch': 0.09} + 9%|▊ | 1060/12188 [2:13:23<23:56:22, 7.74s/it] 9%|▊ | 1061/12188 [2:13:31<23:46:37, 7.69s/it] {'loss': 0.4091, 'grad_norm': 0.7255165723440383, 'learning_rate': 9.915209833616773e-06, 'epoch': 0.09} + 9%|▊ | 1061/12188 [2:13:31<23:46:37, 7.69s/it] 9%|▊ | 1062/12188 [2:13:38<23:38:06, 7.65s/it] {'loss': 0.4504, 'grad_norm': 0.6728665627436781, 'learning_rate': 9.914966000978497e-06, 'epoch': 0.09} + 9%|▊ | 1062/12188 [2:13:38<23:38:06, 7.65s/it] 9%|▊ | 1063/12188 [2:13:45<23:22:35, 7.56s/it] {'loss': 0.4279, 'grad_norm': 0.694940781485747, 'learning_rate': 9.914721821253218e-06, 'epoch': 0.09} + 9%|▊ | 1063/12188 [2:13:45<23:22:35, 7.56s/it] 9%|▊ | 1064/12188 [2:13:52<22:38:09, 7.33s/it] {'loss': 0.4097, 'grad_norm': 0.6721150116638501, 'learning_rate': 9.914477294458178e-06, 'epoch': 0.09} + 9%|▊ | 1064/12188 [2:13:52<22:38:09, 7.33s/it] 9%|▊ | 1065/12188 [2:14:01<24:03:13, 7.79s/it] {'loss': 0.4361, 'grad_norm': 0.681894881598639, 'learning_rate': 9.914232420610649e-06, 'epoch': 0.09} + 9%|▊ | 1065/12188 [2:14:01<24:03:13, 7.79s/it] 9%|▊ | 1066/12188 [2:14:09<24:02:37, 7.78s/it] {'loss': 0.4385, 'grad_norm': 0.7807027246232714, 'learning_rate': 9.91398719972792e-06, 'epoch': 0.09} + 9%|▊ | 1066/12188 [2:14:09<24:02:37, 7.78s/it] 9%|▉ | 1067/12188 [2:14:16<23:49:17, 7.71s/it] {'loss': 0.4686, 'grad_norm': 0.7067243541504444, 'learning_rate': 9.913741631827314e-06, 'epoch': 0.09} + 9%|▉ | 1067/12188 [2:14:16<23:49:17, 7.71s/it] 9%|▉ | 1068/12188 [2:14:23<22:59:59, 7.45s/it] {'loss': 0.3974, 'grad_norm': 0.6779428838423512, 'learning_rate': 9.913495716926165e-06, 'epoch': 0.09} + 9%|▉ | 1068/12188 [2:14:23<22:59:59, 7.45s/it] 9%|▉ | 1069/12188 [2:14:30<22:14:05, 7.20s/it] {'loss': 0.4401, 'grad_norm': 0.6632642701827999, 'learning_rate': 9.913249455041843e-06, 'epoch': 0.09} + 9%|▉ | 1069/12188 [2:14:30<22:14:05, 7.20s/it] 9%|▉ | 1070/12188 [2:14:38<22:50:05, 7.39s/it] {'loss': 0.3846, 'grad_norm': 0.6614633816266263, 'learning_rate': 9.913002846191739e-06, 'epoch': 0.09} + 9%|▉ | 1070/12188 [2:14:38<22:50:05, 7.39s/it] 9%|▉ | 1071/12188 [2:14:45<22:19:42, 7.23s/it] {'loss': 0.4187, 'grad_norm': 0.7229895222299344, 'learning_rate': 9.912755890393266e-06, 'epoch': 0.09} + 9%|▉ | 1071/12188 [2:14:45<22:19:42, 7.23s/it] 9%|▉ | 1072/12188 [2:14:53<23:46:42, 7.70s/it] {'loss': 0.4104, 'grad_norm': 0.6640987055109133, 'learning_rate': 9.912508587663867e-06, 'epoch': 0.09} + 9%|▉ | 1072/12188 [2:14:53<23:46:42, 7.70s/it] 9%|▉ | 1073/12188 [2:15:03<25:33:12, 8.28s/it] {'loss': 0.4223, 'grad_norm': 0.7069488306111321, 'learning_rate': 9.912260938021003e-06, 'epoch': 0.09} + 9%|▉ | 1073/12188 [2:15:03<25:33:12, 8.28s/it] 9%|▉ | 1074/12188 [2:15:10<24:49:31, 8.04s/it] {'loss': 0.4541, 'grad_norm': 0.7119552481519055, 'learning_rate': 9.912012941482163e-06, 'epoch': 0.09} + 9%|▉ | 1074/12188 [2:15:10<24:49:31, 8.04s/it] 9%|▉ | 1075/12188 [2:15:18<24:15:41, 7.86s/it] {'loss': 0.4416, 'grad_norm': 0.6663159607258136, 'learning_rate': 9.911764598064862e-06, 'epoch': 0.09} + 9%|▉ | 1075/12188 [2:15:18<24:15:41, 7.86s/it] 9%|▉ | 1076/12188 [2:15:28<26:47:38, 8.68s/it] {'loss': 0.4312, 'grad_norm': 0.6811971531092029, 'learning_rate': 9.911515907786637e-06, 'epoch': 0.09} + 9%|▉ | 1076/12188 [2:15:28<26:47:38, 8.68s/it] 9%|▉ | 1077/12188 [2:15:36<25:42:05, 8.33s/it] {'loss': 0.3804, 'grad_norm': 0.6525403306679165, 'learning_rate': 9.911266870665048e-06, 'epoch': 0.09} + 9%|▉ | 1077/12188 [2:15:36<25:42:05, 8.33s/it] 9%|▉ | 1078/12188 [2:15:45<26:01:20, 8.43s/it] {'loss': 0.4022, 'grad_norm': 0.6345804648482422, 'learning_rate': 9.911017486717683e-06, 'epoch': 0.09} + 9%|▉ | 1078/12188 [2:15:45<26:01:20, 8.43s/it] 9%|▉ | 1079/12188 [2:15:53<25:28:05, 8.25s/it] {'loss': 0.4261, 'grad_norm': 0.9479582327611155, 'learning_rate': 9.910767755962153e-06, 'epoch': 0.09} + 9%|▉ | 1079/12188 [2:15:53<25:28:05, 8.25s/it] 9%|▉ | 1080/12188 [2:15:59<24:14:02, 7.85s/it] {'loss': 0.4393, 'grad_norm': 0.6705686620567799, 'learning_rate': 9.910517678416097e-06, 'epoch': 0.09} + 9%|▉ | 1080/12188 [2:15:59<24:14:02, 7.85s/it] 9%|▉ | 1081/12188 [2:16:07<23:37:14, 7.66s/it] {'loss': 0.4518, 'grad_norm': 0.7658878665588302, 'learning_rate': 9.910267254097169e-06, 'epoch': 0.09} + 9%|▉ | 1081/12188 [2:16:07<23:37:14, 7.66s/it] 9%|▉ | 1082/12188 [2:16:14<22:56:14, 7.44s/it] {'loss': 0.4247, 'grad_norm': 0.7657366768629393, 'learning_rate': 9.910016483023058e-06, 'epoch': 0.09} + 9%|▉ | 1082/12188 [2:16:14<22:56:14, 7.44s/it] 9%|▉ | 1083/12188 [2:16:20<22:21:52, 7.25s/it] {'loss': 0.399, 'grad_norm': 0.6868276727542265, 'learning_rate': 9.90976536521147e-06, 'epoch': 0.09} + 9%|▉ | 1083/12188 [2:16:20<22:21:52, 7.25s/it] 9%|▉ | 1084/12188 [2:16:28<22:36:44, 7.33s/it] {'loss': 0.4265, 'grad_norm': 0.7435052617403838, 'learning_rate': 9.909513900680141e-06, 'epoch': 0.09} + 9%|▉ | 1084/12188 [2:16:28<22:36:44, 7.33s/it] 9%|▉ | 1085/12188 [2:16:35<22:49:58, 7.40s/it] {'loss': 0.4304, 'grad_norm': 0.7151618488531751, 'learning_rate': 9.909262089446828e-06, 'epoch': 0.09} + 9%|▉ | 1085/12188 [2:16:35<22:49:58, 7.40s/it] 9%|▉ | 1086/12188 [2:16:45<25:01:44, 8.12s/it] {'loss': 0.4006, 'grad_norm': 0.6473982113877929, 'learning_rate': 9.909009931529313e-06, 'epoch': 0.09} + 9%|▉ | 1086/12188 [2:16:45<25:01:44, 8.12s/it] 9%|▉ | 1087/12188 [2:16:52<24:13:28, 7.86s/it] {'loss': 0.4082, 'grad_norm': 0.6936941613236083, 'learning_rate': 9.908757426945403e-06, 'epoch': 0.09} + 9%|▉ | 1087/12188 [2:16:52<24:13:28, 7.86s/it] 9%|▉ | 1088/12188 [2:17:00<24:16:59, 7.88s/it] {'loss': 0.4244, 'grad_norm': 0.7060797548381823, 'learning_rate': 9.908504575712932e-06, 'epoch': 0.09} + 9%|▉ | 1088/12188 [2:17:00<24:16:59, 7.88s/it] 9%|▉ | 1089/12188 [2:17:08<24:09:13, 7.83s/it] {'loss': 0.4124, 'grad_norm': 0.7000405717845052, 'learning_rate': 9.908251377849752e-06, 'epoch': 0.09} + 9%|▉ | 1089/12188 [2:17:08<24:09:13, 7.83s/it] 9%|▉ | 1090/12188 [2:17:15<23:17:09, 7.55s/it] {'loss': 0.3897, 'grad_norm': 0.7065352789596681, 'learning_rate': 9.907997833373747e-06, 'epoch': 0.09} + 9%|▉ | 1090/12188 [2:17:15<23:17:09, 7.55s/it] 9%|▉ | 1091/12188 [2:17:23<23:15:06, 7.54s/it] {'loss': 0.4545, 'grad_norm': 0.7068961064751251, 'learning_rate': 9.907743942302818e-06, 'epoch': 0.09} + 9%|▉ | 1091/12188 [2:17:23<23:15:06, 7.54s/it] 9%|▉ | 1092/12188 [2:17:30<23:04:37, 7.49s/it] {'loss': 0.42, 'grad_norm': 0.6920685948969771, 'learning_rate': 9.9074897046549e-06, 'epoch': 0.09} + 9%|▉ | 1092/12188 [2:17:30<23:04:37, 7.49s/it] 9%|▉ | 1093/12188 [2:17:37<22:27:18, 7.29s/it] {'loss': 0.4235, 'grad_norm': 0.6110309719032815, 'learning_rate': 9.907235120447942e-06, 'epoch': 0.09} + 9%|▉ | 1093/12188 [2:17:37<22:27:18, 7.29s/it] 9%|▉ | 1094/12188 [2:17:44<22:23:01, 7.26s/it] {'loss': 0.4058, 'grad_norm': 0.6402226610263476, 'learning_rate': 9.906980189699924e-06, 'epoch': 0.09} + 9%|▉ | 1094/12188 [2:17:44<22:23:01, 7.26s/it] 9%|▉ | 1095/12188 [2:17:51<22:01:40, 7.15s/it] {'loss': 0.45, 'grad_norm': 0.7503252325187472, 'learning_rate': 9.906724912428848e-06, 'epoch': 0.09} + 9%|▉ | 1095/12188 [2:17:51<22:01:40, 7.15s/it] 9%|▉ | 1096/12188 [2:17:59<22:51:34, 7.42s/it] {'loss': 0.4432, 'grad_norm': 0.7198361092421223, 'learning_rate': 9.906469288652743e-06, 'epoch': 0.09} + 9%|▉ | 1096/12188 [2:17:59<22:51:34, 7.42s/it] 9%|▉ | 1097/12188 [2:18:07<23:55:29, 7.77s/it] {'loss': 0.3851, 'grad_norm': 0.651812856912975, 'learning_rate': 9.906213318389658e-06, 'epoch': 0.09} + 9%|▉ | 1097/12188 [2:18:07<23:55:29, 7.77s/it] 9%|▉ | 1098/12188 [2:18:14<23:02:11, 7.48s/it] {'loss': 0.43, 'grad_norm': 0.6950469907652165, 'learning_rate': 9.905957001657673e-06, 'epoch': 0.09} + 9%|▉ | 1098/12188 [2:18:14<23:02:11, 7.48s/it] 9%|▉ | 1099/12188 [2:18:22<22:50:30, 7.42s/it] {'loss': 0.4734, 'grad_norm': 0.6822615634633077, 'learning_rate': 9.905700338474885e-06, 'epoch': 0.09} + 9%|▉ | 1099/12188 [2:18:22<22:50:30, 7.42s/it] 9%|▉ | 1100/12188 [2:18:28<22:24:57, 7.28s/it] {'loss': 0.3991, 'grad_norm': 0.7505811462089138, 'learning_rate': 9.905443328859421e-06, 'epoch': 0.09} + 9%|▉ | 1100/12188 [2:18:28<22:24:57, 7.28s/it] 9%|▉ | 1101/12188 [2:18:35<21:49:48, 7.09s/it] {'loss': 0.3894, 'grad_norm': 0.6829065042436095, 'learning_rate': 9.905185972829429e-06, 'epoch': 0.09} + 9%|▉ | 1101/12188 [2:18:35<21:49:48, 7.09s/it] 9%|▉ | 1102/12188 [2:18:42<21:41:23, 7.04s/it] {'loss': 0.4165, 'grad_norm': 0.6323049748513924, 'learning_rate': 9.904928270403087e-06, 'epoch': 0.09} + 9%|▉ | 1102/12188 [2:18:42<21:41:23, 7.04s/it] 9%|▉ | 1103/12188 [2:18:49<21:35:29, 7.01s/it] {'loss': 0.4621, 'grad_norm': 0.680080121790264, 'learning_rate': 9.90467022159859e-06, 'epoch': 0.09} + 9%|▉ | 1103/12188 [2:18:49<21:35:29, 7.01s/it] 9%|▉ | 1104/12188 [2:18:56<21:42:03, 7.05s/it] {'loss': 0.4434, 'grad_norm': 0.7115796796423162, 'learning_rate': 9.904411826434161e-06, 'epoch': 0.09} + 9%|▉ | 1104/12188 [2:18:56<21:42:03, 7.05s/it] 9%|▉ | 1105/12188 [2:19:03<21:37:26, 7.02s/it] {'loss': 0.4298, 'grad_norm': 0.6722889074622413, 'learning_rate': 9.90415308492805e-06, 'epoch': 0.09} + 9%|▉ | 1105/12188 [2:19:03<21:37:26, 7.02s/it] 9%|▉ | 1106/12188 [2:19:10<21:26:58, 6.97s/it] {'loss': 0.4472, 'grad_norm': 0.7095079143746601, 'learning_rate': 9.903893997098526e-06, 'epoch': 0.09} + 9%|▉ | 1106/12188 [2:19:10<21:26:58, 6.97s/it] 9%|▉ | 1107/12188 [2:19:17<21:38:39, 7.03s/it] {'loss': 0.4006, 'grad_norm': 0.6877716800418657, 'learning_rate': 9.903634562963886e-06, 'epoch': 0.09} + 9%|▉ | 1107/12188 [2:19:17<21:38:39, 7.03s/it] 9%|▉ | 1108/12188 [2:19:24<21:29:52, 6.98s/it] {'loss': 0.4086, 'grad_norm': 0.6384142549447768, 'learning_rate': 9.903374782542454e-06, 'epoch': 0.09} + 9%|▉ | 1108/12188 [2:19:24<21:29:52, 6.98s/it] 9%|▉ | 1109/12188 [2:19:33<23:10:39, 7.53s/it] {'loss': 0.3959, 'grad_norm': 0.7006394616747653, 'learning_rate': 9.90311465585257e-06, 'epoch': 0.09} + 9%|▉ | 1109/12188 [2:19:33<23:10:39, 7.53s/it] 9%|▉ | 1110/12188 [2:19:40<23:14:36, 7.55s/it] {'loss': 0.4789, 'grad_norm': 0.7363399567599227, 'learning_rate': 9.902854182912608e-06, 'epoch': 0.09} + 9%|▉ | 1110/12188 [2:19:40<23:14:36, 7.55s/it] 9%|▉ | 1111/12188 [2:19:48<23:08:33, 7.52s/it] {'loss': 0.4408, 'grad_norm': 0.6468754383218632, 'learning_rate': 9.90259336374096e-06, 'epoch': 0.09} + 9%|▉ | 1111/12188 [2:19:48<23:08:33, 7.52s/it] 9%|▉ | 1112/12188 [2:19:55<23:00:57, 7.48s/it] {'loss': 0.4248, 'grad_norm': 0.651911083067125, 'learning_rate': 9.902332198356045e-06, 'epoch': 0.09} + 9%|▉ | 1112/12188 [2:19:55<23:00:57, 7.48s/it] 9%|▉ | 1113/12188 [2:20:02<22:43:04, 7.38s/it] {'loss': 0.4174, 'grad_norm': 0.6690591422983134, 'learning_rate': 9.902070686776308e-06, 'epoch': 0.09} + 9%|▉ | 1113/12188 [2:20:02<22:43:04, 7.38s/it] 9%|▉ | 1114/12188 [2:20:09<22:07:45, 7.19s/it] {'loss': 0.4102, 'grad_norm': 0.6571093204348416, 'learning_rate': 9.901808829020215e-06, 'epoch': 0.09} + 9%|▉ | 1114/12188 [2:20:09<22:07:45, 7.19s/it] 9%|▉ | 1115/12188 [2:20:16<21:36:26, 7.02s/it] {'loss': 0.4306, 'grad_norm': 0.6518274895359748, 'learning_rate': 9.901546625106255e-06, 'epoch': 0.09} + 9%|▉ | 1115/12188 [2:20:16<21:36:26, 7.02s/it] 9%|▉ | 1116/12188 [2:20:23<21:50:37, 7.10s/it] {'loss': 0.4108, 'grad_norm': 0.6561174758492045, 'learning_rate': 9.90128407505295e-06, 'epoch': 0.09} + 9%|▉ | 1116/12188 [2:20:23<21:50:37, 7.10s/it] 9%|▉ | 1117/12188 [2:20:31<22:21:26, 7.27s/it] {'loss': 0.4815, 'grad_norm': 0.7336284310952887, 'learning_rate': 9.901021178878835e-06, 'epoch': 0.09} + 9%|▉ | 1117/12188 [2:20:31<22:21:26, 7.27s/it] 9%|▉ | 1118/12188 [2:20:38<22:25:22, 7.29s/it] {'loss': 0.4007, 'grad_norm': 0.6305698193131831, 'learning_rate': 9.900757936602481e-06, 'epoch': 0.09} + 9%|▉ | 1118/12188 [2:20:38<22:25:22, 7.29s/it] 9%|▉ | 1119/12188 [2:20:45<21:56:06, 7.13s/it] {'loss': 0.3909, 'grad_norm': 0.6293196847557636, 'learning_rate': 9.900494348242475e-06, 'epoch': 0.09} + 9%|▉ | 1119/12188 [2:20:45<21:56:06, 7.13s/it] 9%|▉ | 1120/12188 [2:20:52<22:19:33, 7.26s/it] {'loss': 0.437, 'grad_norm': 0.6997967076587683, 'learning_rate': 9.90023041381743e-06, 'epoch': 0.09} + 9%|▉ | 1120/12188 [2:20:52<22:19:33, 7.26s/it] 9%|▉ | 1121/12188 [2:20:59<21:46:08, 7.08s/it] {'loss': 0.3918, 'grad_norm': 0.6832521161089257, 'learning_rate': 9.899966133345988e-06, 'epoch': 0.09} + 9%|▉ | 1121/12188 [2:20:59<21:46:08, 7.08s/it] 9%|▉ | 1122/12188 [2:21:06<21:29:20, 6.99s/it] {'loss': 0.4141, 'grad_norm': 0.6461341985016275, 'learning_rate': 9.89970150684681e-06, 'epoch': 0.09} + 9%|▉ | 1122/12188 [2:21:06<21:29:20, 6.99s/it] 9%|▉ | 1123/12188 [2:21:13<21:45:41, 7.08s/it] {'loss': 0.4038, 'grad_norm': 0.6717038739884312, 'learning_rate': 9.899436534338582e-06, 'epoch': 0.09} + 9%|▉ | 1123/12188 [2:21:13<21:45:41, 7.08s/it] 9%|▉ | 1124/12188 [2:21:21<22:28:28, 7.31s/it] {'loss': 0.4306, 'grad_norm': 0.6566158609680398, 'learning_rate': 9.899171215840018e-06, 'epoch': 0.09} + 9%|▉ | 1124/12188 [2:21:21<22:28:28, 7.31s/it] 9%|▉ | 1125/12188 [2:21:28<22:30:13, 7.32s/it] {'loss': 0.4886, 'grad_norm': 0.6679261544639572, 'learning_rate': 9.898905551369854e-06, 'epoch': 0.09} + 9%|▉ | 1125/12188 [2:21:28<22:30:13, 7.32s/it] 9%|▉ | 1126/12188 [2:21:35<22:10:40, 7.22s/it] {'loss': 0.4054, 'grad_norm': 0.6540523649288594, 'learning_rate': 9.89863954094685e-06, 'epoch': 0.09} + 9%|▉ | 1126/12188 [2:21:35<22:10:40, 7.22s/it] 9%|▉ | 1127/12188 [2:21:43<22:40:57, 7.38s/it] {'loss': 0.406, 'grad_norm': 0.7123965381153851, 'learning_rate': 9.898373184589793e-06, 'epoch': 0.09} + 9%|▉ | 1127/12188 [2:21:43<22:40:57, 7.38s/it] 9%|▉ | 1128/12188 [2:21:51<22:48:17, 7.42s/it] {'loss': 0.4265, 'grad_norm': 0.9410553449851357, 'learning_rate': 9.898106482317491e-06, 'epoch': 0.09} + 9%|▉ | 1128/12188 [2:21:51<22:48:17, 7.42s/it] 9%|▉ | 1129/12188 [2:21:58<22:42:35, 7.39s/it] {'loss': 0.4332, 'grad_norm': 0.6315526830561401, 'learning_rate': 9.897839434148779e-06, 'epoch': 0.09} + 9%|▉ | 1129/12188 [2:21:58<22:42:35, 7.39s/it] 9%|▉ | 1130/12188 [2:22:06<23:17:24, 7.58s/it] {'loss': 0.3979, 'grad_norm': 0.64566449805873, 'learning_rate': 9.897572040102514e-06, 'epoch': 0.09} + 9%|▉ | 1130/12188 [2:22:06<23:17:24, 7.58s/it] 9%|▉ | 1131/12188 [2:22:13<23:11:42, 7.55s/it] {'loss': 0.4131, 'grad_norm': 0.6902002634118973, 'learning_rate': 9.89730430019758e-06, 'epoch': 0.09} + 9%|▉ | 1131/12188 [2:22:13<23:11:42, 7.55s/it] 9%|▉ | 1132/12188 [2:22:20<22:23:55, 7.29s/it] {'loss': 0.4281, 'grad_norm': 0.6604982349479028, 'learning_rate': 9.897036214452886e-06, 'epoch': 0.09} + 9%|▉ | 1132/12188 [2:22:20<22:23:55, 7.29s/it] 9%|▉ | 1133/12188 [2:22:27<22:07:36, 7.21s/it] {'loss': 0.4215, 'grad_norm': 0.7318828477310675, 'learning_rate': 9.896767782887362e-06, 'epoch': 0.09} + 9%|▉ | 1133/12188 [2:22:27<22:07:36, 7.21s/it] 9%|▉ | 1134/12188 [2:22:36<23:18:52, 7.59s/it] {'loss': 0.4172, 'grad_norm': 0.6330458481580741, 'learning_rate': 9.896499005519964e-06, 'epoch': 0.09} + 9%|▉ | 1134/12188 [2:22:36<23:18:52, 7.59s/it] 9%|▉ | 1135/12188 [2:22:42<22:24:39, 7.30s/it] {'loss': 0.4224, 'grad_norm': 0.6927238723795207, 'learning_rate': 9.896229882369674e-06, 'epoch': 0.09} + 9%|▉ | 1135/12188 [2:22:42<22:24:39, 7.30s/it] 9%|▉ | 1136/12188 [2:22:50<22:25:53, 7.31s/it] {'loss': 0.4138, 'grad_norm': 0.649251949907775, 'learning_rate': 9.895960413455495e-06, 'epoch': 0.09} + 9%|▉ | 1136/12188 [2:22:50<22:25:53, 7.31s/it] 9%|▉ | 1137/12188 [2:22:57<22:45:49, 7.42s/it] {'loss': 0.4551, 'grad_norm': 0.6445516325088413, 'learning_rate': 9.895690598796457e-06, 'epoch': 0.09} + 9%|▉ | 1137/12188 [2:22:57<22:45:49, 7.42s/it] 9%|▉ | 1138/12188 [2:23:05<22:49:10, 7.43s/it] {'loss': 0.4501, 'grad_norm': 0.752038870638604, 'learning_rate': 9.895420438411616e-06, 'epoch': 0.09} + 9%|▉ | 1138/12188 [2:23:05<22:49:10, 7.43s/it] 9%|▉ | 1139/12188 [2:23:12<22:33:57, 7.35s/it] {'loss': 0.4306, 'grad_norm': 0.6952185683723591, 'learning_rate': 9.895149932320048e-06, 'epoch': 0.09} + 9%|▉ | 1139/12188 [2:23:12<22:33:57, 7.35s/it] 9%|▉ | 1140/12188 [2:23:19<22:09:16, 7.22s/it] {'loss': 0.4106, 'grad_norm': 0.7176151175432689, 'learning_rate': 9.894879080540856e-06, 'epoch': 0.09} + 9%|▉ | 1140/12188 [2:23:19<22:09:16, 7.22s/it] 9%|▉ | 1141/12188 [2:23:26<22:25:53, 7.31s/it] {'loss': 0.4344, 'grad_norm': 0.628441444353533, 'learning_rate': 9.894607883093169e-06, 'epoch': 0.09} + 9%|▉ | 1141/12188 [2:23:26<22:25:53, 7.31s/it] 9%|▉ | 1142/12188 [2:23:34<22:30:22, 7.33s/it] {'loss': 0.4168, 'grad_norm': 0.7381690220771923, 'learning_rate': 9.894336339996133e-06, 'epoch': 0.09} + 9%|▉ | 1142/12188 [2:23:34<22:30:22, 7.33s/it] 9%|▉ | 1143/12188 [2:23:41<22:19:53, 7.28s/it] {'loss': 0.4015, 'grad_norm': 0.6450215709130285, 'learning_rate': 9.894064451268933e-06, 'epoch': 0.09} + 9%|▉ | 1143/12188 [2:23:41<22:19:53, 7.28s/it] 9%|▉ | 1144/12188 [2:23:48<22:04:15, 7.19s/it] {'loss': 0.4122, 'grad_norm': 0.6903120987355906, 'learning_rate': 9.893792216930762e-06, 'epoch': 0.09} + 9%|▉ | 1144/12188 [2:23:48<22:04:15, 7.19s/it] 9%|▉ | 1145/12188 [2:23:55<21:40:36, 7.07s/it] {'loss': 0.4192, 'grad_norm': 0.6353743144411274, 'learning_rate': 9.893519637000846e-06, 'epoch': 0.09} + 9%|▉ | 1145/12188 [2:23:55<21:40:36, 7.07s/it] 9%|▉ | 1146/12188 [2:24:01<21:25:28, 6.99s/it] {'loss': 0.364, 'grad_norm': 0.5969026845530635, 'learning_rate': 9.893246711498438e-06, 'epoch': 0.09} + 9%|▉ | 1146/12188 [2:24:01<21:25:28, 6.99s/it] 9%|▉ | 1147/12188 [2:24:08<21:13:44, 6.92s/it] {'loss': 0.4905, 'grad_norm': 0.6359288862679797, 'learning_rate': 9.892973440442809e-06, 'epoch': 0.09} + 9%|▉ | 1147/12188 [2:24:08<21:13:44, 6.92s/it] 9%|▉ | 1148/12188 [2:24:15<21:20:45, 6.96s/it] {'loss': 0.4121, 'grad_norm': 0.6845632672755336, 'learning_rate': 9.892699823853254e-06, 'epoch': 0.09} + 9%|▉ | 1148/12188 [2:24:15<21:20:45, 6.96s/it] 9%|▉ | 1149/12188 [2:24:22<21:16:52, 6.94s/it] {'loss': 0.3779, 'grad_norm': 0.671869539033308, 'learning_rate': 9.8924258617491e-06, 'epoch': 0.09} + 9%|▉ | 1149/12188 [2:24:22<21:16:52, 6.94s/it] 9%|▉ | 1150/12188 [2:24:30<21:43:18, 7.08s/it] {'loss': 0.4157, 'grad_norm': 0.6267614607282824, 'learning_rate': 9.89215155414969e-06, 'epoch': 0.09} + 9%|▉ | 1150/12188 [2:24:30<21:43:18, 7.08s/it] 9%|▉ | 1151/12188 [2:24:36<21:20:54, 6.96s/it] {'loss': 0.4234, 'grad_norm': 0.7241409693711145, 'learning_rate': 9.8918769010744e-06, 'epoch': 0.09} + 9%|▉ | 1151/12188 [2:24:36<21:20:54, 6.96s/it] 9%|▉ | 1152/12188 [2:24:44<21:59:54, 7.18s/it] {'loss': 0.4095, 'grad_norm': 0.6900135158853797, 'learning_rate': 9.891601902542621e-06, 'epoch': 0.09} + 9%|▉ | 1152/12188 [2:24:44<21:59:54, 7.18s/it] 9%|▉ | 1153/12188 [2:24:52<22:58:08, 7.49s/it] {'loss': 0.4339, 'grad_norm': 0.6972742051145224, 'learning_rate': 9.891326558573774e-06, 'epoch': 0.09} + 9%|▉ | 1153/12188 [2:24:52<22:58:08, 7.49s/it] 9%|▉ | 1154/12188 [2:25:00<23:07:12, 7.54s/it] {'loss': 0.4095, 'grad_norm': 0.6356201051062806, 'learning_rate': 9.891050869187305e-06, 'epoch': 0.09} + 9%|▉ | 1154/12188 [2:25:00<23:07:12, 7.54s/it] 9%|▉ | 1155/12188 [2:25:07<23:13:36, 7.58s/it] {'loss': 0.4307, 'grad_norm': 0.6596740323744534, 'learning_rate': 9.89077483440268e-06, 'epoch': 0.09} + 9%|▉ | 1155/12188 [2:25:07<23:13:36, 7.58s/it] 9%|▉ | 1156/12188 [2:25:14<22:41:43, 7.41s/it] {'loss': 0.4165, 'grad_norm': 0.6775343311021543, 'learning_rate': 9.890498454239396e-06, 'epoch': 0.09} + 9%|▉ | 1156/12188 [2:25:14<22:41:43, 7.41s/it] 9%|▉ | 1157/12188 [2:25:21<22:17:04, 7.27s/it] {'loss': 0.3974, 'grad_norm': 0.6562848619162858, 'learning_rate': 9.89022172871697e-06, 'epoch': 0.09} + 9%|▉ | 1157/12188 [2:25:21<22:17:04, 7.27s/it] 10%|▉ | 1158/12188 [2:25:28<21:56:16, 7.16s/it] {'loss': 0.4112, 'grad_norm': 0.6299477003946772, 'learning_rate': 9.889944657854937e-06, 'epoch': 0.1} + 10%|▉ | 1158/12188 [2:25:28<21:56:16, 7.16s/it] 10%|▉ | 1159/12188 [2:25:36<22:09:25, 7.23s/it] {'loss': 0.4244, 'grad_norm': 0.7317256411980955, 'learning_rate': 9.889667241672872e-06, 'epoch': 0.1} + 10%|▉ | 1159/12188 [2:25:36<22:09:25, 7.23s/it] 10%|▉ | 1160/12188 [2:25:46<24:35:15, 8.03s/it] {'loss': 0.4017, 'grad_norm': 0.7601706411362783, 'learning_rate': 9.889389480190361e-06, 'epoch': 0.1} + 10%|▉ | 1160/12188 [2:25:46<24:35:15, 8.03s/it] 10%|▉ | 1161/12188 [2:25:53<23:57:28, 7.82s/it] {'loss': 0.446, 'grad_norm': 0.6806303852314602, 'learning_rate': 9.889111373427021e-06, 'epoch': 0.1} + 10%|▉ | 1161/12188 [2:25:53<23:57:28, 7.82s/it] 10%|▉ | 1162/12188 [2:26:00<23:40:06, 7.73s/it] {'loss': 0.4371, 'grad_norm': 0.6397863795597569, 'learning_rate': 9.888832921402491e-06, 'epoch': 0.1} + 10%|▉ | 1162/12188 [2:26:00<23:40:06, 7.73s/it] 10%|▉ | 1163/12188 [2:26:08<23:27:51, 7.66s/it] {'loss': 0.4147, 'grad_norm': 0.6946839682036868, 'learning_rate': 9.888554124136436e-06, 'epoch': 0.1} + 10%|▉ | 1163/12188 [2:26:08<23:27:51, 7.66s/it] 10%|▉ | 1164/12188 [2:26:16<24:13:19, 7.91s/it] {'loss': 0.4094, 'grad_norm': 0.6542115500813515, 'learning_rate': 9.88827498164854e-06, 'epoch': 0.1} + 10%|▉ | 1164/12188 [2:26:16<24:13:19, 7.91s/it] 10%|▉ | 1165/12188 [2:26:24<23:46:12, 7.76s/it] {'loss': 0.4558, 'grad_norm': 0.6603906768724794, 'learning_rate': 9.887995493958519e-06, 'epoch': 0.1} + 10%|▉ | 1165/12188 [2:26:24<23:46:12, 7.76s/it] 10%|▉ | 1166/12188 [2:26:32<23:55:10, 7.81s/it] {'loss': 0.4243, 'grad_norm': 0.6075697495063458, 'learning_rate': 9.88771566108611e-06, 'epoch': 0.1} + 10%|▉ | 1166/12188 [2:26:32<23:55:10, 7.81s/it] 10%|▉ | 1167/12188 [2:26:39<23:20:19, 7.62s/it] {'loss': 0.4392, 'grad_norm': 0.6880008782152812, 'learning_rate': 9.887435483051074e-06, 'epoch': 0.1} + 10%|▉ | 1167/12188 [2:26:39<23:20:19, 7.62s/it] 10%|▉ | 1168/12188 [2:26:46<22:49:07, 7.45s/it] {'loss': 0.4381, 'grad_norm': 0.7317805499474413, 'learning_rate': 9.887154959873196e-06, 'epoch': 0.1} + 10%|▉ | 1168/12188 [2:26:46<22:49:07, 7.45s/it] 10%|▉ | 1169/12188 [2:26:53<22:10:21, 7.24s/it] {'loss': 0.4285, 'grad_norm': 0.6771479150337498, 'learning_rate': 9.886874091572287e-06, 'epoch': 0.1} + 10%|▉ | 1169/12188 [2:26:53<22:10:21, 7.24s/it] 10%|▉ | 1170/12188 [2:27:00<21:52:03, 7.14s/it] {'loss': 0.4417, 'grad_norm': 0.7051578388913949, 'learning_rate': 9.88659287816818e-06, 'epoch': 0.1} + 10%|▉ | 1170/12188 [2:27:00<21:52:03, 7.14s/it] 10%|▉ | 1171/12188 [2:27:09<24:07:23, 7.88s/it] {'loss': 0.4003, 'grad_norm': 0.6506299000712961, 'learning_rate': 9.886311319680736e-06, 'epoch': 0.1} + 10%|▉ | 1171/12188 [2:27:09<24:07:23, 7.88s/it] 10%|▉ | 1172/12188 [2:27:16<23:28:59, 7.67s/it] {'loss': 0.4143, 'grad_norm': 0.6609675888923994, 'learning_rate': 9.886029416129837e-06, 'epoch': 0.1} + 10%|▉ | 1172/12188 [2:27:16<23:28:59, 7.67s/it] 10%|▉ | 1173/12188 [2:27:24<23:20:22, 7.63s/it] {'loss': 0.394, 'grad_norm': 0.6666426904436271, 'learning_rate': 9.88574716753539e-06, 'epoch': 0.1} + 10%|▉ | 1173/12188 [2:27:24<23:20:22, 7.63s/it] 10%|▉ | 1174/12188 [2:27:31<22:45:11, 7.44s/it] {'loss': 0.4094, 'grad_norm': 0.6833645393348772, 'learning_rate': 9.885464573917328e-06, 'epoch': 0.1} + 10%|▉ | 1174/12188 [2:27:31<22:45:11, 7.44s/it] 10%|▉ | 1175/12188 [2:27:38<22:38:49, 7.40s/it] {'loss': 0.4324, 'grad_norm': 0.6862142813072858, 'learning_rate': 9.885181635295606e-06, 'epoch': 0.1} + 10%|▉ | 1175/12188 [2:27:38<22:38:49, 7.40s/it] 10%|▉ | 1176/12188 [2:27:46<22:45:50, 7.44s/it] {'loss': 0.4314, 'grad_norm': 0.6806571001621979, 'learning_rate': 9.884898351690206e-06, 'epoch': 0.1} + 10%|▉ | 1176/12188 [2:27:46<22:45:50, 7.44s/it] 10%|▉ | 1177/12188 [2:27:54<23:06:10, 7.55s/it] {'loss': 0.3763, 'grad_norm': 0.6522613530013103, 'learning_rate': 9.884614723121133e-06, 'epoch': 0.1} + 10%|▉ | 1177/12188 [2:27:54<23:06:10, 7.55s/it] 10%|▉ | 1178/12188 [2:28:02<23:28:41, 7.68s/it] {'loss': 0.3928, 'grad_norm': 0.7242211194245459, 'learning_rate': 9.884330749608417e-06, 'epoch': 0.1} + 10%|▉ | 1178/12188 [2:28:02<23:28:41, 7.68s/it] 10%|▉ | 1179/12188 [2:28:09<23:06:30, 7.56s/it] {'loss': 0.3942, 'grad_norm': 0.6420712561565053, 'learning_rate': 9.88404643117211e-06, 'epoch': 0.1} + 10%|▉ | 1179/12188 [2:28:09<23:06:30, 7.56s/it] 10%|▉ | 1180/12188 [2:28:16<22:56:04, 7.50s/it] {'loss': 0.4461, 'grad_norm': 0.7553004772738057, 'learning_rate': 9.883761767832289e-06, 'epoch': 0.1} + 10%|▉ | 1180/12188 [2:28:16<22:56:04, 7.50s/it] 10%|▉ | 1181/12188 [2:28:24<22:46:24, 7.45s/it] {'loss': 0.3884, 'grad_norm': 0.6770050373855221, 'learning_rate': 9.883476759609061e-06, 'epoch': 0.1} + 10%|▉ | 1181/12188 [2:28:24<22:46:24, 7.45s/it] 10%|▉ | 1182/12188 [2:28:31<22:24:51, 7.33s/it] {'loss': 0.4331, 'grad_norm': 0.7107389845073202, 'learning_rate': 9.883191406522547e-06, 'epoch': 0.1} + 10%|▉ | 1182/12188 [2:28:31<22:24:51, 7.33s/it] 10%|▉ | 1183/12188 [2:28:38<22:04:54, 7.22s/it] {'loss': 0.3662, 'grad_norm': 0.6058284775455465, 'learning_rate': 9.882905708592905e-06, 'epoch': 0.1} + 10%|▉ | 1183/12188 [2:28:38<22:04:54, 7.22s/it] 10%|▉ | 1184/12188 [2:28:44<21:33:02, 7.05s/it] {'loss': 0.4359, 'grad_norm': 0.6869063871921786, 'learning_rate': 9.882619665840306e-06, 'epoch': 0.1} + 10%|▉ | 1184/12188 [2:28:44<21:33:02, 7.05s/it] 10%|▉ | 1185/12188 [2:28:52<21:44:13, 7.11s/it] {'loss': 0.4225, 'grad_norm': 0.7300115430989915, 'learning_rate': 9.882333278284949e-06, 'epoch': 0.1} + 10%|▉ | 1185/12188 [2:28:52<21:44:13, 7.11s/it] 10%|▉ | 1186/12188 [2:29:00<23:02:04, 7.54s/it] {'loss': 0.3808, 'grad_norm': 0.6982908587800185, 'learning_rate': 9.88204654594706e-06, 'epoch': 0.1} + 10%|▉ | 1186/12188 [2:29:00<23:02:04, 7.54s/it] 10%|▉ | 1187/12188 [2:29:07<22:42:40, 7.43s/it] {'loss': 0.4431, 'grad_norm': 0.7438174129886317, 'learning_rate': 9.881759468846887e-06, 'epoch': 0.1} + 10%|▉ | 1187/12188 [2:29:07<22:42:40, 7.43s/it] 10%|▉ | 1188/12188 [2:29:15<23:25:23, 7.67s/it] {'loss': 0.4296, 'grad_norm': 0.7344692522329485, 'learning_rate': 9.881472047004706e-06, 'epoch': 0.1} + 10%|▉ | 1188/12188 [2:29:15<23:25:23, 7.67s/it] 10%|▉ | 1189/12188 [2:29:22<22:46:51, 7.46s/it] {'loss': 0.4135, 'grad_norm': 0.6566589764090393, 'learning_rate': 9.88118428044081e-06, 'epoch': 0.1} + 10%|▉ | 1189/12188 [2:29:22<22:46:51, 7.46s/it] 10%|▉ | 1190/12188 [2:29:29<22:20:33, 7.31s/it] {'loss': 0.4198, 'grad_norm': 0.723748294584925, 'learning_rate': 9.880896169175524e-06, 'epoch': 0.1} + 10%|▉ | 1190/12188 [2:29:29<22:20:33, 7.31s/it] 10%|▉ | 1191/12188 [2:29:38<23:58:23, 7.85s/it] {'loss': 0.3826, 'grad_norm': 0.6595411840386957, 'learning_rate': 9.880607713229191e-06, 'epoch': 0.1} + 10%|▉ | 1191/12188 [2:29:38<23:58:23, 7.85s/it] 10%|▉ | 1192/12188 [2:29:45<23:10:59, 7.59s/it] {'loss': 0.4208, 'grad_norm': 0.6960174216244738, 'learning_rate': 9.880318912622181e-06, 'epoch': 0.1} + 10%|▉ | 1192/12188 [2:29:45<23:10:59, 7.59s/it] 10%|▉ | 1193/12188 [2:29:52<22:37:06, 7.41s/it] {'loss': 0.4485, 'grad_norm': 0.6477158474312824, 'learning_rate': 9.880029767374892e-06, 'epoch': 0.1} + 10%|▉ | 1193/12188 [2:29:52<22:37:06, 7.41s/it] 10%|▉ | 1194/12188 [2:30:00<22:27:07, 7.35s/it] {'loss': 0.4481, 'grad_norm': 0.7296410717560162, 'learning_rate': 9.87974027750774e-06, 'epoch': 0.1} + 10%|▉ | 1194/12188 [2:30:00<22:27:07, 7.35s/it] 10%|▉ | 1195/12188 [2:30:07<22:03:04, 7.22s/it] {'loss': 0.422, 'grad_norm': 0.602611060795991, 'learning_rate': 9.879450443041172e-06, 'epoch': 0.1} + 10%|▉ | 1195/12188 [2:30:07<22:03:04, 7.22s/it] 10%|▉ | 1196/12188 [2:30:14<22:09:15, 7.26s/it] {'loss': 0.444, 'grad_norm': 0.7155220574958134, 'learning_rate': 9.879160263995652e-06, 'epoch': 0.1} + 10%|▉ | 1196/12188 [2:30:14<22:09:15, 7.26s/it] 10%|▉ | 1197/12188 [2:30:21<21:56:19, 7.19s/it] {'loss': 0.4362, 'grad_norm': 0.6447404397894257, 'learning_rate': 9.878869740391672e-06, 'epoch': 0.1} + 10%|▉ | 1197/12188 [2:30:21<21:56:19, 7.19s/it] 10%|▉ | 1198/12188 [2:30:28<21:38:55, 7.09s/it] {'loss': 0.3968, 'grad_norm': 0.6476696985064385, 'learning_rate': 9.87857887224975e-06, 'epoch': 0.1} + 10%|▉ | 1198/12188 [2:30:28<21:38:55, 7.09s/it] 10%|▉ | 1199/12188 [2:30:36<22:21:46, 7.33s/it] {'loss': 0.3666, 'grad_norm': 0.6354096758634412, 'learning_rate': 9.878287659590427e-06, 'epoch': 0.1} + 10%|▉ | 1199/12188 [2:30:36<22:21:46, 7.33s/it] 10%|▉ | 1200/12188 [2:30:42<21:52:38, 7.17s/it] {'loss': 0.4291, 'grad_norm': 0.6573453437167557, 'learning_rate': 9.877996102434266e-06, 'epoch': 0.1} + 10%|▉ | 1200/12188 [2:30:43<21:52:38, 7.17s/it] 10%|▉ | 1201/12188 [2:30:51<22:45:25, 7.46s/it] {'loss': 0.4116, 'grad_norm': 0.6571449108824587, 'learning_rate': 9.877704200801858e-06, 'epoch': 0.1} + 10%|▉ | 1201/12188 [2:30:51<22:45:25, 7.46s/it] 10%|▉ | 1202/12188 [2:30:58<22:27:51, 7.36s/it] {'loss': 0.3901, 'grad_norm': 0.6554833933456761, 'learning_rate': 9.877411954713816e-06, 'epoch': 0.1} + 10%|▉ | 1202/12188 [2:30:58<22:27:51, 7.36s/it] 10%|▉ | 1203/12188 [2:31:05<22:08:09, 7.25s/it] {'loss': 0.3981, 'grad_norm': 0.6681084864792033, 'learning_rate': 9.877119364190778e-06, 'epoch': 0.1} + 10%|▉ | 1203/12188 [2:31:05<22:08:09, 7.25s/it] 10%|▉ | 1204/12188 [2:31:12<22:08:47, 7.26s/it] {'loss': 0.3619, 'grad_norm': 0.6976653346389601, 'learning_rate': 9.876826429253405e-06, 'epoch': 0.1} + 10%|▉ | 1204/12188 [2:31:12<22:08:47, 7.26s/it] 10%|▉ | 1205/12188 [2:31:19<22:17:34, 7.31s/it] {'loss': 0.4409, 'grad_norm': 0.6366165665130963, 'learning_rate': 9.876533149922387e-06, 'epoch': 0.1} + 10%|▉ | 1205/12188 [2:31:19<22:17:34, 7.31s/it] 10%|▉ | 1206/12188 [2:31:28<23:03:48, 7.56s/it] {'loss': 0.4304, 'grad_norm': 0.7170383214243727, 'learning_rate': 9.876239526218431e-06, 'epoch': 0.1} + 10%|▉ | 1206/12188 [2:31:28<23:03:48, 7.56s/it] 10%|▉ | 1207/12188 [2:31:34<22:26:46, 7.36s/it] {'loss': 0.4218, 'grad_norm': 0.6558353821819263, 'learning_rate': 9.875945558162274e-06, 'epoch': 0.1} + 10%|▉ | 1207/12188 [2:31:35<22:26:46, 7.36s/it] 10%|▉ | 1208/12188 [2:31:42<22:41:55, 7.44s/it] {'loss': 0.371, 'grad_norm': 0.6379859670242897, 'learning_rate': 9.875651245774675e-06, 'epoch': 0.1} + 10%|▉ | 1208/12188 [2:31:42<22:41:55, 7.44s/it] 10%|▉ | 1209/12188 [2:31:49<22:16:39, 7.30s/it] {'loss': 0.4375, 'grad_norm': 0.6871401756004878, 'learning_rate': 9.87535658907642e-06, 'epoch': 0.1} + 10%|▉ | 1209/12188 [2:31:49<22:16:39, 7.30s/it] 10%|▉ | 1210/12188 [2:31:56<21:47:34, 7.15s/it] {'loss': 0.4037, 'grad_norm': 0.679834868080823, 'learning_rate': 9.875061588088316e-06, 'epoch': 0.1} + 10%|▉ | 1210/12188 [2:31:56<21:47:34, 7.15s/it] 10%|▉ | 1211/12188 [2:32:03<21:45:27, 7.14s/it] {'loss': 0.4299, 'grad_norm': 0.7068987327506728, 'learning_rate': 9.874766242831193e-06, 'epoch': 0.1} + 10%|▉ | 1211/12188 [2:32:03<21:45:27, 7.14s/it] 10%|▉ | 1212/12188 [2:32:10<21:49:07, 7.16s/it] {'loss': 0.4601, 'grad_norm': 0.6433934867282338, 'learning_rate': 9.87447055332591e-06, 'epoch': 0.1} + 10%|▉ | 1212/12188 [2:32:10<21:49:07, 7.16s/it] 10%|▉ | 1213/12188 [2:32:17<21:44:33, 7.13s/it] {'loss': 0.3687, 'grad_norm': 0.6561002884617048, 'learning_rate': 9.874174519593349e-06, 'epoch': 0.1} + 10%|▉ | 1213/12188 [2:32:17<21:44:33, 7.13s/it] 10%|▉ | 1214/12188 [2:32:24<21:22:44, 7.01s/it] {'loss': 0.391, 'grad_norm': 0.6710283268048053, 'learning_rate': 9.873878141654414e-06, 'epoch': 0.1} + 10%|▉ | 1214/12188 [2:32:24<21:22:44, 7.01s/it] 10%|▉ | 1215/12188 [2:32:31<21:34:26, 7.08s/it] {'loss': 0.4227, 'grad_norm': 0.690449917003757, 'learning_rate': 9.873581419530035e-06, 'epoch': 0.1} + 10%|▉ | 1215/12188 [2:32:31<21:34:26, 7.08s/it] 10%|▉ | 1216/12188 [2:32:39<22:05:26, 7.25s/it] {'loss': 0.3972, 'grad_norm': 0.6732269491556577, 'learning_rate': 9.873284353241166e-06, 'epoch': 0.1} + 10%|▉ | 1216/12188 [2:32:39<22:05:26, 7.25s/it] 10%|▉ | 1217/12188 [2:32:46<22:06:54, 7.26s/it] {'loss': 0.3826, 'grad_norm': 0.6612178590633094, 'learning_rate': 9.872986942808784e-06, 'epoch': 0.1} + 10%|▉ | 1217/12188 [2:32:46<22:06:54, 7.26s/it] 10%|▉ | 1218/12188 [2:32:53<21:30:49, 7.06s/it] {'loss': 0.4136, 'grad_norm': 0.6890354739697394, 'learning_rate': 9.872689188253895e-06, 'epoch': 0.1} + 10%|▉ | 1218/12188 [2:32:53<21:30:49, 7.06s/it] 10%|█ | 1219/12188 [2:33:00<21:45:53, 7.14s/it] {'loss': 0.3991, 'grad_norm': 0.6571427659343753, 'learning_rate': 9.872391089597523e-06, 'epoch': 0.1} + 10%|█ | 1219/12188 [2:33:00<21:45:53, 7.14s/it] 10%|█ | 1220/12188 [2:33:07<21:52:33, 7.18s/it] {'loss': 0.3969, 'grad_norm': 0.6670393080690409, 'learning_rate': 9.872092646860723e-06, 'epoch': 0.1} + 10%|█ | 1220/12188 [2:33:07<21:52:33, 7.18s/it] 10%|█ | 1221/12188 [2:33:14<21:40:00, 7.11s/it] {'loss': 0.4446, 'grad_norm': 0.6820732034265237, 'learning_rate': 9.871793860064566e-06, 'epoch': 0.1} + 10%|█ | 1221/12188 [2:33:14<21:40:00, 7.11s/it] 10%|█ | 1222/12188 [2:33:21<21:25:42, 7.03s/it] {'loss': 0.4352, 'grad_norm': 0.6882090178061364, 'learning_rate': 9.871494729230153e-06, 'epoch': 0.1} + 10%|█ | 1222/12188 [2:33:21<21:25:42, 7.03s/it] 10%|█ | 1223/12188 [2:33:28<21:12:11, 6.96s/it] {'loss': 0.4019, 'grad_norm': 0.6028845346912963, 'learning_rate': 9.87119525437861e-06, 'epoch': 0.1} + 10%|█ | 1223/12188 [2:33:28<21:12:11, 6.96s/it] 10%|█ | 1224/12188 [2:33:35<21:05:40, 6.93s/it] {'loss': 0.4326, 'grad_norm': 0.6477729322693746, 'learning_rate': 9.870895435531086e-06, 'epoch': 0.1} + 10%|█ | 1224/12188 [2:33:35<21:05:40, 6.93s/it] 10%|█ | 1225/12188 [2:33:42<21:27:18, 7.05s/it] {'loss': 0.432, 'grad_norm': 0.6645127741202622, 'learning_rate': 9.870595272708749e-06, 'epoch': 0.1} + 10%|█ | 1225/12188 [2:33:42<21:27:18, 7.05s/it] 10%|█ | 1226/12188 [2:33:49<21:11:03, 6.96s/it] {'loss': 0.4591, 'grad_norm': 0.711517408515484, 'learning_rate': 9.8702947659328e-06, 'epoch': 0.1} + 10%|█ | 1226/12188 [2:33:49<21:11:03, 6.96s/it] 10%|█ | 1227/12188 [2:33:56<21:19:26, 7.00s/it] {'loss': 0.4157, 'grad_norm': 0.6512117563742844, 'learning_rate': 9.869993915224459e-06, 'epoch': 0.1} + 10%|█ | 1227/12188 [2:33:56<21:19:26, 7.00s/it] 10%|█ | 1228/12188 [2:34:04<22:13:52, 7.30s/it] {'loss': 0.4189, 'grad_norm': 0.6449312195160961, 'learning_rate': 9.869692720604973e-06, 'epoch': 0.1} + 10%|█ | 1228/12188 [2:34:04<22:13:52, 7.30s/it] 10%|█ | 1229/12188 [2:34:12<23:05:31, 7.59s/it] {'loss': 0.4308, 'grad_norm': 0.6481266433904177, 'learning_rate': 9.86939118209561e-06, 'epoch': 0.1} + 10%|█ | 1229/12188 [2:34:12<23:05:31, 7.59s/it] 10%|█ | 1230/12188 [2:34:19<22:30:01, 7.39s/it] {'loss': 0.4255, 'grad_norm': 0.6737536916479572, 'learning_rate': 9.869089299717665e-06, 'epoch': 0.1} + 10%|█ | 1230/12188 [2:34:19<22:30:01, 7.39s/it] 10%|█ | 1231/12188 [2:34:26<22:02:56, 7.24s/it] {'loss': 0.4291, 'grad_norm': 0.6957841509529012, 'learning_rate': 9.868787073492457e-06, 'epoch': 0.1} + 10%|█ | 1231/12188 [2:34:26<22:02:56, 7.24s/it] 10%|█ | 1232/12188 [2:34:33<21:49:30, 7.17s/it] {'loss': 0.4439, 'grad_norm': 0.65006099699815, 'learning_rate': 9.868484503441329e-06, 'epoch': 0.1} + 10%|█ | 1232/12188 [2:34:33<21:49:30, 7.17s/it]Invalidate trace cache @ step 2: expected module 1, but got module 364 + 10%|█ | 1233/12188 [2:34:39<20:18:54, 6.68s/it] {'loss': 0.7858, 'grad_norm': 0.991419344591193, 'learning_rate': 9.868181589585644e-06, 'epoch': 0.1} + 10%|█ | 1233/12188 [2:34:39<20:18:54, 6.68s/it] 10%|█ | 1234/12188 [2:34:46<21:10:32, 6.96s/it] {'loss': 0.3862, 'grad_norm': 0.685237521233886, 'learning_rate': 9.8678783319468e-06, 'epoch': 0.1} + 10%|█ | 1234/12188 [2:34:46<21:10:32, 6.96s/it] 10%|█ | 1235/12188 [2:34:53<21:02:57, 6.92s/it] {'loss': 0.4262, 'grad_norm': 0.6602605502174269, 'learning_rate': 9.867574730546207e-06, 'epoch': 0.1} + 10%|█ | 1235/12188 [2:34:53<21:02:57, 6.92s/it] 10%|█ | 1236/12188 [2:35:00<21:09:44, 6.96s/it] {'loss': 0.3976, 'grad_norm': 0.6771685236067592, 'learning_rate': 9.867270785405308e-06, 'epoch': 0.1} + 10%|█ | 1236/12188 [2:35:00<21:09:44, 6.96s/it] 10%|█ | 1237/12188 [2:35:09<23:06:47, 7.60s/it] {'loss': 0.4139, 'grad_norm': 0.715946236397249, 'learning_rate': 9.866966496545565e-06, 'epoch': 0.1} + 10%|█ | 1237/12188 [2:35:09<23:06:47, 7.60s/it] 10%|█ | 1238/12188 [2:35:16<22:24:44, 7.37s/it] {'loss': 0.43, 'grad_norm': 0.7522121805778265, 'learning_rate': 9.866661863988466e-06, 'epoch': 0.1} + 10%|█ | 1238/12188 [2:35:16<22:24:44, 7.37s/it] 10%|█ | 1239/12188 [2:35:23<21:48:00, 7.17s/it] {'loss': 0.3915, 'grad_norm': 0.6479778470336959, 'learning_rate': 9.866356887755529e-06, 'epoch': 0.1} + 10%|█ | 1239/12188 [2:35:23<21:48:00, 7.17s/it] 10%|█ | 1240/12188 [2:35:30<21:52:14, 7.19s/it] {'loss': 0.3823, 'grad_norm': 0.6669648557311907, 'learning_rate': 9.866051567868284e-06, 'epoch': 0.1} + 10%|█ | 1240/12188 [2:35:30<21:52:14, 7.19s/it] 10%|█ | 1241/12188 [2:35:37<21:54:03, 7.20s/it] {'loss': 0.4111, 'grad_norm': 0.6340208922718302, 'learning_rate': 9.865745904348296e-06, 'epoch': 0.1} + 10%|█ | 1241/12188 [2:35:37<21:54:03, 7.20s/it] 10%|█ | 1242/12188 [2:35:44<21:37:04, 7.11s/it] {'loss': 0.42, 'grad_norm': 0.6704838790522455, 'learning_rate': 9.865439897217151e-06, 'epoch': 0.1} + 10%|█ | 1242/12188 [2:35:44<21:37:04, 7.11s/it] 10%|█ | 1243/12188 [2:35:54<24:16:14, 7.98s/it] {'loss': 0.4024, 'grad_norm': 0.6852840341495134, 'learning_rate': 9.865133546496455e-06, 'epoch': 0.1} + 10%|█ | 1243/12188 [2:35:54<24:16:14, 7.98s/it] 10%|█ | 1244/12188 [2:36:04<25:57:40, 8.54s/it] {'loss': 0.4576, 'grad_norm': 0.6786879062856247, 'learning_rate': 9.864826852207846e-06, 'epoch': 0.1} + 10%|█ | 1244/12188 [2:36:04<25:57:40, 8.54s/it] 10%|█ | 1245/12188 [2:36:11<24:36:48, 8.10s/it] {'loss': 0.4398, 'grad_norm': 0.6719620520994852, 'learning_rate': 9.86451981437298e-06, 'epoch': 0.1} + 10%|█ | 1245/12188 [2:36:11<24:36:48, 8.10s/it] 10%|█ | 1246/12188 [2:36:18<23:21:01, 7.68s/it] {'loss': 0.4325, 'grad_norm': 0.6665145178916769, 'learning_rate': 9.86421243301354e-06, 'epoch': 0.1} + 10%|█ | 1246/12188 [2:36:18<23:21:01, 7.68s/it] 10%|█ | 1247/12188 [2:36:25<22:49:30, 7.51s/it] {'loss': 0.4072, 'grad_norm': 0.6511827504146804, 'learning_rate': 9.863904708151233e-06, 'epoch': 0.1} + 10%|█ | 1247/12188 [2:36:25<22:49:30, 7.51s/it] 10%|█ | 1248/12188 [2:36:32<22:10:06, 7.29s/it] {'loss': 0.4183, 'grad_norm': 0.629715654295075, 'learning_rate': 9.86359663980779e-06, 'epoch': 0.1} + 10%|█ | 1248/12188 [2:36:32<22:10:06, 7.29s/it] 10%|█ | 1249/12188 [2:36:39<22:22:38, 7.36s/it] {'loss': 0.3808, 'grad_norm': 0.6412015739919525, 'learning_rate': 9.863288228004967e-06, 'epoch': 0.1} + 10%|█ | 1249/12188 [2:36:39<22:22:38, 7.36s/it] 10%|█ | 1250/12188 [2:36:47<22:47:40, 7.50s/it] {'loss': 0.4265, 'grad_norm': 0.6525704759376479, 'learning_rate': 9.862979472764542e-06, 'epoch': 0.1} + 10%|█ | 1250/12188 [2:36:47<22:47:40, 7.50s/it] 10%|█ | 1251/12188 [2:36:54<22:23:08, 7.37s/it] {'loss': 0.3877, 'grad_norm': 0.6255580689886147, 'learning_rate': 9.862670374108321e-06, 'epoch': 0.1} + 10%|█ | 1251/12188 [2:36:54<22:23:08, 7.37s/it] 10%|█ | 1252/12188 [2:37:02<22:40:52, 7.47s/it] {'loss': 0.3935, 'grad_norm': 0.6667134140316704, 'learning_rate': 9.862360932058128e-06, 'epoch': 0.1} + 10%|█ | 1252/12188 [2:37:02<22:40:52, 7.47s/it] 10%|█ | 1253/12188 [2:37:09<22:03:20, 7.26s/it] {'loss': 0.4426, 'grad_norm': 0.7080637942592976, 'learning_rate': 9.86205114663582e-06, 'epoch': 0.1} + 10%|█ | 1253/12188 [2:37:09<22:03:20, 7.26s/it] 10%|█ | 1254/12188 [2:37:15<21:29:47, 7.08s/it] {'loss': 0.4544, 'grad_norm': 0.6990533234173746, 'learning_rate': 9.86174101786327e-06, 'epoch': 0.1} + 10%|█ | 1254/12188 [2:37:15<21:29:47, 7.08s/it] 10%|█ | 1255/12188 [2:37:25<24:05:19, 7.93s/it] {'loss': 0.3719, 'grad_norm': 0.6384122378643747, 'learning_rate': 9.861430545762383e-06, 'epoch': 0.1} + 10%|█ | 1255/12188 [2:37:25<24:05:19, 7.93s/it] 10%|█ | 1256/12188 [2:37:32<23:10:48, 7.63s/it] {'loss': 0.3907, 'grad_norm': 0.6706019009735944, 'learning_rate': 9.86111973035508e-06, 'epoch': 0.1} + 10%|█ | 1256/12188 [2:37:32<23:10:48, 7.63s/it] 10%|█ | 1257/12188 [2:37:39<22:23:06, 7.37s/it] {'loss': 0.4062, 'grad_norm': 0.6680371887153953, 'learning_rate': 9.860808571663311e-06, 'epoch': 0.1} + 10%|█ | 1257/12188 [2:37:39<22:23:06, 7.37s/it] 10%|█ | 1258/12188 [2:37:46<22:14:56, 7.33s/it] {'loss': 0.4065, 'grad_norm': 0.6215450697406733, 'learning_rate': 9.86049706970905e-06, 'epoch': 0.1} + 10%|█ | 1258/12188 [2:37:46<22:14:56, 7.33s/it] 10%|█ | 1259/12188 [2:37:55<23:53:15, 7.87s/it] {'loss': 0.4118, 'grad_norm': 0.6080051932536694, 'learning_rate': 9.860185224514295e-06, 'epoch': 0.1} + 10%|█ | 1259/12188 [2:37:55<23:53:15, 7.87s/it] 10%|█ | 1260/12188 [2:38:02<22:54:07, 7.54s/it] {'loss': 0.4448, 'grad_norm': 0.6553748716376908, 'learning_rate': 9.859873036101069e-06, 'epoch': 0.1} + 10%|█ | 1260/12188 [2:38:02<22:54:07, 7.54s/it] 10%|█ | 1261/12188 [2:38:09<22:32:24, 7.43s/it] {'loss': 0.4304, 'grad_norm': 0.6770991146202986, 'learning_rate': 9.859560504491417e-06, 'epoch': 0.1} + 10%|█ | 1261/12188 [2:38:09<22:32:24, 7.43s/it] 10%|█ | 1262/12188 [2:38:16<22:14:39, 7.33s/it] {'loss': 0.4064, 'grad_norm': 0.6151082114324095, 'learning_rate': 9.859247629707407e-06, 'epoch': 0.1} + 10%|█ | 1262/12188 [2:38:16<22:14:39, 7.33s/it] 10%|█ | 1263/12188 [2:38:23<21:52:48, 7.21s/it] {'loss': 0.4086, 'grad_norm': 0.6661284667596279, 'learning_rate': 9.85893441177114e-06, 'epoch': 0.1} + 10%|█ | 1263/12188 [2:38:23<21:52:48, 7.21s/it] 10%|█ | 1264/12188 [2:38:30<21:47:45, 7.18s/it] {'loss': 0.4071, 'grad_norm': 0.7394825309385502, 'learning_rate': 9.85862085070473e-06, 'epoch': 0.1} + 10%|█ | 1264/12188 [2:38:30<21:47:45, 7.18s/it] 10%|█ | 1265/12188 [2:38:37<21:48:18, 7.19s/it] {'loss': 0.44, 'grad_norm': 0.7454196505376599, 'learning_rate': 9.858306946530321e-06, 'epoch': 0.1} + 10%|█ | 1265/12188 [2:38:37<21:48:18, 7.19s/it] 10%|█ | 1266/12188 [2:38:46<22:57:50, 7.57s/it] {'loss': 0.4294, 'grad_norm': 0.7613870180313426, 'learning_rate': 9.85799269927008e-06, 'epoch': 0.1} + 10%|█ | 1266/12188 [2:38:46<22:57:50, 7.57s/it] 10%|█ | 1267/12188 [2:38:53<22:16:51, 7.34s/it] {'loss': 0.4128, 'grad_norm': 0.6909022106121174, 'learning_rate': 9.857678108946201e-06, 'epoch': 0.1} + 10%|█ | 1267/12188 [2:38:53<22:16:51, 7.34s/it] 10%|█ | 1268/12188 [2:39:00<21:48:44, 7.19s/it] {'loss': 0.3834, 'grad_norm': 0.641527562411698, 'learning_rate': 9.8573631755809e-06, 'epoch': 0.1} + 10%|█ | 1268/12188 [2:39:00<21:48:44, 7.19s/it] 10%|█ | 1269/12188 [2:39:09<23:59:37, 7.91s/it] {'loss': 0.4202, 'grad_norm': 0.6348015725059497, 'learning_rate': 9.857047899196412e-06, 'epoch': 0.1} + 10%|█ | 1269/12188 [2:39:09<23:59:37, 7.91s/it] 10%|█ | 1270/12188 [2:39:17<24:10:48, 7.97s/it] {'loss': 0.4934, 'grad_norm': 0.6494075450006449, 'learning_rate': 9.856732279815007e-06, 'epoch': 0.1} + 10%|█ | 1270/12188 [2:39:17<24:10:48, 7.97s/it] 10%|█ | 1271/12188 [2:39:25<23:34:27, 7.77s/it] {'loss': 0.431, 'grad_norm': 0.7224195477751449, 'learning_rate': 9.85641631745897e-06, 'epoch': 0.1} + 10%|█ | 1271/12188 [2:39:25<23:34:27, 7.77s/it] 10%|█ | 1272/12188 [2:39:33<23:48:01, 7.85s/it] {'loss': 0.4091, 'grad_norm': 0.652437951040799, 'learning_rate': 9.856100012150615e-06, 'epoch': 0.1} + 10%|█ | 1272/12188 [2:39:33<23:48:01, 7.85s/it] 10%|█ | 1273/12188 [2:39:40<23:32:54, 7.77s/it] {'loss': 0.42, 'grad_norm': 0.6565178724976299, 'learning_rate': 9.855783363912281e-06, 'epoch': 0.1} + 10%|█ | 1273/12188 [2:39:40<23:32:54, 7.77s/it] 10%|█ | 1274/12188 [2:39:48<23:17:46, 7.68s/it] {'loss': 0.4276, 'grad_norm': 0.6297142561708148, 'learning_rate': 9.855466372766326e-06, 'epoch': 0.1} + 10%|█ | 1274/12188 [2:39:48<23:17:46, 7.68s/it] 10%|█ | 1275/12188 [2:39:56<23:54:33, 7.89s/it] {'loss': 0.4624, 'grad_norm': 0.6964022615209245, 'learning_rate': 9.855149038735137e-06, 'epoch': 0.1} + 10%|█ | 1275/12188 [2:39:56<23:54:33, 7.89s/it] 10%|█ | 1276/12188 [2:40:03<23:06:55, 7.63s/it] {'loss': 0.4415, 'grad_norm': 0.6966913787348221, 'learning_rate': 9.854831361841124e-06, 'epoch': 0.1} + 10%|█ | 1276/12188 [2:40:03<23:06:55, 7.63s/it] 10%|█ | 1277/12188 [2:40:10<22:41:44, 7.49s/it] {'loss': 0.4001, 'grad_norm': 0.7304323456578327, 'learning_rate': 9.854513342106718e-06, 'epoch': 0.1} + 10%|█ | 1277/12188 [2:40:10<22:41:44, 7.49s/it] 10%|█ | 1278/12188 [2:40:17<22:05:42, 7.29s/it] {'loss': 0.3874, 'grad_norm': 0.6498924763461473, 'learning_rate': 9.85419497955438e-06, 'epoch': 0.1} + 10%|█ | 1278/12188 [2:40:17<22:05:42, 7.29s/it] 10%|█ | 1279/12188 [2:40:25<22:23:08, 7.39s/it] {'loss': 0.4051, 'grad_norm': 0.7295932212681431, 'learning_rate': 9.853876274206593e-06, 'epoch': 0.1} + 10%|█ | 1279/12188 [2:40:25<22:23:08, 7.39s/it] 11%|█ | 1280/12188 [2:40:32<22:14:44, 7.34s/it] {'loss': 0.4057, 'grad_norm': 0.6862922382764395, 'learning_rate': 9.853557226085859e-06, 'epoch': 0.11} + 11%|█ | 1280/12188 [2:40:32<22:14:44, 7.34s/it] 11%|█ | 1281/12188 [2:40:39<21:45:04, 7.18s/it] {'loss': 0.3884, 'grad_norm': 0.6431361945119496, 'learning_rate': 9.853237835214714e-06, 'epoch': 0.11} + 11%|█ | 1281/12188 [2:40:39<21:45:04, 7.18s/it] 11%|█ | 1282/12188 [2:40:46<21:48:08, 7.20s/it] {'loss': 0.4255, 'grad_norm': 0.6506842882466967, 'learning_rate': 9.85291810161571e-06, 'epoch': 0.11} + 11%|█ | 1282/12188 [2:40:46<21:48:08, 7.20s/it] 11%|█ | 1283/12188 [2:40:53<21:34:40, 7.12s/it] {'loss': 0.4209, 'grad_norm': 0.7019330145392001, 'learning_rate': 9.852598025311425e-06, 'epoch': 0.11} + 11%|█ | 1283/12188 [2:40:53<21:34:40, 7.12s/it] 11%|█ | 1284/12188 [2:41:00<21:08:27, 6.98s/it] {'loss': 0.4151, 'grad_norm': 0.6741459526914287, 'learning_rate': 9.852277606324465e-06, 'epoch': 0.11} + 11%|█ | 1284/12188 [2:41:00<21:08:27, 6.98s/it] 11%|█ | 1285/12188 [2:41:07<21:32:38, 7.11s/it] {'loss': 0.4396, 'grad_norm': 0.6847475163729722, 'learning_rate': 9.851956844677457e-06, 'epoch': 0.11} + 11%|█ | 1285/12188 [2:41:07<21:32:38, 7.11s/it] 11%|█ | 1286/12188 [2:41:14<21:23:38, 7.06s/it] {'loss': 0.4215, 'grad_norm': 0.74698406239371, 'learning_rate': 9.85163574039305e-06, 'epoch': 0.11} + 11%|█ | 1286/12188 [2:41:14<21:23:38, 7.06s/it] 11%|█ | 1287/12188 [2:41:21<21:18:07, 7.03s/it] {'loss': 0.4291, 'grad_norm': 0.7170528134311511, 'learning_rate': 9.851314293493923e-06, 'epoch': 0.11} + 11%|█ | 1287/12188 [2:41:21<21:18:07, 7.03s/it] 11%|█ | 1288/12188 [2:41:28<21:16:10, 7.02s/it] {'loss': 0.4012, 'grad_norm': 0.6394577654773472, 'learning_rate': 9.850992504002775e-06, 'epoch': 0.11} + 11%|█ | 1288/12188 [2:41:28<21:16:10, 7.02s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7ff80e66e7a0> +[Try #0] Failed to fetch sample 4804379 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7ff80e66e7a0> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Download QR code'"}, {'from': 'gpt', 'value': '\nclick(x=0.865, y=0.469)\n'}]} + 11%|█ | 1289/12188 [2:41:35<21:35:18, 7.13s/it] {'loss': 0.4513, 'grad_norm': 0.6620277445889368, 'learning_rate': 9.850670371942328e-06, 'epoch': 0.11} + 11%|��� | 1289/12188 [2:41:35<21:35:18, 7.13s/it] 11%|█ | 1290/12188 [2:41:42<21:14:00, 7.01s/it] {'loss': 0.4243, 'grad_norm': 0.669995025232077, 'learning_rate': 9.850347897335335e-06, 'epoch': 0.11} + 11%|█ | 1290/12188 [2:41:42<21:14:00, 7.01s/it] 11%|█ | 1291/12188 [2:41:50<21:46:30, 7.19s/it] {'loss': 0.4261, 'grad_norm': 0.67796346245835, 'learning_rate': 9.850025080204564e-06, 'epoch': 0.11} + 11%|█ | 1291/12188 [2:41:50<21:46:30, 7.19s/it] 11%|█ | 1292/12188 [2:41:57<22:00:29, 7.27s/it] {'loss': 0.4131, 'grad_norm': 0.679456152206682, 'learning_rate': 9.849701920572816e-06, 'epoch': 0.11} + 11%|█ | 1292/12188 [2:41:57<22:00:29, 7.27s/it] 11%|█ | 1293/12188 [2:42:05<22:22:29, 7.39s/it] {'loss': 0.4137, 'grad_norm': 0.7270584145994878, 'learning_rate': 9.84937841846291e-06, 'epoch': 0.11} + 11%|█ | 1293/12188 [2:42:05<22:22:29, 7.39s/it] 11%|█ | 1294/12188 [2:42:13<22:49:35, 7.54s/it] {'loss': 0.4134, 'grad_norm': 0.6587811123114931, 'learning_rate': 9.84905457389769e-06, 'epoch': 0.11} + 11%|█ | 1294/12188 [2:42:13<22:49:35, 7.54s/it] 11%|█ | 1295/12188 [2:42:20<22:20:05, 7.38s/it] {'loss': 0.3961, 'grad_norm': 0.6835403346871266, 'learning_rate': 9.848730386900027e-06, 'epoch': 0.11} + 11%|█ | 1295/12188 [2:42:20<22:20:05, 7.38s/it] 11%|█ | 1296/12188 [2:42:27<21:54:14, 7.24s/it] {'loss': 0.4523, 'grad_norm': 0.6672687991495331, 'learning_rate': 9.848405857492814e-06, 'epoch': 0.11} + 11%|█ | 1296/12188 [2:42:27<21:54:14, 7.24s/it] 11%|█ | 1297/12188 [2:42:33<21:33:52, 7.13s/it] {'loss': 0.3982, 'grad_norm': 0.6630087195290532, 'learning_rate': 9.84808098569897e-06, 'epoch': 0.11} + 11%|█ | 1297/12188 [2:42:33<21:33:52, 7.13s/it] 11%|█ | 1298/12188 [2:42:41<21:37:25, 7.15s/it] {'loss': 0.4169, 'grad_norm': 0.6357095712891924, 'learning_rate': 9.847755771541433e-06, 'epoch': 0.11} + 11%|█ | 1298/12188 [2:42:41<21:37:25, 7.15s/it] 11%|█ | 1299/12188 [2:42:48<22:06:50, 7.31s/it] {'loss': 0.4598, 'grad_norm': 0.7247209470362783, 'learning_rate': 9.847430215043175e-06, 'epoch': 0.11} + 11%|█ | 1299/12188 [2:42:48<22:06:50, 7.31s/it] 11%|█ | 1300/12188 [2:42:55<21:58:24, 7.27s/it] {'loss': 0.3894, 'grad_norm': 0.7058027856080104, 'learning_rate': 9.847104316227183e-06, 'epoch': 0.11} + 11%|█ | 1300/12188 [2:42:55<21:58:24, 7.27s/it] 11%|█ | 1301/12188 [2:43:03<22:06:32, 7.31s/it] {'loss': 0.4279, 'grad_norm': 0.7040991162333894, 'learning_rate': 9.84677807511647e-06, 'epoch': 0.11} + 11%|█ | 1301/12188 [2:43:03<22:06:32, 7.31s/it] 11%|█ | 1302/12188 [2:43:10<21:50:44, 7.22s/it] {'loss': 0.4245, 'grad_norm': 0.6223403987472522, 'learning_rate': 9.846451491734077e-06, 'epoch': 0.11} + 11%|█ | 1302/12188 [2:43:10<21:50:44, 7.22s/it] 11%|█ | 1303/12188 [2:43:17<22:08:34, 7.32s/it] {'loss': 0.3957, 'grad_norm': 0.6567983767718362, 'learning_rate': 9.846124566103065e-06, 'epoch': 0.11} + 11%|█ | 1303/12188 [2:43:17<22:08:34, 7.32s/it] 11%|█ | 1304/12188 [2:43:25<22:03:20, 7.30s/it] {'loss': 0.4145, 'grad_norm': 0.7116349186158616, 'learning_rate': 9.845797298246525e-06, 'epoch': 0.11} + 11%|█ | 1304/12188 [2:43:25<22:03:20, 7.30s/it] 11%|█ | 1305/12188 [2:43:31<21:29:47, 7.11s/it] {'loss': 0.4525, 'grad_norm': 0.717719604744436, 'learning_rate': 9.845469688187562e-06, 'epoch': 0.11} + 11%|█ | 1305/12188 [2:43:31<21:29:47, 7.11s/it] 11%|█ | 1306/12188 [2:43:40<22:45:11, 7.53s/it] {'loss': 0.4612, 'grad_norm': 0.6919400645963889, 'learning_rate': 9.845141735949317e-06, 'epoch': 0.11} + 11%|█ | 1306/12188 [2:43:40<22:45:11, 7.53s/it] 11%|█ | 1307/12188 [2:43:47<22:42:36, 7.51s/it] {'loss': 0.4008, 'grad_norm': 0.6930208961648927, 'learning_rate': 9.844813441554945e-06, 'epoch': 0.11} + 11%|█ | 1307/12188 [2:43:47<22:42:36, 7.51s/it] 11%|█ | 1308/12188 [2:43:55<22:29:54, 7.44s/it] {'loss': 0.3841, 'grad_norm': 0.6634550288776946, 'learning_rate': 9.844484805027631e-06, 'epoch': 0.11} + 11%|█ | 1308/12188 [2:43:55<22:29:54, 7.44s/it] 11%|█ | 1309/12188 [2:44:02<22:18:46, 7.38s/it] {'loss': 0.4226, 'grad_norm': 0.7076601535907917, 'learning_rate': 9.844155826390585e-06, 'epoch': 0.11} + 11%|█ | 1309/12188 [2:44:02<22:18:46, 7.38s/it] 11%|█ | 1310/12188 [2:44:09<21:47:23, 7.21s/it] {'loss': 0.4451, 'grad_norm': 0.6904822351915076, 'learning_rate': 9.843826505667038e-06, 'epoch': 0.11} + 11%|█ | 1310/12188 [2:44:09<21:47:23, 7.21s/it] 11%|█ | 1311/12188 [2:44:16<22:05:29, 7.31s/it] {'loss': 0.3895, 'grad_norm': 0.6489605443167706, 'learning_rate': 9.843496842880242e-06, 'epoch': 0.11} + 11%|█ | 1311/12188 [2:44:16<22:05:29, 7.31s/it] 11%|█ | 1312/12188 [2:44:23<22:02:03, 7.29s/it] {'loss': 0.3857, 'grad_norm': 0.6303222507067854, 'learning_rate': 9.843166838053484e-06, 'epoch': 0.11} + 11%|█ | 1312/12188 [2:44:23<22:02:03, 7.29s/it] 11%|█ | 1313/12188 [2:44:31<22:25:28, 7.42s/it] {'loss': 0.4262, 'grad_norm': 0.7154287113044904, 'learning_rate': 9.842836491210065e-06, 'epoch': 0.11} + 11%|█ | 1313/12188 [2:44:31<22:25:28, 7.42s/it] 11%|█ | 1314/12188 [2:44:38<21:54:37, 7.25s/it] {'loss': 0.418, 'grad_norm': 0.6895409665767743, 'learning_rate': 9.842505802373311e-06, 'epoch': 0.11} + 11%|█ | 1314/12188 [2:44:38<21:54:37, 7.25s/it] 11%|█ | 1315/12188 [2:44:49<25:23:19, 8.41s/it] {'loss': 0.3705, 'grad_norm': 0.6091491376286569, 'learning_rate': 9.84217477156658e-06, 'epoch': 0.11} + 11%|█ | 1315/12188 [2:44:49<25:23:19, 8.41s/it] 11%|█ | 1316/12188 [2:44:56<24:02:25, 7.96s/it] {'loss': 0.3948, 'grad_norm': 0.6397348981708049, 'learning_rate': 9.841843398813244e-06, 'epoch': 0.11} + 11%|█ | 1316/12188 [2:44:56<24:02:25, 7.96s/it] 11%|█ | 1317/12188 [2:45:04<24:08:53, 8.00s/it] {'loss': 0.3851, 'grad_norm': 0.6435997798655205, 'learning_rate': 9.841511684136706e-06, 'epoch': 0.11} + 11%|█ | 1317/12188 [2:45:04<24:08:53, 8.00s/it] 11%|█ | 1318/12188 [2:45:12<23:54:26, 7.92s/it] {'loss': 0.4232, 'grad_norm': 0.6225858656333272, 'learning_rate': 9.841179627560393e-06, 'epoch': 0.11} + 11%|█ | 1318/12188 [2:45:12<23:54:26, 7.92s/it] 11%|█ | 1319/12188 [2:45:19<23:27:19, 7.77s/it] {'loss': 0.4113, 'grad_norm': 0.6611355613557116, 'learning_rate': 9.840847229107753e-06, 'epoch': 0.11} + 11%|█ | 1319/12188 [2:45:19<23:27:19, 7.77s/it] 11%|█ | 1320/12188 [2:45:27<23:21:30, 7.74s/it] {'loss': 0.4134, 'grad_norm': 0.6900657488877701, 'learning_rate': 9.840514488802258e-06, 'epoch': 0.11} + 11%|█ | 1320/12188 [2:45:27<23:21:30, 7.74s/it] 11%|█ | 1321/12188 [2:45:34<22:16:50, 7.38s/it] {'loss': 0.4062, 'grad_norm': 0.6223353693937551, 'learning_rate': 9.840181406667407e-06, 'epoch': 0.11} + 11%|█ | 1321/12188 [2:45:34<22:16:50, 7.38s/it] 11%|█ | 1322/12188 [2:45:40<21:34:47, 7.15s/it] {'loss': 0.4567, 'grad_norm': 0.7053855203942552, 'learning_rate': 9.839847982726721e-06, 'epoch': 0.11} + 11%|█ | 1322/12188 [2:45:40<21:34:47, 7.15s/it] 11%|█ | 1323/12188 [2:45:49<22:46:07, 7.54s/it] {'loss': 0.4257, 'grad_norm': 0.7136399672823662, 'learning_rate': 9.839514217003747e-06, 'epoch': 0.11} + 11%|█ | 1323/12188 [2:45:49<22:46:07, 7.54s/it] 11%|█ | 1324/12188 [2:45:55<22:06:02, 7.32s/it] {'loss': 0.4449, 'grad_norm': 0.6816131032185019, 'learning_rate': 9.839180109522052e-06, 'epoch': 0.11} + 11%|█ | 1324/12188 [2:45:55<22:06:02, 7.32s/it] 11%|█ | 1325/12188 [2:46:02<21:44:24, 7.20s/it] {'loss': 0.3941, 'grad_norm': 0.6773406065022538, 'learning_rate': 9.838845660305237e-06, 'epoch': 0.11} + 11%|█ | 1325/12188 [2:46:02<21:44:24, 7.20s/it] 11%|█ | 1326/12188 [2:46:10<22:09:53, 7.35s/it] {'loss': 0.3877, 'grad_norm': 0.6324905605010055, 'learning_rate': 9.838510869376912e-06, 'epoch': 0.11} + 11%|█ | 1326/12188 [2:46:10<22:09:53, 7.35s/it] 11%|█ | 1327/12188 [2:46:18<22:21:53, 7.41s/it] {'loss': 0.399, 'grad_norm': 0.6299390018148128, 'learning_rate': 9.838175736760723e-06, 'epoch': 0.11} + 11%|█ | 1327/12188 [2:46:18<22:21:53, 7.41s/it] 11%|█ | 1328/12188 [2:46:26<23:17:54, 7.72s/it] {'loss': 0.3952, 'grad_norm': 0.6428600305383756, 'learning_rate': 9.837840262480339e-06, 'epoch': 0.11} + 11%|█ | 1328/12188 [2:46:26<23:17:54, 7.72s/it] 11%|█ | 1329/12188 [2:46:34<23:30:38, 7.79s/it] {'loss': 0.4701, 'grad_norm': 0.6942980175110712, 'learning_rate': 9.837504446559445e-06, 'epoch': 0.11} + 11%|█ | 1329/12188 [2:46:34<23:30:38, 7.79s/it] 11%|█ | 1330/12188 [2:46:41<22:31:12, 7.47s/it] {'loss': 0.4266, 'grad_norm': 0.7372368353410258, 'learning_rate': 9.837168289021762e-06, 'epoch': 0.11} + 11%|█ | 1330/12188 [2:46:41<22:31:12, 7.47s/it] 11%|█ | 1331/12188 [2:46:47<21:49:53, 7.24s/it] {'loss': 0.3995, 'grad_norm': 0.6314235493988264, 'learning_rate': 9.836831789891024e-06, 'epoch': 0.11} + 11%|█ | 1331/12188 [2:46:47<21:49:53, 7.24s/it] 11%|█ | 1332/12188 [2:46:54<21:18:06, 7.06s/it] {'loss': 0.4373, 'grad_norm': 0.6904742011891801, 'learning_rate': 9.836494949190997e-06, 'epoch': 0.11} + 11%|█ | 1332/12188 [2:46:54<21:18:06, 7.06s/it] 11%|█ | 1333/12188 [2:47:01<21:06:48, 7.00s/it] {'loss': 0.4556, 'grad_norm': 0.6735811760990934, 'learning_rate': 9.836157766945467e-06, 'epoch': 0.11} + 11%|█ | 1333/12188 [2:47:01<21:06:48, 7.00s/it] 11%|█ | 1334/12188 [2:47:08<20:53:59, 6.93s/it] {'loss': 0.4144, 'grad_norm': 0.628138309740677, 'learning_rate': 9.835820243178244e-06, 'epoch': 0.11} + 11%|█ | 1334/12188 [2:47:08<20:53:59, 6.93s/it] 11%|█ | 1335/12188 [2:47:15<20:58:25, 6.96s/it] {'loss': 0.4063, 'grad_norm': 0.6419761730822787, 'learning_rate': 9.835482377913167e-06, 'epoch': 0.11} + 11%|█ | 1335/12188 [2:47:15<20:58:25, 6.96s/it] 11%|█ | 1336/12188 [2:47:22<20:53:09, 6.93s/it] {'loss': 0.4115, 'grad_norm': 0.6711330065977402, 'learning_rate': 9.835144171174092e-06, 'epoch': 0.11} + 11%|█ | 1336/12188 [2:47:22<20:53:09, 6.93s/it] 11%|█ | 1337/12188 [2:47:28<20:48:24, 6.90s/it] {'loss': 0.4009, 'grad_norm': 0.6619015158581834, 'learning_rate': 9.834805622984904e-06, 'epoch': 0.11} + 11%|█ | 1337/12188 [2:47:28<20:48:24, 6.90s/it] 11%|█ | 1338/12188 [2:47:35<20:34:25, 6.83s/it] {'loss': 0.4162, 'grad_norm': 0.6665592544440841, 'learning_rate': 9.834466733369512e-06, 'epoch': 0.11} + 11%|█ | 1338/12188 [2:47:35<20:34:25, 6.83s/it] 11%|█ | 1339/12188 [2:47:43<22:02:50, 7.32s/it] {'loss': 0.3527, 'grad_norm': 0.5772220793684119, 'learning_rate': 9.834127502351844e-06, 'epoch': 0.11} + 11%|█ | 1339/12188 [2:47:44<22:02:50, 7.32s/it] 11%|█ | 1340/12188 [2:47:51<22:06:49, 7.34s/it] {'loss': 0.4148, 'grad_norm': 0.6885532831897582, 'learning_rate': 9.83378792995586e-06, 'epoch': 0.11} + 11%|█ | 1340/12188 [2:47:51<22:06:49, 7.34s/it] 11%|█ | 1341/12188 [2:47:58<22:04:12, 7.32s/it] {'loss': 0.4123, 'grad_norm': 0.7317675339355514, 'learning_rate': 9.833448016205539e-06, 'epoch': 0.11} + 11%|█ | 1341/12188 [2:47:58<22:04:12, 7.32s/it] 11%|█ | 1342/12188 [2:48:05<21:51:51, 7.26s/it] {'loss': 0.3912, 'grad_norm': 0.6467775430831076, 'learning_rate': 9.833107761124882e-06, 'epoch': 0.11} + 11%|█ | 1342/12188 [2:48:05<21:51:51, 7.26s/it] 11%|█ | 1343/12188 [2:48:12<21:49:30, 7.24s/it] {'loss': 0.4104, 'grad_norm': 0.6428868028500705, 'learning_rate': 9.832767164737922e-06, 'epoch': 0.11} + 11%|█ | 1343/12188 [2:48:12<21:49:30, 7.24s/it] 11%|█ | 1344/12188 [2:48:20<22:13:04, 7.38s/it] {'loss': 0.4309, 'grad_norm': 0.6357336110694325, 'learning_rate': 9.83242622706871e-06, 'epoch': 0.11} + 11%|█ | 1344/12188 [2:48:20<22:13:04, 7.38s/it] 11%|█ | 1345/12188 [2:48:27<21:52:07, 7.26s/it] {'loss': 0.4038, 'grad_norm': 0.6170507032570505, 'learning_rate': 9.83208494814132e-06, 'epoch': 0.11} + 11%|█ | 1345/12188 [2:48:27<21:52:07, 7.26s/it] 11%|█ | 1346/12188 [2:48:34<21:55:13, 7.28s/it] {'loss': 0.3893, 'grad_norm': 0.6742377444630175, 'learning_rate': 9.831743327979855e-06, 'epoch': 0.11} + 11%|█ | 1346/12188 [2:48:34<21:55:13, 7.28s/it] 11%|█ | 1347/12188 [2:48:41<21:19:07, 7.08s/it] {'loss': 0.4261, 'grad_norm': 0.640384665336136, 'learning_rate': 9.831401366608438e-06, 'epoch': 0.11} + 11%|█ | 1347/12188 [2:48:41<21:19:07, 7.08s/it] 11%|█ | 1348/12188 [2:48:49<22:01:23, 7.31s/it] {'loss': 0.3998, 'grad_norm': 0.6846455386374044, 'learning_rate': 9.831059064051221e-06, 'epoch': 0.11} + 11%|█ | 1348/12188 [2:48:49<22:01:23, 7.31s/it] 11%|█ | 1349/12188 [2:48:57<22:50:13, 7.58s/it] {'loss': 0.3958, 'grad_norm': 0.6797656807771957, 'learning_rate': 9.830716420332373e-06, 'epoch': 0.11} + 11%|█ | 1349/12188 [2:48:57<22:50:13, 7.58s/it] 11%|█ | 1350/12188 [2:49:04<22:17:04, 7.40s/it] {'loss': 0.4504, 'grad_norm': 0.6997086949444151, 'learning_rate': 9.830373435476092e-06, 'epoch': 0.11} + 11%|█ | 1350/12188 [2:49:04<22:17:04, 7.40s/it] 11%|█ | 1351/12188 [2:49:11<22:04:58, 7.34s/it] {'loss': 0.4305, 'grad_norm': 0.7244951685863529, 'learning_rate': 9.8300301095066e-06, 'epoch': 0.11} + 11%|█ | 1351/12188 [2:49:11<22:04:58, 7.34s/it] 11%|█ | 1352/12188 [2:49:19<21:57:15, 7.29s/it] {'loss': 0.4118, 'grad_norm': 0.6452638699470127, 'learning_rate': 9.829686442448141e-06, 'epoch': 0.11} + 11%|█ | 1352/12188 [2:49:19<21:57:15, 7.29s/it] 11%|█ | 1353/12188 [2:49:26<22:11:57, 7.38s/it] {'loss': 0.4179, 'grad_norm': 0.6635011012319026, 'learning_rate': 9.829342434324986e-06, 'epoch': 0.11} + 11%|█ | 1353/12188 [2:49:26<22:11:57, 7.38s/it] 11%|█ | 1354/12188 [2:49:34<22:40:42, 7.54s/it] {'loss': 0.3677, 'grad_norm': 0.5887538087171902, 'learning_rate': 9.828998085161428e-06, 'epoch': 0.11} + 11%|█ | 1354/12188 [2:49:34<22:40:42, 7.54s/it] 11%|█ | 1355/12188 [2:49:41<22:14:04, 7.39s/it] {'loss': 0.4038, 'grad_norm': 0.6627385616143883, 'learning_rate': 9.828653394981781e-06, 'epoch': 0.11} + 11%|█ | 1355/12188 [2:49:41<22:14:04, 7.39s/it][2025-08-17 00:00:16,545] [WARNING] [stage3.py:2118:step] 1 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time + 11%|█ | 1356/12188 [2:49:50<23:51:27, 7.93s/it] {'loss': 0.4007, 'grad_norm': 0.6230572814064329, 'learning_rate': 9.828308363810392e-06, 'epoch': 0.11} + 11%|█ | 1356/12188 [2:49:50<23:51:27, 7.93s/it] 11%|█ | 1357/12188 [2:49:57<22:52:51, 7.61s/it] {'loss': 0.3939, 'grad_norm': 0.6867762837760197, 'learning_rate': 9.827962991671622e-06, 'epoch': 0.11} + 11%|█ | 1357/12188 [2:49:57<22:52:51, 7.61s/it] 11%|█ | 1358/12188 [2:50:05<22:47:35, 7.58s/it] {'loss': 0.3839, 'grad_norm': 0.6575349774473709, 'learning_rate': 9.827617278589864e-06, 'epoch': 0.11} + 11%|█ | 1358/12188 [2:50:05<22:47:35, 7.58s/it] 11%|█ | 1359/12188 [2:50:11<22:01:38, 7.32s/it] {'loss': 0.4107, 'grad_norm': 0.6721876841910309, 'learning_rate': 9.827271224589528e-06, 'epoch': 0.11} + 11%|█ | 1359/12188 [2:50:11<22:01:38, 7.32s/it] 11%|█ | 1360/12188 [2:50:18<21:50:11, 7.26s/it] {'loss': 0.4033, 'grad_norm': 0.7333397532933614, 'learning_rate': 9.826924829695055e-06, 'epoch': 0.11} + 11%|█ | 1360/12188 [2:50:18<21:50:11, 7.26s/it] 11%|█ | 1361/12188 [2:50:26<21:47:52, 7.25s/it] {'loss': 0.4298, 'grad_norm': 0.6389627815062873, 'learning_rate': 9.826578093930904e-06, 'epoch': 0.11} + 11%|█ | 1361/12188 [2:50:26<21:47:52, 7.25s/it] 11%|█ | 1362/12188 [2:50:33<21:42:42, 7.22s/it] {'loss': 0.4176, 'grad_norm': 0.6259215557035291, 'learning_rate': 9.826231017321564e-06, 'epoch': 0.11} + 11%|█ | 1362/12188 [2:50:33<21:42:42, 7.22s/it] 11%|█ | 1363/12188 [2:50:41<22:37:25, 7.52s/it] {'loss': 0.4517, 'grad_norm': 0.7102221340638504, 'learning_rate': 9.825883599891543e-06, 'epoch': 0.11} + 11%|█ | 1363/12188 [2:50:41<22:37:25, 7.52s/it] 11%|█ | 1364/12188 [2:50:49<22:37:45, 7.53s/it] {'loss': 0.4415, 'grad_norm': 0.6408154444543539, 'learning_rate': 9.825535841665374e-06, 'epoch': 0.11} + 11%|█ | 1364/12188 [2:50:49<22:37:45, 7.53s/it] 11%|█ | 1365/12188 [2:50:55<21:57:17, 7.30s/it] {'loss': 0.4046, 'grad_norm': 0.6245098562365412, 'learning_rate': 9.825187742667619e-06, 'epoch': 0.11} + 11%|█ | 1365/12188 [2:50:55<21:57:17, 7.30s/it] 11%|█ | 1366/12188 [2:51:02<21:38:37, 7.20s/it] {'loss': 0.4143, 'grad_norm': 0.6551624449826411, 'learning_rate': 9.824839302922858e-06, 'epoch': 0.11} + 11%|█ | 1366/12188 [2:51:02<21:38:37, 7.20s/it] 11%|█ | 1367/12188 [2:51:10<21:49:00, 7.26s/it] {'loss': 0.3838, 'grad_norm': 0.6376963090165328, 'learning_rate': 9.824490522455695e-06, 'epoch': 0.11} + 11%|█ | 1367/12188 [2:51:10<21:49:00, 7.26s/it] 11%|█ | 1368/12188 [2:51:17<21:39:56, 7.21s/it] {'loss': 0.4216, 'grad_norm': 0.6749409300050256, 'learning_rate': 9.824141401290763e-06, 'epoch': 0.11} + 11%|█ | 1368/12188 [2:51:17<21:39:56, 7.21s/it] 11%|█ | 1369/12188 [2:51:24<21:14:14, 7.07s/it] {'loss': 0.3613, 'grad_norm': 0.6496808878233459, 'learning_rate': 9.823791939452717e-06, 'epoch': 0.11} + 11%|█ | 1369/12188 [2:51:24<21:14:14, 7.07s/it] 11%|█ | 1370/12188 [2:51:31<21:27:51, 7.14s/it] {'loss': 0.4004, 'grad_norm': 0.6177556392544961, 'learning_rate': 9.823442136966234e-06, 'epoch': 0.11} + 11%|█ | 1370/12188 [2:51:31<21:27:51, 7.14s/it] 11%|█ | 1371/12188 [2:51:38<21:32:45, 7.17s/it] {'loss': 0.4003, 'grad_norm': 0.653047177829386, 'learning_rate': 9.823091993856015e-06, 'epoch': 0.11} + 11%|█ | 1371/12188 [2:51:38<21:32:45, 7.17s/it] 11%|█▏ | 1372/12188 [2:51:45<20:58:40, 6.98s/it] {'loss': 0.3979, 'grad_norm': 0.6656525769510159, 'learning_rate': 9.82274151014679e-06, 'epoch': 0.11} + 11%|█▏ | 1372/12188 [2:51:45<20:58:40, 6.98s/it] 11%|█▏ | 1373/12188 [2:51:52<20:56:27, 6.97s/it] {'loss': 0.4233, 'grad_norm': 0.6715476297560008, 'learning_rate': 9.822390685863308e-06, 'epoch': 0.11} + 11%|█▏ | 1373/12188 [2:51:52<20:56:27, 6.97s/it] 11%|█▏ | 1374/12188 [2:51:59<21:13:58, 7.07s/it] {'loss': 0.4037, 'grad_norm': 0.6703898732264949, 'learning_rate': 9.822039521030342e-06, 'epoch': 0.11} + 11%|█▏ | 1374/12188 [2:51:59<21:13:58, 7.07s/it] 11%|█▏ | 1375/12188 [2:52:06<21:05:24, 7.02s/it] {'loss': 0.3937, 'grad_norm': 0.6647970689150947, 'learning_rate': 9.821688015672695e-06, 'epoch': 0.11} + 11%|█▏ | 1375/12188 [2:52:06<21:05:24, 7.02s/it] 11%|█▏ | 1376/12188 [2:52:13<21:09:54, 7.05s/it] {'loss': 0.4208, 'grad_norm': 0.6394261978115612, 'learning_rate': 9.821336169815185e-06, 'epoch': 0.11} + 11%|█▏ | 1376/12188 [2:52:13<21:09:54, 7.05s/it] 11%|█▏ | 1377/12188 [2:52:20<21:03:49, 7.01s/it] {'loss': 0.4107, 'grad_norm': 0.6379846953967013, 'learning_rate': 9.82098398348266e-06, 'epoch': 0.11} + 11%|█▏ | 1377/12188 [2:52:20<21:03:49, 7.01s/it] 11%|█▏ | 1378/12188 [2:52:30<23:30:38, 7.83s/it] {'loss': 0.4129, 'grad_norm': 0.6124335939708841, 'learning_rate': 9.820631456699992e-06, 'epoch': 0.11} + 11%|█▏ | 1378/12188 [2:52:30<23:30:38, 7.83s/it] 11%|█▏ | 1379/12188 [2:52:36<22:40:23, 7.55s/it] {'loss': 0.3802, 'grad_norm': 0.6813489952821664, 'learning_rate': 9.820278589492076e-06, 'epoch': 0.11} + 11%|█▏ | 1379/12188 [2:52:36<22:40:23, 7.55s/it] 11%|█▏ | 1380/12188 [2:52:44<22:27:17, 7.48s/it] {'loss': 0.3946, 'grad_norm': 0.6049032244510816, 'learning_rate': 9.819925381883831e-06, 'epoch': 0.11} + 11%|█▏ | 1380/12188 [2:52:44<22:27:17, 7.48s/it] 11%|█▏ | 1381/12188 [2:52:51<22:15:45, 7.42s/it] {'loss': 0.4383, 'grad_norm': 0.7205239851034201, 'learning_rate': 9.819571833900198e-06, 'epoch': 0.11} + 11%|█▏ | 1381/12188 [2:52:51<22:15:45, 7.42s/it] 11%|█▏ | 1382/12188 [2:52:58<21:58:17, 7.32s/it] {'loss': 0.4588, 'grad_norm': 0.6731353993097979, 'learning_rate': 9.819217945566146e-06, 'epoch': 0.11} + 11%|█▏ | 1382/12188 [2:52:58<21:58:17, 7.32s/it] 11%|█▏ | 1383/12188 [2:53:06<22:13:30, 7.40s/it] {'loss': 0.3678, 'grad_norm': 0.6701798598264213, 'learning_rate': 9.818863716906666e-06, 'epoch': 0.11} + 11%|█▏ | 1383/12188 [2:53:06<22:13:30, 7.40s/it] 11%|█▏ | 1384/12188 [2:53:16<25:01:19, 8.34s/it] {'loss': 0.442, 'grad_norm': 0.6712311589289186, 'learning_rate': 9.818509147946771e-06, 'epoch': 0.11} + 11%|█▏ | 1384/12188 [2:53:16<25:01:19, 8.34s/it] 11%|█▏ | 1385/12188 [2:53:23<23:43:28, 7.91s/it] {'loss': 0.3614, 'grad_norm': 0.6656612601713228, 'learning_rate': 9.818154238711504e-06, 'epoch': 0.11} + 11%|█▏ | 1385/12188 [2:53:23<23:43:28, 7.91s/it] 11%|█▏ | 1386/12188 [2:53:30<22:50:48, 7.61s/it] {'loss': 0.4206, 'grad_norm': 0.7143717432953391, 'learning_rate': 9.817798989225922e-06, 'epoch': 0.11} + 11%|█▏ | 1386/12188 [2:53:30<22:50:48, 7.61s/it] 11%|█▏ | 1387/12188 [2:53:37<21:55:48, 7.31s/it] {'loss': 0.4168, 'grad_norm': 0.6422093736534835, 'learning_rate': 9.817443399515119e-06, 'epoch': 0.11} + 11%|█▏ | 1387/12188 [2:53:37<21:55:48, 7.31s/it] 11%|█▏ | 1388/12188 [2:53:45<22:57:08, 7.65s/it] {'loss': 0.4319, 'grad_norm': 0.6235270697102051, 'learning_rate': 9.817087469604201e-06, 'epoch': 0.11} + 11%|█▏ | 1388/12188 [2:53:45<22:57:08, 7.65s/it] 11%|█▏ | 1389/12188 [2:53:52<22:12:22, 7.40s/it] {'loss': 0.4585, 'grad_norm': 0.6222532454603311, 'learning_rate': 9.816731199518306e-06, 'epoch': 0.11} + 11%|█▏ | 1389/12188 [2:53:52<22:12:22, 7.40s/it]/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py:3406: DecompressionBombWarning: Image size (108679428 pixels) exceeds limit of 89478485 pixels, could be decompression bomb DOS attack. + warnings.warn( + 11%|█▏ | 1390/12188 [2:53:59<21:42:55, 7.24s/it] {'loss': 0.3869, 'grad_norm': 0.6547530439738983, 'learning_rate': 9.816374589282592e-06, 'epoch': 0.11} + 11%|█▏ | 1390/12188 [2:53:59<21:42:55, 7.24s/it] 11%|█▏ | 1391/12188 [2:54:07<22:23:54, 7.47s/it] {'loss': 0.3692, 'grad_norm': 0.6163793568942376, 'learning_rate': 9.816017638922243e-06, 'epoch': 0.11} + 11%|█▏ | 1391/12188 [2:54:07<22:23:54, 7.47s/it] 11%|█▏ | 1392/12188 [2:54:13<21:32:28, 7.18s/it] {'loss': 0.4116, 'grad_norm': 0.6185712261935417, 'learning_rate': 9.815660348462465e-06, 'epoch': 0.11} + 11%|█▏ | 1392/12188 [2:54:13<21:32:28, 7.18s/it] 11%|█▏ | 1393/12188 [2:54:21<22:24:14, 7.47s/it] {'loss': 0.4231, 'grad_norm': 0.6696028205335488, 'learning_rate': 9.815302717928492e-06, 'epoch': 0.11} + 11%|█▏ | 1393/12188 [2:54:22<22:24:14, 7.47s/it] 11%|█▏ | 1394/12188 [2:54:29<21:59:05, 7.33s/it] {'loss': 0.4029, 'grad_norm': 0.6730500639621144, 'learning_rate': 9.814944747345576e-06, 'epoch': 0.11} + 11%|█▏ | 1394/12188 [2:54:29<21:59:05, 7.33s/it] 11%|█▏ | 1395/12188 [2:54:37<22:49:51, 7.62s/it] {'loss': 0.3758, 'grad_norm': 0.6578081932061403, 'learning_rate': 9.814586436738998e-06, 'epoch': 0.11} + 11%|█▏ | 1395/12188 [2:54:37<22:49:51, 7.62s/it] 11%|█▏ | 1396/12188 [2:54:44<22:38:05, 7.55s/it] {'loss': 0.3671, 'grad_norm': 0.658920807053115, 'learning_rate': 9.81422778613406e-06, 'epoch': 0.11} + 11%|█▏ | 1396/12188 [2:54:44<22:38:05, 7.55s/it] 11%|█▏ | 1397/12188 [2:54:51<22:07:19, 7.38s/it] {'loss': 0.4348, 'grad_norm': 0.6693453606440944, 'learning_rate': 9.813868795556092e-06, 'epoch': 0.11} + 11%|█▏ | 1397/12188 [2:54:51<22:07:19, 7.38s/it] 11%|█▏ | 1398/12188 [2:54:59<22:10:09, 7.40s/it] {'loss': 0.4268, 'grad_norm': 0.6832477109511588, 'learning_rate': 9.813509465030442e-06, 'epoch': 0.11} + 11%|█▏ | 1398/12188 [2:54:59<22:10:09, 7.40s/it] 11%|█▏ | 1399/12188 [2:55:05<21:37:20, 7.21s/it] {'loss': 0.4322, 'grad_norm': 0.6461336999545818, 'learning_rate': 9.813149794582489e-06, 'epoch': 0.11} + 11%|█▏ | 1399/12188 [2:55:05<21:37:20, 7.21s/it] 11%|█▏ | 1400/12188 [2:55:12<21:12:45, 7.08s/it] {'loss': 0.4207, 'grad_norm': 0.688336361764318, 'learning_rate': 9.812789784237628e-06, 'epoch': 0.11} + 11%|█▏ | 1400/12188 [2:55:12<21:12:45, 7.08s/it] 11%|█▏ | 1401/12188 [2:55:20<21:45:46, 7.26s/it] {'loss': 0.4233, 'grad_norm': 0.6078744749811151, 'learning_rate': 9.812429434021289e-06, 'epoch': 0.11} + 11%|█▏ | 1401/12188 [2:55:20<21:45:46, 7.26s/it] 12%|█▏ | 1402/12188 [2:55:28<22:48:40, 7.61s/it] {'loss': 0.4001, 'grad_norm': 0.6287283053473729, 'learning_rate': 9.812068743958912e-06, 'epoch': 0.12} + 12%|█▏ | 1402/12188 [2:55:28<22:48:40, 7.61s/it] 12%|█▏ | 1403/12188 [2:55:35<22:08:46, 7.39s/it] {'loss': 0.4051, 'grad_norm': 0.6520943351966546, 'learning_rate': 9.811707714075971e-06, 'epoch': 0.12} + 12%|█▏ | 1403/12188 [2:55:35<22:08:46, 7.39s/it] 12%|█▏ | 1404/12188 [2:55:44<23:27:39, 7.83s/it] {'loss': 0.3892, 'grad_norm': 0.6333914700691294, 'learning_rate': 9.811346344397963e-06, 'epoch': 0.12} + 12%|█▏ | 1404/12188 [2:55:44<23:27:39, 7.83s/it] 12%|█▏ | 1405/12188 [2:55:51<22:38:58, 7.56s/it] {'loss': 0.4501, 'grad_norm': 0.6776611372803375, 'learning_rate': 9.810984634950406e-06, 'epoch': 0.12} + 12%|█▏ | 1405/12188 [2:55:51<22:38:58, 7.56s/it] 12%|█▏ | 1406/12188 [2:55:58<21:59:49, 7.34s/it] {'loss': 0.4473, 'grad_norm': 0.6677028818057367, 'learning_rate': 9.810622585758844e-06, 'epoch': 0.12} + 12%|█▏ | 1406/12188 [2:55:58<21:59:49, 7.34s/it] 12%|█▏ | 1407/12188 [2:56:05<21:26:31, 7.16s/it] {'loss': 0.3942, 'grad_norm': 0.6388580191757065, 'learning_rate': 9.810260196848841e-06, 'epoch': 0.12} + 12%|█▏ | 1407/12188 [2:56:05<21:26:31, 7.16s/it] 12%|█▏ | 1408/12188 [2:56:11<20:55:25, 6.99s/it] {'loss': 0.4678, 'grad_norm': 0.6693782373647202, 'learning_rate': 9.809897468245994e-06, 'epoch': 0.12} + 12%|█▏ | 1408/12188 [2:56:11<20:55:25, 6.99s/it] 12%|█▏ | 1409/12188 [2:56:18<21:15:52, 7.10s/it] {'loss': 0.4456, 'grad_norm': 0.6823064610913764, 'learning_rate': 9.809534399975916e-06, 'epoch': 0.12} + 12%|█▏ | 1409/12188 [2:56:18<21:15:52, 7.10s/it] 12%|█▏ | 1410/12188 [2:56:26<21:15:47, 7.10s/it] {'loss': 0.4172, 'grad_norm': 0.6713905970540474, 'learning_rate': 9.809170992064245e-06, 'epoch': 0.12} + 12%|█▏ | 1410/12188 [2:56:26<21:15:47, 7.10s/it] 12%|█▏ | 1411/12188 [2:56:33<21:09:41, 7.07s/it] {'loss': 0.4536, 'grad_norm': 0.7041533246712243, 'learning_rate': 9.808807244536644e-06, 'epoch': 0.12} + 12%|█▏ | 1411/12188 [2:56:33<21:09:41, 7.07s/it] 12%|█▏ | 1412/12188 [2:56:40<21:09:16, 7.07s/it] {'loss': 0.4421, 'grad_norm': 0.6731651622997384, 'learning_rate': 9.808443157418803e-06, 'epoch': 0.12} + 12%|█▏ | 1412/12188 [2:56:40<21:09:16, 7.07s/it] 12%|█▏ | 1413/12188 [2:56:47<21:33:26, 7.20s/it] {'loss': 0.4346, 'grad_norm': 0.6840302780175814, 'learning_rate': 9.80807873073643e-06, 'epoch': 0.12} + 12%|█▏ | 1413/12188 [2:56:47<21:33:26, 7.20s/it] 12%|█▏ | 1414/12188 [2:56:54<21:05:30, 7.05s/it] {'loss': 0.3943, 'grad_norm': 0.6310584960138703, 'learning_rate': 9.807713964515263e-06, 'epoch': 0.12} + 12%|█▏ | 1414/12188 [2:56:54<21:05:30, 7.05s/it] 12%|█▏ | 1415/12188 [2:57:01<21:04:59, 7.05s/it] {'loss': 0.402, 'grad_norm': 0.6456573399494864, 'learning_rate': 9.80734885878106e-06, 'epoch': 0.12} + 12%|█▏ | 1415/12188 [2:57:01<21:04:59, 7.05s/it] 12%|█▏ | 1416/12188 [2:57:08<20:58:52, 7.01s/it] {'loss': 0.3841, 'grad_norm': 0.6534126002850384, 'learning_rate': 9.806983413559603e-06, 'epoch': 0.12} + 12%|█▏ | 1416/12188 [2:57:08<20:58:52, 7.01s/it] 12%|█▏ | 1417/12188 [2:57:16<22:02:42, 7.37s/it] {'loss': 0.4793, 'grad_norm': 0.723982129382008, 'learning_rate': 9.806617628876701e-06, 'epoch': 0.12} + 12%|█▏ | 1417/12188 [2:57:16<22:02:42, 7.37s/it] 12%|█▏ | 1418/12188 [2:57:23<22:00:17, 7.36s/it] {'loss': 0.4066, 'grad_norm': 0.7025621449068458, 'learning_rate': 9.806251504758184e-06, 'epoch': 0.12} + 12%|█▏ | 1418/12188 [2:57:23<22:00:17, 7.36s/it] 12%|█▏ | 1419/12188 [2:57:33<23:55:13, 8.00s/it] {'loss': 0.384, 'grad_norm': 0.6220652795533083, 'learning_rate': 9.805885041229909e-06, 'epoch': 0.12} + 12%|█▏ | 1419/12188 [2:57:33<23:55:13, 8.00s/it] 12%|█▏ | 1420/12188 [2:57:41<23:48:47, 7.96s/it] {'loss': 0.3838, 'grad_norm': 0.6515472402253747, 'learning_rate': 9.805518238317753e-06, 'epoch': 0.12} + 12%|█▏ | 1420/12188 [2:57:41<23:48:47, 7.96s/it] 12%|█▏ | 1421/12188 [2:57:48<22:58:44, 7.68s/it] {'loss': 0.3793, 'grad_norm': 0.6491221922491732, 'learning_rate': 9.805151096047619e-06, 'epoch': 0.12} + 12%|█▏ | 1421/12188 [2:57:48<22:58:44, 7.68s/it] 12%|█▏ | 1422/12188 [2:57:55<22:47:13, 7.62s/it] {'loss': 0.4167, 'grad_norm': 0.6596839103435707, 'learning_rate': 9.804783614445435e-06, 'epoch': 0.12} + 12%|█▏ | 1422/12188 [2:57:55<22:47:13, 7.62s/it] 12%|█▏ | 1423/12188 [2:58:03<22:31:45, 7.53s/it] {'loss': 0.4025, 'grad_norm': 0.6512736875104639, 'learning_rate': 9.804415793537152e-06, 'epoch': 0.12} + 12%|█▏ | 1423/12188 [2:58:03<22:31:45, 7.53s/it] 12%|█▏ | 1424/12188 [2:58:10<22:34:54, 7.55s/it] {'loss': 0.408, 'grad_norm': 0.6386226796780061, 'learning_rate': 9.804047633348744e-06, 'epoch': 0.12} + 12%|█▏ | 1424/12188 [2:58:10<22:34:54, 7.55s/it] 12%|█▏ | 1425/12188 [2:58:18<23:07:26, 7.73s/it] {'loss': 0.4222, 'grad_norm': 0.7183694068179068, 'learning_rate': 9.80367913390621e-06, 'epoch': 0.12} + 12%|█▏ | 1425/12188 [2:58:18<23:07:26, 7.73s/it] 12%|█▏ | 1426/12188 [2:58:26<23:01:55, 7.70s/it] {'loss': 0.412, 'grad_norm': 0.6616504260315532, 'learning_rate': 9.803310295235576e-06, 'epoch': 0.12} + 12%|█▏ | 1426/12188 [2:58:26<23:01:55, 7.70s/it] 12%|█▏ | 1427/12188 [2:58:33<22:39:55, 7.58s/it] {'loss': 0.4239, 'grad_norm': 0.6214319187062196, 'learning_rate': 9.802941117362882e-06, 'epoch': 0.12} + 12%|█▏ | 1427/12188 [2:58:33<22:39:55, 7.58s/it] 12%|█▏ | 1428/12188 [2:58:40<22:19:19, 7.47s/it] {'loss': 0.3967, 'grad_norm': 0.6261548881913358, 'learning_rate': 9.802571600314205e-06, 'epoch': 0.12} + 12%|█▏ | 1428/12188 [2:58:40<22:19:19, 7.47s/it] 12%|█▏ | 1429/12188 [2:58:47<21:45:46, 7.28s/it] {'loss': 0.4459, 'grad_norm': 0.6499442118833935, 'learning_rate': 9.802201744115637e-06, 'epoch': 0.12} + 12%|█▏ | 1429/12188 [2:58:47<21:45:46, 7.28s/it] 12%|█▏ | 1430/12188 [2:58:54<21:37:34, 7.24s/it] {'loss': 0.4007, 'grad_norm': 0.6827251354293176, 'learning_rate': 9.801831548793298e-06, 'epoch': 0.12} + 12%|█▏ | 1430/12188 [2:58:54<21:37:34, 7.24s/it] 12%|█▏ | 1431/12188 [2:59:02<21:53:00, 7.32s/it] {'loss': 0.3917, 'grad_norm': 0.6923666467800703, 'learning_rate': 9.801461014373328e-06, 'epoch': 0.12} + 12%|█▏ | 1431/12188 [2:59:02<21:53:00, 7.32s/it] 12%|█▏ | 1432/12188 [2:59:09<21:41:18, 7.26s/it] {'loss': 0.4248, 'grad_norm': 0.7114080587856705, 'learning_rate': 9.801090140881897e-06, 'epoch': 0.12} + 12%|█▏ | 1432/12188 [2:59:09<21:41:18, 7.26s/it] 12%|█▏ | 1433/12188 [2:59:16<21:37:21, 7.24s/it] {'loss': 0.4131, 'grad_norm': 0.6687261576880416, 'learning_rate': 9.800718928345195e-06, 'epoch': 0.12} + 12%|█▏ | 1433/12188 [2:59:16<21:37:21, 7.24s/it] 12%|█▏ | 1434/12188 [2:59:24<21:40:39, 7.26s/it] {'loss': 0.3988, 'grad_norm': 0.6665655245249072, 'learning_rate': 9.800347376789432e-06, 'epoch': 0.12} + 12%|█▏ | 1434/12188 [2:59:24<21:40:39, 7.26s/it] 12%|█▏ | 1435/12188 [2:59:31<21:43:40, 7.27s/it] {'loss': 0.4294, 'grad_norm': 0.7001700538370936, 'learning_rate': 9.799975486240852e-06, 'epoch': 0.12} + 12%|█▏ | 1435/12188 [2:59:31<21:43:40, 7.27s/it] 12%|█▏ | 1436/12188 [2:59:38<21:35:17, 7.23s/it] {'loss': 0.4121, 'grad_norm': 0.637932426326832, 'learning_rate': 9.799603256725713e-06, 'epoch': 0.12} + 12%|█▏ | 1436/12188 [2:59:38<21:35:17, 7.23s/it] 12%|█▏ | 1437/12188 [2:59:45<21:10:26, 7.09s/it] {'loss': 0.3966, 'grad_norm': 0.6733802201200529, 'learning_rate': 9.799230688270304e-06, 'epoch': 0.12} + 12%|█▏ | 1437/12188 [2:59:45<21:10:26, 7.09s/it] 12%|█▏ | 1438/12188 [2:59:52<21:18:00, 7.13s/it] {'loss': 0.4052, 'grad_norm': 0.6587022034130628, 'learning_rate': 9.798857780900936e-06, 'epoch': 0.12} + 12%|█▏ | 1438/12188 [2:59:52<21:18:00, 7.13s/it] 12%|█▏ | 1439/12188 [3:00:00<22:05:22, 7.40s/it] {'loss': 0.4305, 'grad_norm': 0.6823639356109784, 'learning_rate': 9.79848453464394e-06, 'epoch': 0.12} + 12%|█▏ | 1439/12188 [3:00:00<22:05:22, 7.40s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 90, in pil_loader + return img.convert("RGB") + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 993, in convert + self.load() + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/ImageFile.py", line 319, in load + raise _get_oserror(err_code, encoder=False) +OSError: broken data stream when reading image file +[Try #0] Failed to fetch sample 6013702 in VC:s3://gui-agent/data_20250623/windows_augment/images. Exception: broken data stream when reading image file +Problematic sample: {'image': 'autocad/20250508_161646_1/images/before_screenshot_1_id_73_internvl_appearance_crop_0_grounding_instructions_point_o_paste.png', 'conversations': [{'from': 'human', 'value': "\nLocate the UI element referred to: A tab-like button labeled 'Reference' with a downward-facing arrow indicator to its right. The button has a darker background than the surrounding toolbar area, with white text. It appears to be part of a toolbar or ribbon interface in AutoCAD Mechanical 2019."}, {'from': 'gpt', 'value': "A tab-like button labeled 'Reference' with a downward-facing arrow indicator to its right. The button has a darker background than the surrounding toolbar area, with white text. It appears to be part of a toolbar or ribbon interface in AutoCAD Mechanical 2019.[[142, 241, 183, 251]]"}], 'width': 2704, 'height': 1756} + 12%|█▏ | 1440/12188 [3:00:07<21:59:04, 7.36s/it] {'loss': 0.3806, 'grad_norm': 0.648886258147364, 'learning_rate': 9.798110949525675e-06, 'epoch': 0.12} + 12%|█▏ | 1440/12188 [3:00:07<21:59:04, 7.36s/it] 12%|█▏ | 1441/12188 [3:00:16<22:57:46, 7.69s/it] {'loss': 0.4062, 'grad_norm': 0.6495668577154488, 'learning_rate': 9.797737025572525e-06, 'epoch': 0.12} + 12%|█▏ | 1441/12188 [3:00:16<22:57:46, 7.69s/it] 12%|█▏ | 1442/12188 [3:00:22<22:07:54, 7.41s/it] {'loss': 0.4288, 'grad_norm': 0.6336336387035173, 'learning_rate': 9.797362762810892e-06, 'epoch': 0.12} + 12%|█▏ | 1442/12188 [3:00:22<22:07:54, 7.41s/it] 12%|█▏ | 1443/12188 [3:00:29<21:35:21, 7.23s/it] {'loss': 0.3882, 'grad_norm': 0.6213891744592384, 'learning_rate': 9.79698816126721e-06, 'epoch': 0.12} + 12%|█▏ | 1443/12188 [3:00:29<21:35:21, 7.23s/it] 12%|█▏ | 1444/12188 [3:00:37<22:04:08, 7.39s/it] {'loss': 0.3889, 'grad_norm': 0.6907357036173748, 'learning_rate': 9.79661322096793e-06, 'epoch': 0.12} + 12%|█▏ | 1444/12188 [3:00:37<22:04:08, 7.39s/it] 12%|█▏ | 1445/12188 [3:00:44<21:27:43, 7.19s/it] {'loss': 0.4061, 'grad_norm': 0.6924070650974781, 'learning_rate': 9.796237941939532e-06, 'epoch': 0.12} + 12%|█▏ | 1445/12188 [3:00:44<21:27:43, 7.19s/it] 12%|█▏ | 1446/12188 [3:00:52<22:08:00, 7.42s/it] {'loss': 0.4402, 'grad_norm': 0.665337839817549, 'learning_rate': 9.795862324208516e-06, 'epoch': 0.12} + 12%|█▏ | 1446/12188 [3:00:52<22:08:00, 7.42s/it] 12%|█▏ | 1447/12188 [3:00:59<21:49:41, 7.32s/it] {'loss': 0.3957, 'grad_norm': 0.6928690713668324, 'learning_rate': 9.795486367801407e-06, 'epoch': 0.12} + 12%|█▏ | 1447/12188 [3:00:59<21:49:41, 7.32s/it] 12%|█▏ | 1448/12188 [3:01:05<21:14:25, 7.12s/it] {'loss': 0.4455, 'grad_norm': 0.6779588140567485, 'learning_rate': 9.795110072744756e-06, 'epoch': 0.12} + 12%|█▏ | 1448/12188 [3:01:05<21:14:25, 7.12s/it] 12%|█▏ | 1449/12188 [3:01:16<24:16:18, 8.14s/it] {'loss': 0.4089, 'grad_norm': 0.6864053047353782, 'learning_rate': 9.794733439065135e-06, 'epoch': 0.12} + 12%|█▏ | 1449/12188 [3:01:16<24:16:18, 8.14s/it] 12%|█▏ | 1450/12188 [3:01:23<23:18:55, 7.82s/it] {'loss': 0.3751, 'grad_norm': 0.701251921500486, 'learning_rate': 9.794356466789143e-06, 'epoch': 0.12} + 12%|█▏ | 1450/12188 [3:01:23<23:18:55, 7.82s/it] 12%|█▏ | 1451/12188 [3:01:30<22:18:21, 7.48s/it] {'loss': 0.4091, 'grad_norm': 0.6246536266412346, 'learning_rate': 9.793979155943398e-06, 'epoch': 0.12} + 12%|█▏ | 1451/12188 [3:01:30<22:18:21, 7.48s/it] 12%|█▏ | 1452/12188 [3:01:37<22:17:29, 7.47s/it] {'loss': 0.4156, 'grad_norm': 0.7385480238135778, 'learning_rate': 9.793601506554549e-06, 'epoch': 0.12} + 12%|█▏ | 1452/12188 [3:01:37<22:17:29, 7.47s/it] 12%|█▏ | 1453/12188 [3:01:45<22:13:07, 7.45s/it] {'loss': 0.4013, 'grad_norm': 0.686089786193087, 'learning_rate': 9.793223518649262e-06, 'epoch': 0.12} + 12%|█▏ | 1453/12188 [3:01:45<22:13:07, 7.45s/it] 12%|█▏ | 1454/12188 [3:01:51<21:37:44, 7.25s/it] {'loss': 0.3626, 'grad_norm': 0.6669097158424796, 'learning_rate': 9.792845192254231e-06, 'epoch': 0.12} + 12%|█▏ | 1454/12188 [3:01:51<21:37:44, 7.25s/it] 12%|█▏ | 1455/12188 [3:01:58<21:04:46, 7.07s/it] {'loss': 0.3675, 'grad_norm': 0.669275619138824, 'learning_rate': 9.792466527396174e-06, 'epoch': 0.12} + 12%|█▏ | 1455/12188 [3:01:58<21:04:46, 7.07s/it] 12%|█▏ | 1456/12188 [3:02:06<21:25:54, 7.19s/it] {'loss': 0.4007, 'grad_norm': 0.6349744483467281, 'learning_rate': 9.79208752410183e-06, 'epoch': 0.12} + 12%|█▏ | 1456/12188 [3:02:06<21:25:54, 7.19s/it] 12%|█▏ | 1457/12188 [3:02:12<21:13:30, 7.12s/it] {'loss': 0.4324, 'grad_norm': 0.7917296463166117, 'learning_rate': 9.791708182397962e-06, 'epoch': 0.12} + 12%|█▏ | 1457/12188 [3:02:12<21:13:30, 7.12s/it] 12%|█▏ | 1458/12188 [3:02:19<21:01:25, 7.05s/it] {'loss': 0.371, 'grad_norm': 0.696327596787219, 'learning_rate': 9.791328502311363e-06, 'epoch': 0.12} + 12%|█▏ | 1458/12188 [3:02:19<21:01:25, 7.05s/it] 12%|█▏ | 1459/12188 [3:02:28<22:17:43, 7.48s/it] {'loss': 0.4139, 'grad_norm': 0.7164068055883751, 'learning_rate': 9.790948483868843e-06, 'epoch': 0.12} + 12%|█▏ | 1459/12188 [3:02:28<22:17:43, 7.48s/it] 12%|█▏ | 1460/12188 [3:02:36<22:52:17, 7.68s/it] {'loss': 0.3726, 'grad_norm': 0.6135485961271476, 'learning_rate': 9.790568127097238e-06, 'epoch': 0.12} + 12%|█▏ | 1460/12188 [3:02:36<22:52:17, 7.68s/it] 12%|█▏ | 1461/12188 [3:02:43<22:13:50, 7.46s/it] {'loss': 0.4011, 'grad_norm': 0.7427009327387653, 'learning_rate': 9.790187432023408e-06, 'epoch': 0.12} + 12%|█▏ | 1461/12188 [3:02:43<22:13:50, 7.46s/it] 12%|█▏ | 1462/12188 [3:02:50<21:48:21, 7.32s/it] {'loss': 0.4259, 'grad_norm': 0.718986600596263, 'learning_rate': 9.789806398674238e-06, 'epoch': 0.12} + 12%|█▏ | 1462/12188 [3:02:50<21:48:21, 7.32s/it] 12%|█▏ | 1463/12188 [3:02:57<21:31:31, 7.23s/it] {'loss': 0.3611, 'grad_norm': 0.643678238979875, 'learning_rate': 9.789425027076636e-06, 'epoch': 0.12} + 12%|█▏ | 1463/12188 [3:02:57<21:31:31, 7.23s/it] 12%|█▏ | 1464/12188 [3:03:04<21:06:58, 7.09s/it] {'loss': 0.3931, 'grad_norm': 0.672378338074051, 'learning_rate': 9.789043317257532e-06, 'epoch': 0.12} + 12%|█▏ | 1464/12188 [3:03:04<21:06:58, 7.09s/it] 12%|█▏ | 1465/12188 [3:03:10<20:48:31, 6.99s/it] {'loss': 0.3749, 'grad_norm': 0.6889385847152725, 'learning_rate': 9.788661269243884e-06, 'epoch': 0.12} + 12%|█▏ | 1465/12188 [3:03:10<20:48:31, 6.99s/it] 12%|█▏ | 1466/12188 [3:03:17<20:40:51, 6.94s/it] {'loss': 0.3831, 'grad_norm': 0.6049584217908063, 'learning_rate': 9.788278883062672e-06, 'epoch': 0.12} + 12%|█▏ | 1466/12188 [3:03:17<20:40:51, 6.94s/it] 12%|█▏ | 1467/12188 [3:03:25<21:29:40, 7.22s/it] {'loss': 0.422, 'grad_norm': 0.6103393212523193, 'learning_rate': 9.787896158740895e-06, 'epoch': 0.12} + 12%|█▏ | 1467/12188 [3:03:25<21:29:40, 7.22s/it] 12%|█▏ | 1468/12188 [3:03:33<22:07:47, 7.43s/it] {'loss': 0.395, 'grad_norm': 0.6763206537526724, 'learning_rate': 9.787513096305587e-06, 'epoch': 0.12} + 12%|█▏ | 1468/12188 [3:03:33<22:07:47, 7.43s/it] 12%|█▏ | 1469/12188 [3:03:40<21:38:33, 7.27s/it] {'loss': 0.4152, 'grad_norm': 0.6585679302283872, 'learning_rate': 9.787129695783794e-06, 'epoch': 0.12} + 12%|█▏ | 1469/12188 [3:03:40<21:38:33, 7.27s/it] 12%|█▏ | 1470/12188 [3:03:47<21:34:22, 7.25s/it] {'loss': 0.3986, 'grad_norm': 0.655229914171444, 'learning_rate': 9.786745957202593e-06, 'epoch': 0.12} + 12%|█▏ | 1470/12188 [3:03:47<21:34:22, 7.25s/it] 12%|█▏ | 1471/12188 [3:03:55<21:56:44, 7.37s/it] {'loss': 0.3899, 'grad_norm': 0.7672326689671043, 'learning_rate': 9.786361880589084e-06, 'epoch': 0.12} + 12%|█▏ | 1471/12188 [3:03:55<21:56:44, 7.37s/it] 12%|█▏ | 1472/12188 [3:04:02<21:25:25, 7.20s/it] {'loss': 0.4513, 'grad_norm': 0.8163721734610782, 'learning_rate': 9.78597746597039e-06, 'epoch': 0.12} + 12%|█▏ | 1472/12188 [3:04:02<21:25:25, 7.20s/it] 12%|█▏ | 1473/12188 [3:04:09<21:25:43, 7.20s/it] {'loss': 0.4271, 'grad_norm': 0.724878953553388, 'learning_rate': 9.785592713373653e-06, 'epoch': 0.12} + 12%|█▏ | 1473/12188 [3:04:09<21:25:43, 7.20s/it] 12%|█▏ | 1474/12188 [3:04:16<21:15:32, 7.14s/it] {'loss': 0.3971, 'grad_norm': 0.6928832141730781, 'learning_rate': 9.78520762282605e-06, 'epoch': 0.12} + 12%|█▏ | 1474/12188 [3:04:16<21:15:32, 7.14s/it] 12%|█▏ | 1475/12188 [3:04:24<21:51:06, 7.34s/it] {'loss': 0.3817, 'grad_norm': 0.6655896955464186, 'learning_rate': 9.784822194354771e-06, 'epoch': 0.12} + 12%|█▏ | 1475/12188 [3:04:24<21:51:06, 7.34s/it] 12%|█▏ | 1476/12188 [3:04:31<21:43:14, 7.30s/it] {'loss': 0.3726, 'grad_norm': 0.6966923904541389, 'learning_rate': 9.784436427987036e-06, 'epoch': 0.12} + 12%|█▏ | 1476/12188 [3:04:31<21:43:14, 7.30s/it] 12%|█▏ | 1477/12188 [3:04:38<21:44:19, 7.31s/it] {'loss': 0.4058, 'grad_norm': 0.6672374057189091, 'learning_rate': 9.784050323750089e-06, 'epoch': 0.12} + 12%|█▏ | 1477/12188 [3:04:38<21:44:19, 7.31s/it] 12%|█▏ | 1478/12188 [3:04:45<21:31:31, 7.24s/it] {'loss': 0.4166, 'grad_norm': 0.6636620055452467, 'learning_rate': 9.783663881671192e-06, 'epoch': 0.12} + 12%|█▏ | 1478/12188 [3:04:45<21:31:31, 7.24s/it] 12%|█▏ | 1479/12188 [3:04:52<21:12:13, 7.13s/it] {'loss': 0.4372, 'grad_norm': 0.7162779137978101, 'learning_rate': 9.78327710177764e-06, 'epoch': 0.12} + 12%|█▏ | 1479/12188 [3:04:52<21:12:13, 7.13s/it] 12%|█▏ | 1480/12188 [3:04:59<21:07:06, 7.10s/it] {'loss': 0.4075, 'grad_norm': 0.6573824957186701, 'learning_rate': 9.782889984096741e-06, 'epoch': 0.12} + 12%|█▏ | 1480/12188 [3:04:59<21:07:06, 7.10s/it] 12%|█▏ | 1481/12188 [3:05:07<21:45:51, 7.32s/it] {'loss': 0.42, 'grad_norm': 0.6421761311508196, 'learning_rate': 9.78250252865584e-06, 'epoch': 0.12} + 12%|█▏ | 1481/12188 [3:05:07<21:45:51, 7.32s/it] 12%|█▏ | 1482/12188 [3:05:15<22:08:05, 7.44s/it] {'loss': 0.3892, 'grad_norm': 0.6272964953918427, 'learning_rate': 9.78211473548229e-06, 'epoch': 0.12} + 12%|█▏ | 1482/12188 [3:05:15<22:08:05, 7.44s/it] 12%|█▏ | 1483/12188 [3:05:22<22:26:50, 7.55s/it] {'loss': 0.4371, 'grad_norm': 0.6519749527662803, 'learning_rate': 9.781726604603482e-06, 'epoch': 0.12} + 12%|█▏ | 1483/12188 [3:05:23<22:26:50, 7.55s/it] 12%|█▏ | 1484/12188 [3:05:29<21:50:45, 7.35s/it] {'loss': 0.4086, 'grad_norm': 0.6564008272749982, 'learning_rate': 9.781338136046823e-06, 'epoch': 0.12} + 12%|█▏ | 1484/12188 [3:05:29<21:50:45, 7.35s/it] 12%|█▏ | 1485/12188 [3:05:36<21:23:18, 7.19s/it] {'loss': 0.3715, 'grad_norm': 0.6127098811491234, 'learning_rate': 9.780949329839748e-06, 'epoch': 0.12} + 12%|█▏ | 1485/12188 [3:05:36<21:23:18, 7.19s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f71545da5c0> +[Try #0] Failed to fetch sample 4869201 in VC:s3://gui/OS-Atlas/desktop_domain/linux_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f71545da5c0> +Problematic sample: {'image': 'output_20240912_163920_original_screenshot_sub3.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Why won’t DVDs play? You might not have the right codecs installed, or the DVD might be the wrong region.'"}, {'from': 'gpt', 'value': '\nclick(x=0.2156, y=0.4535)\n'}]} + 12%|█▏ | 1486/12188 [3:05:45<22:25:26, 7.54s/it] {'loss': 0.4638, 'grad_norm': 0.7287987623487143, 'learning_rate': 9.78056018600971e-06, 'epoch': 0.12} + 12%|█▏ | 1486/12188 [3:05:45<22:25:26, 7.54s/it] 12%|█▏ | 1487/12188 [3:05:52<22:42:06, 7.64s/it] {'loss': 0.3787, 'grad_norm': 0.6934663468318079, 'learning_rate': 9.780170704584194e-06, 'epoch': 0.12} + 12%|█▏ | 1487/12188 [3:05:52<22:42:06, 7.64s/it] 12%|█▏ | 1488/12188 [3:06:00<22:16:10, 7.49s/it] {'loss': 0.3906, 'grad_norm': 0.6816676073160215, 'learning_rate': 9.779780885590703e-06, 'epoch': 0.12} + 12%|█▏ | 1488/12188 [3:06:00<22:16:10, 7.49s/it] 12%|█▏ | 1489/12188 [3:06:06<21:41:39, 7.30s/it] {'loss': 0.428, 'grad_norm': 0.8000506975955545, 'learning_rate': 9.779390729056764e-06, 'epoch': 0.12} + 12%|█▏ | 1489/12188 [3:06:06<21:41:39, 7.30s/it] 12%|█▏ | 1490/12188 [3:06:14<21:36:29, 7.27s/it] {'loss': 0.4122, 'grad_norm': 0.6905585108785627, 'learning_rate': 9.779000235009932e-06, 'epoch': 0.12} + 12%|█▏ | 1490/12188 [3:06:14<21:36:29, 7.27s/it] 12%|█▏ | 1491/12188 [3:06:22<22:33:24, 7.59s/it] {'loss': 0.4246, 'grad_norm': 0.7353478250838515, 'learning_rate': 9.77860940347778e-06, 'epoch': 0.12} + 12%|█▏ | 1491/12188 [3:06:22<22:33:24, 7.59s/it] 12%|█▏ | 1492/12188 [3:06:29<21:49:56, 7.35s/it] {'loss': 0.4184, 'grad_norm': 0.6542558113652103, 'learning_rate': 9.77821823448791e-06, 'epoch': 0.12} + 12%|█▏ | 1492/12188 [3:06:29<21:49:56, 7.35s/it] 12%|█▏ | 1493/12188 [3:06:35<21:08:55, 7.12s/it] {'loss': 0.3977, 'grad_norm': 0.6961342811356294, 'learning_rate': 9.777826728067945e-06, 'epoch': 0.12} + 12%|█▏ | 1493/12188 [3:06:35<21:08:55, 7.12s/it] 12%|█▏ | 1494/12188 [3:06:42<21:09:52, 7.12s/it] {'loss': 0.4393, 'grad_norm': 0.7620235386275245, 'learning_rate': 9.777434884245533e-06, 'epoch': 0.12} + 12%|█▏ | 1494/12188 [3:06:42<21:09:52, 7.12s/it] 12%|█▏ | 1495/12188 [3:06:49<20:52:39, 7.03s/it] {'loss': 0.3979, 'grad_norm': 0.6669329335694201, 'learning_rate': 9.777042703048345e-06, 'epoch': 0.12} + 12%|█▏ | 1495/12188 [3:06:49<20:52:39, 7.03s/it] 12%|█▏ | 1496/12188 [3:06:56<21:02:23, 7.08s/it] {'loss': 0.4299, 'grad_norm': 0.6577492281073142, 'learning_rate': 9.776650184504075e-06, 'epoch': 0.12} + 12%|█▏ | 1496/12188 [3:06:56<21:02:23, 7.08s/it] 12%|█▏ | 1497/12188 [3:07:03<20:48:45, 7.01s/it] {'loss': 0.3963, 'grad_norm': 0.6004601414485256, 'learning_rate': 9.776257328640444e-06, 'epoch': 0.12} + 12%|█▏ | 1497/12188 [3:07:03<20:48:45, 7.01s/it] 12%|█▏ | 1498/12188 [3:07:11<21:45:02, 7.32s/it] {'loss': 0.3859, 'grad_norm': 0.6735251535741144, 'learning_rate': 9.775864135485194e-06, 'epoch': 0.12} + 12%|█▏ | 1498/12188 [3:07:11<21:45:02, 7.32s/it] 12%|█▏ | 1499/12188 [3:07:18<21:20:58, 7.19s/it] {'loss': 0.4106, 'grad_norm': 0.6649752938090745, 'learning_rate': 9.775470605066091e-06, 'epoch': 0.12} + 12%|█▏ | 1499/12188 [3:07:18<21:20:58, 7.19s/it] 12%|█▏ | 1500/12188 [3:07:26<21:47:38, 7.34s/it] {'loss': 0.427, 'grad_norm': 0.7717801592655892, 'learning_rate': 9.77507673741093e-06, 'epoch': 0.12} + 12%|█▏ | 1500/12188 [3:07:26<21:47:38, 7.34s/it] 12%|█▏ | 1501/12188 [3:07:34<22:44:40, 7.66s/it] {'loss': 0.3843, 'grad_norm': 0.6541956403553429, 'learning_rate': 9.774682532547517e-06, 'epoch': 0.12} + 12%|█▏ | 1501/12188 [3:07:34<22:44:40, 7.66s/it] 12%|█▏ | 1502/12188 [3:07:41<22:15:45, 7.50s/it] {'loss': 0.3947, 'grad_norm': 0.6331243362936241, 'learning_rate': 9.774287990503696e-06, 'epoch': 0.12} + 12%|█▏ | 1502/12188 [3:07:41<22:15:45, 7.50s/it] 12%|█▏ | 1503/12188 [3:07:48<21:39:55, 7.30s/it] {'loss': 0.44, 'grad_norm': 0.7189759375048429, 'learning_rate': 9.773893111307328e-06, 'epoch': 0.12} + 12%|█▏ | 1503/12188 [3:07:48<21:39:55, 7.30s/it] 12%|█▏ | 1504/12188 [3:07:58<23:57:32, 8.07s/it] {'loss': 0.4379, 'grad_norm': 0.7537063038275403, 'learning_rate': 9.773497894986299e-06, 'epoch': 0.12} + 12%|█▏ | 1504/12188 [3:07:58<23:57:32, 8.07s/it] 12%|█▏ | 1505/12188 [3:08:06<23:35:48, 7.95s/it] {'loss': 0.388, 'grad_norm': 0.7086106428158296, 'learning_rate': 9.773102341568516e-06, 'epoch': 0.12} + 12%|█▏ | 1505/12188 [3:08:06<23:35:48, 7.95s/it] 12%|█▏ | 1506/12188 [3:08:13<22:27:06, 7.57s/it] {'loss': 0.3654, 'grad_norm': 0.6340296492106668, 'learning_rate': 9.772706451081916e-06, 'epoch': 0.12} + 12%|█▏ | 1506/12188 [3:08:13<22:27:06, 7.57s/it] 12%|█▏ | 1507/12188 [3:08:20<21:57:11, 7.40s/it] {'loss': 0.4101, 'grad_norm': 0.6762545384310434, 'learning_rate': 9.772310223554453e-06, 'epoch': 0.12} + 12%|█▏ | 1507/12188 [3:08:20<21:57:11, 7.40s/it] 12%|█▏ | 1508/12188 [3:08:27<22:05:32, 7.45s/it] {'loss': 0.3855, 'grad_norm': 0.7235524084373043, 'learning_rate': 9.77191365901411e-06, 'epoch': 0.12} + 12%|█▏ | 1508/12188 [3:08:27<22:05:32, 7.45s/it] 12%|█▏ | 1509/12188 [3:08:35<22:20:09, 7.53s/it] {'loss': 0.4371, 'grad_norm': 0.8211075387319363, 'learning_rate': 9.771516757488893e-06, 'epoch': 0.12} + 12%|█▏ | 1509/12188 [3:08:35<22:20:09, 7.53s/it] 12%|█▏ | 1510/12188 [3:08:42<21:34:36, 7.27s/it] {'loss': 0.4094, 'grad_norm': 0.6323429640408694, 'learning_rate': 9.771119519006827e-06, 'epoch': 0.12} + 12%|█▏ | 1510/12188 [3:08:42<21:34:36, 7.27s/it] 12%|█▏ | 1511/12188 [3:08:48<21:00:46, 7.09s/it] {'loss': 0.4477, 'grad_norm': 0.7516039792481141, 'learning_rate': 9.770721943595964e-06, 'epoch': 0.12} + 12%|█▏ | 1511/12188 [3:08:48<21:00:46, 7.09s/it] 12%|█▏ | 1512/12188 [3:08:55<21:07:07, 7.12s/it] {'loss': 0.379, 'grad_norm': 0.6920821834308618, 'learning_rate': 9.770324031284384e-06, 'epoch': 0.12} + 12%|█▏ | 1512/12188 [3:08:55<21:07:07, 7.12s/it] 12%|█▏ | 1513/12188 [3:09:03<21:23:54, 7.22s/it] {'loss': 0.3733, 'grad_norm': 0.6751960970228029, 'learning_rate': 9.769925782100185e-06, 'epoch': 0.12} + 12%|█▏ | 1513/12188 [3:09:03<21:23:54, 7.22s/it] 12%|█▏ | 1514/12188 [3:09:10<21:19:52, 7.19s/it] {'loss': 0.4277, 'grad_norm': 0.6437742319098139, 'learning_rate': 9.76952719607149e-06, 'epoch': 0.12} + 12%|█▏ | 1514/12188 [3:09:10<21:19:52, 7.19s/it] 12%|█▏ | 1515/12188 [3:09:19<22:45:24, 7.68s/it] {'loss': 0.4092, 'grad_norm': 0.6640988931469584, 'learning_rate': 9.769128273226448e-06, 'epoch': 0.12} + 12%|█▏ | 1515/12188 [3:09:19<22:45:24, 7.68s/it] 12%|█▏ | 1516/12188 [3:09:28<24:18:25, 8.20s/it] {'loss': 0.4295, 'grad_norm': 0.7973477720412948, 'learning_rate': 9.76872901359323e-06, 'epoch': 0.12} + 12%|█▏ | 1516/12188 [3:09:28<24:18:25, 8.20s/it] 12%|█▏ | 1517/12188 [3:09:36<23:50:16, 8.04s/it] {'loss': 0.428, 'grad_norm': 0.6922730630024705, 'learning_rate': 9.768329417200029e-06, 'epoch': 0.12} + 12%|█▏ | 1517/12188 [3:09:36<23:50:16, 8.04s/it] 12%|█▏ | 1518/12188 [3:09:43<22:37:52, 7.64s/it] {'loss': 0.3755, 'grad_norm': 0.6986213829772566, 'learning_rate': 9.767929484075067e-06, 'epoch': 0.12} + 12%|█▏ | 1518/12188 [3:09:43<22:37:52, 7.64s/it] 12%|█▏ | 1519/12188 [3:09:49<21:57:41, 7.41s/it] {'loss': 0.3936, 'grad_norm': 0.6493608730621981, 'learning_rate': 9.767529214246584e-06, 'epoch': 0.12} + 12%|█▏ | 1519/12188 [3:09:49<21:57:41, 7.41s/it] 12%|█▏ | 1520/12188 [3:09:56<21:36:11, 7.29s/it] {'loss': 0.3665, 'grad_norm': 0.6253465645438284, 'learning_rate': 9.767128607742848e-06, 'epoch': 0.12} + 12%|█▏ | 1520/12188 [3:09:56<21:36:11, 7.29s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7fca8925c310> +[Try #0] Failed to fetch sample 4420657 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7fca8925c310> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'What links here'"}, {'from': 'gpt', 'value': '\nclick(x=0.8555, y=0.361)\n'}]} + 12%|█▏ | 1521/12188 [3:10:04<21:44:23, 7.34s/it] {'loss': 0.3985, 'grad_norm': 0.7233093936495785, 'learning_rate': 9.766727664592148e-06, 'epoch': 0.12} + 12%|█▏ | 1521/12188 [3:10:04<21:44:23, 7.34s/it] 12%|█▏ | 1522/12188 [3:10:11<21:08:37, 7.14s/it] {'loss': 0.4156, 'grad_norm': 0.685905721431123, 'learning_rate': 9.7663263848228e-06, 'epoch': 0.12} + 12%|█▏ | 1522/12188 [3:10:11<21:08:37, 7.14s/it] 12%|█▏ | 1523/12188 [3:10:18<21:13:52, 7.17s/it] {'loss': 0.3775, 'grad_norm': 0.6682189641188312, 'learning_rate': 9.76592476846314e-06, 'epoch': 0.12} + 12%|█▏ | 1523/12188 [3:10:18<21:13:52, 7.17s/it] 13%|█▎ | 1524/12188 [3:10:25<21:19:22, 7.20s/it] {'loss': 0.4016, 'grad_norm': 0.6585730601204989, 'learning_rate': 9.76552281554153e-06, 'epoch': 0.13} + 13%|█▎ | 1524/12188 [3:10:25<21:19:22, 7.20s/it] 13%|█▎ | 1525/12188 [3:10:33<21:57:04, 7.41s/it] {'loss': 0.4087, 'grad_norm': 0.6364049782858605, 'learning_rate': 9.765120526086353e-06, 'epoch': 0.13} + 13%|█▎ | 1525/12188 [3:10:33<21:57:04, 7.41s/it] 13%|█▎ | 1526/12188 [3:10:40<21:31:34, 7.27s/it] {'loss': 0.4081, 'grad_norm': 0.6612288014458426, 'learning_rate': 9.764717900126024e-06, 'epoch': 0.13} + 13%|█▎ | 1526/12188 [3:10:40<21:31:34, 7.27s/it] 13%|█▎ | 1527/12188 [3:10:48<22:20:45, 7.55s/it] {'loss': 0.3961, 'grad_norm': 0.6983621467503891, 'learning_rate': 9.764314937688968e-06, 'epoch': 0.13} + 13%|█▎ | 1527/12188 [3:10:48<22:20:45, 7.55s/it] 13%|█▎ | 1528/12188 [3:10:55<21:32:02, 7.27s/it] {'loss': 0.37, 'grad_norm': 0.604202871290534, 'learning_rate': 9.76391163880365e-06, 'epoch': 0.13} + 13%|█▎ | 1528/12188 [3:10:55<21:32:02, 7.27s/it] 13%|█▎ | 1529/12188 [3:11:01<20:59:03, 7.09s/it] {'loss': 0.4139, 'grad_norm': 0.6796838387294488, 'learning_rate': 9.763508003498543e-06, 'epoch': 0.13} + 13%|█▎ | 1529/12188 [3:11:01<20:59:03, 7.09s/it] 13%|█▎ | 1530/12188 [3:11:08<20:41:02, 6.99s/it] {'loss': 0.4268, 'grad_norm': 0.6846546120982818, 'learning_rate': 9.763104031802153e-06, 'epoch': 0.13} + 13%|█▎ | 1530/12188 [3:11:08<20:41:02, 6.99s/it] 13%|█▎ | 1531/12188 [3:11:15<20:44:14, 7.01s/it] {'loss': 0.4318, 'grad_norm': 0.6839907930543507, 'learning_rate': 9.76269972374301e-06, 'epoch': 0.13} + 13%|█▎ | 1531/12188 [3:11:15<20:44:14, 7.01s/it] 13%|█▎ | 1532/12188 [3:11:22<20:38:14, 6.97s/it] {'loss': 0.39, 'grad_norm': 0.6276950126444646, 'learning_rate': 9.762295079349665e-06, 'epoch': 0.13} + 13%|█▎ | 1532/12188 [3:11:22<20:38:14, 6.97s/it] 13%|█▎ | 1533/12188 [3:11:29<20:47:59, 7.03s/it] {'loss': 0.4159, 'grad_norm': 0.6586170956670886, 'learning_rate': 9.761890098650693e-06, 'epoch': 0.13} + 13%|█▎ | 1533/12188 [3:11:29<20:47:59, 7.03s/it] 13%|█▎ | 1534/12188 [3:11:36<20:41:51, 6.99s/it] {'loss': 0.393, 'grad_norm': 0.6274130409913639, 'learning_rate': 9.761484781674692e-06, 'epoch': 0.13} + 13%|█▎ | 1534/12188 [3:11:36<20:41:51, 6.99s/it] 13%|█▎ | 1535/12188 [3:11:43<20:52:28, 7.05s/it] {'loss': 0.4414, 'grad_norm': 0.6885647765213672, 'learning_rate': 9.761079128450286e-06, 'epoch': 0.13} + 13%|█▎ | 1535/12188 [3:11:43<20:52:28, 7.05s/it] 13%|█▎ | 1536/12188 [3:11:51<21:12:38, 7.17s/it] {'loss': 0.3983, 'grad_norm': 0.6770569000456611, 'learning_rate': 9.76067313900612e-06, 'epoch': 0.13} + 13%|█▎ | 1536/12188 [3:11:51<21:12:38, 7.17s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f26e295f1a0> +[Try #0] Failed to fetch sample 4511138 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f26e295f1a0> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Developers'"}, {'from': 'gpt', 'value': '\nclick(x=0.951, y=0.558)\n'}]} + 13%|█▎ | 1537/12188 [3:12:01<23:46:08, 8.03s/it] {'loss': 0.3896, 'grad_norm': 0.6025019133497252, 'learning_rate': 9.760266813370866e-06, 'epoch': 0.13} + 13%|█▎ | 1537/12188 [3:12:01<23:46:08, 8.03s/it] 13%|█▎ | 1538/12188 [3:12:07<22:33:09, 7.62s/it] {'loss': 0.4056, 'grad_norm': 0.672886752232527, 'learning_rate': 9.759860151573219e-06, 'epoch': 0.13} + 13%|█▎ | 1538/12188 [3:12:07<22:33:09, 7.62s/it] 13%|█▎ | 1539/12188 [3:12:14<21:59:36, 7.44s/it] {'loss': 0.4026, 'grad_norm': 0.6814796459924373, 'learning_rate': 9.759453153641894e-06, 'epoch': 0.13} + 13%|█▎ | 1539/12188 [3:12:14<21:59:36, 7.44s/it] 13%|█▎ | 1540/12188 [3:12:22<21:53:29, 7.40s/it] {'loss': 0.4298, 'grad_norm': 0.6230451476365311, 'learning_rate': 9.759045819605635e-06, 'epoch': 0.13} + 13%|█▎ | 1540/12188 [3:12:22<21:53:29, 7.40s/it] 13%|█▎ | 1541/12188 [3:12:29<21:17:32, 7.20s/it] {'loss': 0.382, 'grad_norm': 0.6122463261793842, 'learning_rate': 9.758638149493205e-06, 'epoch': 0.13} + 13%|█▎ | 1541/12188 [3:12:29<21:17:32, 7.20s/it] 13%|█▎ | 1542/12188 [3:12:35<20:42:35, 7.00s/it] {'loss': 0.4223, 'grad_norm': 0.6536640626353912, 'learning_rate': 9.758230143333394e-06, 'epoch': 0.13} + 13%|█▎ | 1542/12188 [3:12:35<20:42:35, 7.00s/it] 13%|█▎ | 1543/12188 [3:12:42<20:34:01, 6.96s/it] {'loss': 0.4651, 'grad_norm': 0.7365533569634384, 'learning_rate': 9.757821801155016e-06, 'epoch': 0.13} + 13%|█▎ | 1543/12188 [3:12:42<20:34:01, 6.96s/it] 13%|█▎ | 1544/12188 [3:12:49<20:27:04, 6.92s/it] {'loss': 0.422, 'grad_norm': 0.6913869091421551, 'learning_rate': 9.757413122986907e-06, 'epoch': 0.13} + 13%|█▎ | 1544/12188 [3:12:49<20:27:04, 6.92s/it] 13%|█▎ | 1545/12188 [3:12:57<21:46:44, 7.37s/it] {'loss': 0.3927, 'grad_norm': 0.6993776107680874, 'learning_rate': 9.757004108857924e-06, 'epoch': 0.13} + 13%|█▎ | 1545/12188 [3:12:57<21:46:44, 7.37s/it] 13%|█▎ | 1546/12188 [3:13:05<22:06:55, 7.48s/it] {'loss': 0.406, 'grad_norm': 0.763587923762825, 'learning_rate': 9.756594758796955e-06, 'epoch': 0.13} + 13%|█▎ | 1546/12188 [3:13:05<22:06:55, 7.48s/it] 13%|█▎ | 1547/12188 [3:13:12<21:34:29, 7.30s/it] {'loss': 0.3647, 'grad_norm': 0.6484544809199568, 'learning_rate': 9.756185072832908e-06, 'epoch': 0.13} + 13%|█▎ | 1547/12188 [3:13:12<21:34:29, 7.30s/it] 13%|█▎ | 1548/12188 [3:13:19<21:10:18, 7.16s/it] {'loss': 0.3707, 'grad_norm': 0.7828990233929114, 'learning_rate': 9.755775050994711e-06, 'epoch': 0.13} + 13%|█▎ | 1548/12188 [3:13:19<21:10:18, 7.16s/it] 13%|█▎ | 1549/12188 [3:13:26<21:18:20, 7.21s/it] {'loss': 0.4225, 'grad_norm': 0.6855145497074832, 'learning_rate': 9.75536469331132e-06, 'epoch': 0.13} + 13%|█▎ | 1549/12188 [3:13:26<21:18:20, 7.21s/it] 13%|█▎ | 1550/12188 [3:13:34<21:42:05, 7.34s/it] {'loss': 0.4005, 'grad_norm': 0.6110355531734428, 'learning_rate': 9.754953999811716e-06, 'epoch': 0.13} + 13%|█▎ | 1550/12188 [3:13:34<21:42:05, 7.34s/it] 13%|█▎ | 1551/12188 [3:13:41<21:28:46, 7.27s/it] {'loss': 0.3965, 'grad_norm': 0.6659901079347503, 'learning_rate': 9.754542970524899e-06, 'epoch': 0.13} + 13%|█▎ | 1551/12188 [3:13:41<21:28:46, 7.27s/it] 13%|█▎ | 1552/12188 [3:13:48<21:39:21, 7.33s/it] {'loss': 0.3809, 'grad_norm': 0.6623992394837738, 'learning_rate': 9.754131605479895e-06, 'epoch': 0.13} + 13%|█▎ | 1552/12188 [3:13:48<21:39:21, 7.33s/it] 13%|█▎ | 1553/12188 [3:13:55<21:33:21, 7.30s/it] {'loss': 0.4085, 'grad_norm': 0.6910033007910751, 'learning_rate': 9.753719904705757e-06, 'epoch': 0.13} + 13%|█▎ | 1553/12188 [3:13:55<21:33:21, 7.30s/it] 13%|█▎ | 1554/12188 [3:14:03<21:27:48, 7.27s/it] {'loss': 0.3576, 'grad_norm': 0.6990140043657594, 'learning_rate': 9.753307868231556e-06, 'epoch': 0.13} + 13%|█▎ | 1554/12188 [3:14:03<21:27:48, 7.27s/it] 13%|█▎ | 1555/12188 [3:14:10<21:51:03, 7.40s/it] {'loss': 0.3828, 'grad_norm': 0.6845144052937195, 'learning_rate': 9.75289549608639e-06, 'epoch': 0.13} + 13%|█▎ | 1555/12188 [3:14:10<21:51:03, 7.40s/it] 13%|█▎ | 1556/12188 [3:14:17<21:16:23, 7.20s/it] {'loss': 0.4241, 'grad_norm': 0.671880369109681, 'learning_rate': 9.75248278829938e-06, 'epoch': 0.13} + 13%|█▎ | 1556/12188 [3:14:17<21:16:23, 7.20s/it] 13%|█▎ | 1557/12188 [3:14:25<22:17:37, 7.55s/it] {'loss': 0.376, 'grad_norm': 0.6684365876203728, 'learning_rate': 9.752069744899672e-06, 'epoch': 0.13} + 13%|█▎ | 1557/12188 [3:14:25<22:17:37, 7.55s/it] 13%|█▎ | 1558/12188 [3:14:34<22:51:40, 7.74s/it] {'loss': 0.4118, 'grad_norm': 0.67072213830453, 'learning_rate': 9.751656365916434e-06, 'epoch': 0.13} + 13%|█▎ | 1558/12188 [3:14:34<22:51:40, 7.74s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f096b57db70> +[Try #0] Failed to fetch sample 4512181 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f096b57db70> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'More'"}, {'from': 'gpt', 'value': '\nclick(x=0.927, y=0.1245)\n'}]} + 13%|█▎ | 1559/12188 [3:14:40<21:49:53, 7.39s/it] {'loss': 0.396, 'grad_norm': 0.6849174155905544, 'learning_rate': 9.751242651378856e-06, 'epoch': 0.13} + 13%|█▎ | 1559/12188 [3:14:40<21:49:53, 7.39s/it] 13%|█▎ | 1560/12188 [3:14:48<21:53:10, 7.41s/it] {'loss': 0.3809, 'grad_norm': 0.6586252790697917, 'learning_rate': 9.750828601316155e-06, 'epoch': 0.13} + 13%|█▎ | 1560/12188 [3:14:48<21:53:10, 7.41s/it] 13%|█▎ | 1561/12188 [3:14:55<21:40:08, 7.34s/it] {'loss': 0.4133, 'grad_norm': 0.6701736534069487, 'learning_rate': 9.750414215757571e-06, 'epoch': 0.13} + 13%|█▎ | 1561/12188 [3:14:55<21:40:08, 7.34s/it] 13%|█▎ | 1562/12188 [3:15:02<21:21:35, 7.24s/it] {'loss': 0.4195, 'grad_norm': 0.6890800721852113, 'learning_rate': 9.749999494732368e-06, 'epoch': 0.13} + 13%|█▎ | 1562/12188 [3:15:02<21:21:35, 7.24s/it] 13%|█▎ | 1563/12188 [3:15:12<23:39:53, 8.02s/it] {'loss': 0.3707, 'grad_norm': 0.641967336951711, 'learning_rate': 9.749584438269833e-06, 'epoch': 0.13} + 13%|█▎ | 1563/12188 [3:15:12<23:39:53, 8.02s/it] 13%|█▎ | 1564/12188 [3:15:18<22:36:48, 7.66s/it] {'loss': 0.4076, 'grad_norm': 0.6677077963114703, 'learning_rate': 9.749169046399274e-06, 'epoch': 0.13} + 13%|█▎ | 1564/12188 [3:15:18<22:36:48, 7.66s/it] 13%|█▎ | 1565/12188 [3:15:25<21:50:02, 7.40s/it] {'loss': 0.3893, 'grad_norm': 0.6692243003723569, 'learning_rate': 9.748753319150028e-06, 'epoch': 0.13} + 13%|█▎ | 1565/12188 [3:15:25<21:50:02, 7.40s/it] 13%|█▎ | 1566/12188 [3:15:32<21:36:42, 7.32s/it] {'loss': 0.3962, 'grad_norm': 0.6573151672467713, 'learning_rate': 9.748337256551452e-06, 'epoch': 0.13} + 13%|█▎ | 1566/12188 [3:15:32<21:36:42, 7.32s/it] 13%|█▎ | 1567/12188 [3:15:39<20:57:32, 7.10s/it] {'loss': 0.4031, 'grad_norm': 0.705883332149516, 'learning_rate': 9.747920858632926e-06, 'epoch': 0.13} + 13%|█▎ | 1567/12188 [3:15:39<20:57:32, 7.10s/it] 13%|█▎ | 1568/12188 [3:15:46<20:35:51, 6.98s/it] {'loss': 0.4079, 'grad_norm': 0.647385654723008, 'learning_rate': 9.74750412542386e-06, 'epoch': 0.13} + 13%|█▎ | 1568/12188 [3:15:46<20:35:51, 6.98s/it] 13%|█▎ | 1569/12188 [3:15:53<20:27:25, 6.94s/it] {'loss': 0.4003, 'grad_norm': 0.6753248391061647, 'learning_rate': 9.747087056953679e-06, 'epoch': 0.13} + 13%|█▎ | 1569/12188 [3:15:53<20:27:25, 6.94s/it] 13%|█▎ | 1570/12188 [3:15:59<20:18:30, 6.89s/it] {'loss': 0.4215, 'grad_norm': 0.6822622235183246, 'learning_rate': 9.746669653251837e-06, 'epoch': 0.13} + 13%|█▎ | 1570/12188 [3:15:59<20:18:30, 6.89s/it] 13%|█▎ | 1571/12188 [3:16:06<20:20:46, 6.90s/it] {'loss': 0.4132, 'grad_norm': 0.6666354553330124, 'learning_rate': 9.746251914347809e-06, 'epoch': 0.13} + 13%|█▎ | 1571/12188 [3:16:06<20:20:46, 6.90s/it] 13%|█▎ | 1572/12188 [3:16:13<20:15:37, 6.87s/it] {'loss': 0.455, 'grad_norm': 0.6266546383189241, 'learning_rate': 9.745833840271098e-06, 'epoch': 0.13} + 13%|█▎ | 1572/12188 [3:16:13<20:15:37, 6.87s/it] 13%|█▎ | 1573/12188 [3:16:23<22:58:20, 7.79s/it] {'loss': 0.3886, 'grad_norm': 0.6721047018914189, 'learning_rate': 9.745415431051224e-06, 'epoch': 0.13} + 13%|█▎ | 1573/12188 [3:16:23<22:58:20, 7.79s/it] 13%|█▎ | 1574/12188 [3:16:30<22:24:50, 7.60s/it] {'loss': 0.3843, 'grad_norm': 0.642027546087138, 'learning_rate': 9.744996686717738e-06, 'epoch': 0.13} + 13%|█▎ | 1574/12188 [3:16:30<22:24:50, 7.60s/it] 13%|█▎ | 1575/12188 [3:16:38<22:23:39, 7.60s/it] {'loss': 0.3941, 'grad_norm': 0.6675568600188481, 'learning_rate': 9.74457760730021e-06, 'epoch': 0.13} + 13%|█▎ | 1575/12188 [3:16:38<22:23:39, 7.60s/it] 13%|█▎ | 1576/12188 [3:16:44<21:37:44, 7.34s/it] {'loss': 0.3852, 'grad_norm': 0.677596289820008, 'learning_rate': 9.744158192828232e-06, 'epoch': 0.13} + 13%|█▎ | 1576/12188 [3:16:44<21:37:44, 7.34s/it] 13%|█▎ | 1577/12188 [3:16:52<21:55:00, 7.44s/it] {'loss': 0.431, 'grad_norm': 0.7176925654231475, 'learning_rate': 9.743738443331426e-06, 'epoch': 0.13} + 13%|█▎ | 1577/12188 [3:16:52<21:55:00, 7.44s/it] 13%|█▎ | 1578/12188 [3:17:00<22:00:54, 7.47s/it] {'loss': 0.419, 'grad_norm': 0.7021580854790684, 'learning_rate': 9.743318358839431e-06, 'epoch': 0.13} + 13%|█▎ | 1578/12188 [3:17:00<22:00:54, 7.47s/it] 13%|█▎ | 1579/12188 [3:17:06<21:21:02, 7.25s/it] {'loss': 0.3872, 'grad_norm': 0.6238288363839153, 'learning_rate': 9.742897939381915e-06, 'epoch': 0.13} + 13%|█▎ | 1579/12188 [3:17:06<21:21:02, 7.25s/it] 13%|█▎ | 1580/12188 [3:17:14<21:32:40, 7.31s/it] {'loss': 0.3688, 'grad_norm': 0.6363286311283066, 'learning_rate': 9.742477184988566e-06, 'epoch': 0.13} + 13%|█▎ | 1580/12188 [3:17:14<21:32:40, 7.31s/it] 13%|█▎ | 1581/12188 [3:17:21<21:25:01, 7.27s/it] {'loss': 0.4184, 'grad_norm': 0.6483499332765814, 'learning_rate': 9.7420560956891e-06, 'epoch': 0.13} + 13%|█▎ | 1581/12188 [3:17:21<21:25:01, 7.27s/it] 13%|█▎ | 1582/12188 [3:17:28<21:26:55, 7.28s/it] {'loss': 0.3918, 'grad_norm': 0.6944101918132317, 'learning_rate': 9.741634671513248e-06, 'epoch': 0.13} + 13%|█▎ | 1582/12188 [3:17:28<21:26:55, 7.28s/it] 13%|█▎ | 1583/12188 [3:17:35<20:58:30, 7.12s/it] {'loss': 0.394, 'grad_norm': 0.6429135169430492, 'learning_rate': 9.741212912490776e-06, 'epoch': 0.13} + 13%|█▎ | 1583/12188 [3:17:35<20:58:30, 7.12s/it] 13%|█▎ | 1584/12188 [3:17:43<21:17:12, 7.23s/it] {'loss': 0.392, 'grad_norm': 0.677766976561247, 'learning_rate': 9.740790818651464e-06, 'epoch': 0.13} + 13%|█▎ | 1584/12188 [3:17:43<21:17:12, 7.23s/it] 13%|█▎ | 1585/12188 [3:17:49<20:59:30, 7.13s/it] {'loss': 0.419, 'grad_norm': 0.6514365716765502, 'learning_rate': 9.740368390025121e-06, 'epoch': 0.13} + 13%|█▎ | 1585/12188 [3:17:49<20:59:30, 7.13s/it] 13%|█▎ | 1586/12188 [3:17:56<20:50:31, 7.08s/it] {'loss': 0.4563, 'grad_norm': 0.6811582557854216, 'learning_rate': 9.739945626641579e-06, 'epoch': 0.13} + 13%|█▎ | 1586/12188 [3:17:56<20:50:31, 7.08s/it] 13%|█▎ | 1587/12188 [3:18:04<21:08:45, 7.18s/it] {'loss': 0.4136, 'grad_norm': 0.6358867166150217, 'learning_rate': 9.73952252853069e-06, 'epoch': 0.13} + 13%|█▎ | 1587/12188 [3:18:04<21:08:45, 7.18s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 90, in pil_loader + return img.convert("RGB") + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 993, in convert + self.load() + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/ImageFile.py", line 319, in load + raise _get_oserror(err_code, encoder=False) +OSError: unrecognized data stream contents when reading image file +[Try #0] Failed to fetch sample 6013536 in VC:s3://gui-agent/data_20250623/windows_augment/images. Exception: unrecognized data stream contents when reading image file +Problematic sample: {'image': 'autocad/20250508_161646_1/images/before_screenshot_1_id_73_internvl_position_crop_0_grounding_instructions_point_o_paste.png', 'conversations': [{'from': 'human', 'value': "\nExtract the coordinates for: Located in the top ribbon toolbar of AutoCAD Mechanical 2019, within the Data section. It sits to the right of the 'Update Fields' button and to the left of the 'Download from Source' button in the same toolbar row."}, {'from': 'gpt', 'value': "Located in the top ribbon toolbar of AutoCAD Mechanical 2019, within the Data section. It sits to the right of the 'Update Fields' button and to the left of the 'Download from Source' button in the same toolbar row.[[343, 618, 355, 655]]"}], 'width': 3024, 'height': 1964} + 13%|█▎ | 1588/12188 [3:18:11<20:46:12, 7.05s/it] {'loss': 0.4028, 'grad_norm': 0.6276063124106761, 'learning_rate': 9.739099095722338e-06, 'epoch': 0.13} + 13%|█▎ | 1588/12188 [3:18:11<20:46:12, 7.05s/it] 13%|█▎ | 1589/12188 [3:18:17<20:32:34, 6.98s/it] {'loss': 0.3734, 'grad_norm': 0.6369402329269075, 'learning_rate': 9.738675328246421e-06, 'epoch': 0.13} + 13%|█▎ | 1589/12188 [3:18:17<20:32:34, 6.98s/it] 13%|█▎ | 1590/12188 [3:18:24<20:30:33, 6.97s/it] {'loss': 0.408, 'grad_norm': 0.6460562635696737, 'learning_rate': 9.738251226132864e-06, 'epoch': 0.13} + 13%|█▎ | 1590/12188 [3:18:24<20:30:33, 6.97s/it] 13%|█▎ | 1591/12188 [3:18:31<20:30:48, 6.97s/it] {'loss': 0.42, 'grad_norm': 0.643767647005558, 'learning_rate': 9.73782678941162e-06, 'epoch': 0.13} + 13%|█▎ | 1591/12188 [3:18:31<20:30:48, 6.97s/it] 13%|█▎ | 1592/12188 [3:18:38<20:19:10, 6.90s/it] {'loss': 0.4186, 'grad_norm': 0.7031999774290523, 'learning_rate': 9.737402018112658e-06, 'epoch': 0.13} + 13%|█▎ | 1592/12188 [3:18:38<20:19:10, 6.90s/it] 13%|█▎ | 1593/12188 [3:18:47<21:46:52, 7.40s/it] {'loss': 0.4617, 'grad_norm': 0.6590667109692648, 'learning_rate': 9.736976912265978e-06, 'epoch': 0.13} + 13%|█▎ | 1593/12188 [3:18:47<21:46:52, 7.40s/it] 13%|█▎ | 1594/12188 [3:18:53<21:08:14, 7.18s/it] {'loss': 0.4534, 'grad_norm': 0.6762325585216047, 'learning_rate': 9.7365514719016e-06, 'epoch': 0.13} + 13%|█▎ | 1594/12188 [3:18:53<21:08:14, 7.18s/it] 13%|█▎ | 1595/12188 [3:19:01<21:18:07, 7.24s/it] {'loss': 0.3726, 'grad_norm': 0.6401596533804377, 'learning_rate': 9.736125697049565e-06, 'epoch': 0.13} + 13%|█▎ | 1595/12188 [3:19:01<21:18:07, 7.24s/it] 13%|█▎ | 1596/12188 [3:19:13<25:26:33, 8.65s/it] {'loss': 0.3589, 'grad_norm': 0.6405624940820871, 'learning_rate': 9.735699587739943e-06, 'epoch': 0.13} + 13%|█▎ | 1596/12188 [3:19:13<25:26:33, 8.65s/it] 13%|█▎ | 1597/12188 [3:19:20<24:06:21, 8.19s/it] {'loss': 0.397, 'grad_norm': 0.656633938705024, 'learning_rate': 9.735273144002825e-06, 'epoch': 0.13} + 13%|█▎ | 1597/12188 [3:19:20<24:06:21, 8.19s/it] 13%|█▎ | 1598/12188 [3:19:27<23:17:17, 7.92s/it] {'loss': 0.4297, 'grad_norm': 0.6453958977047897, 'learning_rate': 9.734846365868324e-06, 'epoch': 0.13} + 13%|█▎ | 1598/12188 [3:19:27<23:17:17, 7.92s/it] 13%|█▎ | 1599/12188 [3:19:34<22:42:11, 7.72s/it] {'loss': 0.4506, 'grad_norm': 0.6388074051077589, 'learning_rate': 9.734419253366582e-06, 'epoch': 0.13} + 13%|█▎ | 1599/12188 [3:19:34<22:42:11, 7.72s/it] 13%|█▎ | 1600/12188 [3:19:42<22:35:51, 7.68s/it] {'loss': 0.3706, 'grad_norm': 0.6481145668065146, 'learning_rate': 9.733991806527758e-06, 'epoch': 0.13} + 13%|█▎ | 1600/12188 [3:19:42<22:35:51, 7.68s/it] 13%|█▎ | 1601/12188 [3:19:49<22:04:12, 7.50s/it] {'loss': 0.3948, 'grad_norm': 0.6499197453755916, 'learning_rate': 9.733564025382036e-06, 'epoch': 0.13} + 13%|█▎ | 1601/12188 [3:19:49<22:04:12, 7.50s/it] 13%|█▎ | 1602/12188 [3:19:56<21:54:57, 7.45s/it] {'loss': 0.3958, 'grad_norm': 0.6441508951583312, 'learning_rate': 9.73313590995963e-06, 'epoch': 0.13} + 13%|█▎ | 1602/12188 [3:19:56<21:54:57, 7.45s/it] 13%|█▎ | 1603/12188 [3:20:03<21:33:45, 7.33s/it] {'loss': 0.4084, 'grad_norm': 0.6508959288621353, 'learning_rate': 9.73270746029077e-06, 'epoch': 0.13} + 13%|█▎ | 1603/12188 [3:20:03<21:33:45, 7.33s/it] 13%|█▎ | 1604/12188 [3:20:11<21:36:12, 7.35s/it] {'loss': 0.3913, 'grad_norm': 0.5937972693079825, 'learning_rate': 9.732278676405714e-06, 'epoch': 0.13} + 13%|█▎ | 1604/12188 [3:20:11<21:36:12, 7.35s/it] 13%|█▎ | 1605/12188 [3:20:17<20:59:38, 7.14s/it] {'loss': 0.384, 'grad_norm': 0.7155186964664231, 'learning_rate': 9.731849558334738e-06, 'epoch': 0.13} + 13%|█▎ | 1605/12188 [3:20:17<20:59:38, 7.14s/it] 13%|█▎ | 1606/12188 [3:20:25<21:14:27, 7.23s/it] {'loss': 0.4801, 'grad_norm': 0.7067406114689281, 'learning_rate': 9.731420106108152e-06, 'epoch': 0.13} + 13%|█▎ | 1606/12188 [3:20:25<21:14:27, 7.23s/it] 13%|█▎ | 1607/12188 [3:20:33<22:10:02, 7.54s/it] {'loss': 0.3753, 'grad_norm': 0.6873451903847828, 'learning_rate': 9.730990319756277e-06, 'epoch': 0.13} + 13%|█▎ | 1607/12188 [3:20:33<22:10:02, 7.54s/it] 13%|█▎ | 1608/12188 [3:20:40<21:41:32, 7.38s/it] {'loss': 0.4314, 'grad_norm': 0.6487732952602883, 'learning_rate': 9.730560199309467e-06, 'epoch': 0.13} + 13%|█▎ | 1608/12188 [3:20:40<21:41:32, 7.38s/it] 13%|█▎ | 1609/12188 [3:20:47<21:35:50, 7.35s/it] {'loss': 0.3976, 'grad_norm': 0.724534452991904, 'learning_rate': 9.730129744798096e-06, 'epoch': 0.13} + 13%|█▎ | 1609/12188 [3:20:47<21:35:50, 7.35s/it] 13%|█▎ | 1610/12188 [3:20:54<21:18:03, 7.25s/it] {'loss': 0.4069, 'grad_norm': 0.6327196645760851, 'learning_rate': 9.729698956252563e-06, 'epoch': 0.13} + 13%|█▎ | 1610/12188 [3:20:54<21:18:03, 7.25s/it] 13%|█▎ | 1611/12188 [3:21:01<20:57:09, 7.13s/it] {'loss': 0.3564, 'grad_norm': 0.6190690600054813, 'learning_rate': 9.729267833703286e-06, 'epoch': 0.13} + 13%|█▎ | 1611/12188 [3:21:01<20:57:09, 7.13s/it] 13%|█▎ | 1612/12188 [3:21:08<20:52:33, 7.11s/it] {'loss': 0.3891, 'grad_norm': 0.724534896615605, 'learning_rate': 9.728836377180715e-06, 'epoch': 0.13} + 13%|█▎ | 1612/12188 [3:21:08<20:52:33, 7.11s/it] 13%|█▎ | 1613/12188 [3:21:15<20:24:28, 6.95s/it] {'loss': 0.451, 'grad_norm': 0.6428763179186291, 'learning_rate': 9.728404586715317e-06, 'epoch': 0.13} + 13%|█▎ | 1613/12188 [3:21:15<20:24:28, 6.95s/it] 13%|█▎ | 1614/12188 [3:21:22<20:37:21, 7.02s/it] {'loss': 0.4033, 'grad_norm': 0.6915871587078544, 'learning_rate': 9.727972462337583e-06, 'epoch': 0.13} + 13%|█▎ | 1614/12188 [3:21:22<20:37:21, 7.02s/it] 13%|█▎ | 1615/12188 [3:21:29<20:34:35, 7.01s/it] {'loss': 0.4272, 'grad_norm': 0.6728877466538178, 'learning_rate': 9.727540004078029e-06, 'epoch': 0.13} + 13%|█▎ | 1615/12188 [3:21:29<20:34:35, 7.01s/it] 13%|█▎ | 1616/12188 [3:21:38<21:54:34, 7.46s/it] {'loss': 0.4146, 'grad_norm': 0.6377912257148216, 'learning_rate': 9.727107211967194e-06, 'epoch': 0.13} + 13%|█▎ | 1616/12188 [3:21:38<21:54:34, 7.46s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f021799ab60> +[Try #0] Failed to fetch sample 4491913 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f021799ab60> +Problematic sample: {'image': '20240823_021250_before_screenshot_sub0.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Picture 2 of 18'"}, {'from': 'gpt', 'value': '\nclick(x=0.245, y=0.5745)\n'}]} + 13%|█▎ | 1617/12188 [3:21:45<21:59:38, 7.49s/it] {'loss': 0.3974, 'grad_norm': 0.6878320537648082, 'learning_rate': 9.726674086035645e-06, 'epoch': 0.13} + 13%|█▎ | 1617/12188 [3:21:45<21:59:38, 7.49s/it] 13%|█▎ | 1618/12188 [3:21:52<21:27:45, 7.31s/it] {'loss': 0.4037, 'grad_norm': 0.6743523886057567, 'learning_rate': 9.726240626313967e-06, 'epoch': 0.13} + 13%|█▎ | 1618/12188 [3:21:52<21:27:45, 7.31s/it] 13%|█▎ | 1619/12188 [3:21:59<21:21:34, 7.28s/it] {'loss': 0.3944, 'grad_norm': 0.6580666256219253, 'learning_rate': 9.725806832832768e-06, 'epoch': 0.13} + 13%|█▎ | 1619/12188 [3:21:59<21:21:34, 7.28s/it] 13%|█▎ | 1620/12188 [3:22:06<21:24:38, 7.29s/it] {'loss': 0.407, 'grad_norm': 0.7360789973516539, 'learning_rate': 9.725372705622682e-06, 'epoch': 0.13} + 13%|█▎ | 1620/12188 [3:22:07<21:24:38, 7.29s/it] 13%|█▎ | 1621/12188 [3:22:13<21:06:30, 7.19s/it] {'loss': 0.4326, 'grad_norm': 0.6721686485989398, 'learning_rate': 9.724938244714367e-06, 'epoch': 0.13} + 13%|█▎ | 1621/12188 [3:22:13<21:06:30, 7.19s/it] 13%|█▎ | 1622/12188 [3:22:20<20:54:49, 7.13s/it] {'loss': 0.3815, 'grad_norm': 0.6961730870434042, 'learning_rate': 9.724503450138505e-06, 'epoch': 0.13} + 13%|█▎ | 1622/12188 [3:22:20<20:54:49, 7.13s/it] 13%|█▎ | 1623/12188 [3:22:28<21:15:28, 7.24s/it] {'loss': 0.3745, 'grad_norm': 0.6528871925891847, 'learning_rate': 9.724068321925801e-06, 'epoch': 0.13} + 13%|█▎ | 1623/12188 [3:22:28<21:15:28, 7.24s/it] 13%|█▎ | 1624/12188 [3:22:35<21:12:25, 7.23s/it] {'loss': 0.4216, 'grad_norm': 0.6672025634607378, 'learning_rate': 9.72363286010698e-06, 'epoch': 0.13} + 13%|█▎ | 1624/12188 [3:22:35<21:12:25, 7.23s/it] 13%|█▎ | 1625/12188 [3:22:42<21:11:12, 7.22s/it] {'loss': 0.4358, 'grad_norm': 0.6754758622704397, 'learning_rate': 9.723197064712798e-06, 'epoch': 0.13} + 13%|█▎ | 1625/12188 [3:22:42<21:11:12, 7.22s/it] 13%|█▎ | 1626/12188 [3:22:50<21:51:40, 7.45s/it] {'loss': 0.4149, 'grad_norm': 0.6994786839829311, 'learning_rate': 9.722760935774026e-06, 'epoch': 0.13} + 13%|█▎ | 1626/12188 [3:22:50<21:51:40, 7.45s/it] 13%|█▎ | 1627/12188 [3:22:58<22:00:57, 7.50s/it] {'loss': 0.4249, 'grad_norm': 0.693281162401093, 'learning_rate': 9.722324473321464e-06, 'epoch': 0.13} + 13%|█▎ | 1627/12188 [3:22:58<22:00:57, 7.50s/it] 13%|█▎ | 1628/12188 [3:23:07<23:06:36, 7.88s/it] {'loss': 0.3527, 'grad_norm': 0.5856439581806301, 'learning_rate': 9.721887677385936e-06, 'epoch': 0.13} + 13%|█▎ | 1628/12188 [3:23:07<23:06:36, 7.88s/it] 13%|█▎ | 1629/12188 [3:23:16<24:28:11, 8.34s/it] {'loss': 0.3839, 'grad_norm': 0.6147829204125782, 'learning_rate': 9.721450547998285e-06, 'epoch': 0.13} + 13%|█▎ | 1629/12188 [3:23:16<24:28:11, 8.34s/it] 13%|█▎ | 1630/12188 [3:23:25<24:43:02, 8.43s/it] {'loss': 0.4115, 'grad_norm': 0.6606645073893808, 'learning_rate': 9.721013085189384e-06, 'epoch': 0.13} + 13%|█▎ | 1630/12188 [3:23:25<24:43:02, 8.43s/it] 13%|█▎ | 1631/12188 [3:23:32<23:21:34, 7.97s/it] {'loss': 0.3694, 'grad_norm': 0.6072703632690909, 'learning_rate': 9.720575288990121e-06, 'epoch': 0.13} + 13%|█▎ | 1631/12188 [3:23:32<23:21:34, 7.97s/it] 13%|█▎ | 1632/12188 [3:23:39<22:41:46, 7.74s/it] {'loss': 0.3731, 'grad_norm': 0.5983060404671314, 'learning_rate': 9.720137159431418e-06, 'epoch': 0.13} + 13%|█▎ | 1632/12188 [3:23:39<22:41:46, 7.74s/it] 13%|█▎ | 1633/12188 [3:23:47<22:58:31, 7.84s/it] {'loss': 0.3993, 'grad_norm': 0.6770176955428625, 'learning_rate': 9.719698696544211e-06, 'epoch': 0.13} + 13%|█▎ | 1633/12188 [3:23:47<22:58:31, 7.84s/it] 13%|█▎ | 1634/12188 [3:23:54<22:06:27, 7.54s/it] {'loss': 0.3581, 'grad_norm': 0.603181411966202, 'learning_rate': 9.719259900359466e-06, 'epoch': 0.13} + 13%|█▎ | 1634/12188 [3:23:54<22:06:27, 7.54s/it] 13%|█▎ | 1635/12188 [3:24:01<21:34:33, 7.36s/it] {'loss': 0.398, 'grad_norm': 0.6591991395281738, 'learning_rate': 9.718820770908167e-06, 'epoch': 0.13} + 13%|█▎ | 1635/12188 [3:24:01<21:34:33, 7.36s/it] 13%|█▎ | 1636/12188 [3:24:08<21:05:01, 7.19s/it] {'loss': 0.4099, 'grad_norm': 0.6702028119004705, 'learning_rate': 9.718381308221327e-06, 'epoch': 0.13} + 13%|█▎ | 1636/12188 [3:24:08<21:05:01, 7.19s/it] 13%|█▎ | 1637/12188 [3:24:15<21:04:54, 7.19s/it] {'loss': 0.3777, 'grad_norm': 0.6686821070192852, 'learning_rate': 9.71794151232998e-06, 'epoch': 0.13} + 13%|█▎ | 1637/12188 [3:24:15<21:04:54, 7.19s/it] 13%|█▎ | 1638/12188 [3:24:22<20:57:54, 7.15s/it] {'loss': 0.4023, 'grad_norm': 0.6738732607268347, 'learning_rate': 9.717501383265184e-06, 'epoch': 0.13} + 13%|█▎ | 1638/12188 [3:24:22<20:57:54, 7.15s/it] 13%|█▎ | 1639/12188 [3:24:29<21:15:08, 7.25s/it] {'loss': 0.3905, 'grad_norm': 0.6595127536320008, 'learning_rate': 9.717060921058019e-06, 'epoch': 0.13} + 13%|█▎ | 1639/12188 [3:24:29<21:15:08, 7.25s/it] 13%|█▎ | 1640/12188 [3:24:37<21:34:36, 7.36s/it] {'loss': 0.4006, 'grad_norm': 0.640499927147956, 'learning_rate': 9.71662012573959e-06, 'epoch': 0.13} + 13%|█▎ | 1640/12188 [3:24:37<21:34:36, 7.36s/it] 13%|█▎ | 1641/12188 [3:24:46<23:28:59, 8.02s/it] {'loss': 0.3536, 'grad_norm': 0.628518738795981, 'learning_rate': 9.716178997341026e-06, 'epoch': 0.13} + 13%|█▎ | 1641/12188 [3:24:46<23:28:59, 8.02s/it] 13%|█▎ | 1642/12188 [3:24:55<23:41:34, 8.09s/it] {'loss': 0.3729, 'grad_norm': 0.6405554700067475, 'learning_rate': 9.715737535893479e-06, 'epoch': 0.13} + 13%|█▎ | 1642/12188 [3:24:55<23:41:34, 8.09s/it] 13%|█▎ | 1643/12188 [3:25:02<23:27:21, 8.01s/it] {'loss': 0.3958, 'grad_norm': 0.6601629268467782, 'learning_rate': 9.715295741428123e-06, 'epoch': 0.13} + 13%|█▎ | 1643/12188 [3:25:02<23:27:21, 8.01s/it] 13%|█▎ | 1644/12188 [3:25:13<25:14:33, 8.62s/it] {'loss': 0.3621, 'grad_norm': 0.6828444145399717, 'learning_rate': 9.714853613976156e-06, 'epoch': 0.13} + 13%|█▎ | 1644/12188 [3:25:13<25:14:33, 8.62s/it] 13%|█▎ | 1645/12188 [3:25:19<23:33:18, 8.04s/it] {'loss': 0.366, 'grad_norm': 0.6578556878862182, 'learning_rate': 9.714411153568803e-06, 'epoch': 0.13} + 13%|█▎ | 1645/12188 [3:25:19<23:33:18, 8.04s/it] 14%|█▎ | 1646/12188 [3:25:26<22:47:35, 7.78s/it] {'loss': 0.4191, 'grad_norm': 0.6599618548446035, 'learning_rate': 9.71396836023731e-06, 'epoch': 0.14} + 14%|█▎ | 1646/12188 [3:25:26<22:47:35, 7.78s/it] 14%|█▎ | 1647/12188 [3:25:34<23:00:21, 7.86s/it] {'loss': 0.3818, 'grad_norm': 0.6006643823291847, 'learning_rate': 9.713525234012943e-06, 'epoch': 0.14} + 14%|█▎ | 1647/12188 [3:25:34<23:00:21, 7.86s/it] 14%|█▎ | 1648/12188 [3:25:45<24:59:50, 8.54s/it] {'loss': 0.3928, 'grad_norm': 0.6950765681279477, 'learning_rate': 9.713081774926997e-06, 'epoch': 0.14} + 14%|█▎ | 1648/12188 [3:25:45<24:59:50, 8.54s/it] 14%|█▎ | 1649/12188 [3:25:51<23:12:54, 7.93s/it] {'loss': 0.4226, 'grad_norm': 0.6446073714181629, 'learning_rate': 9.712637983010789e-06, 'epoch': 0.14} + 14%|█▎ | 1649/12188 [3:25:51<23:12:54, 7.93s/it] 14%|█▎ | 1650/12188 [3:25:59<23:25:56, 8.01s/it] {'loss': 0.4091, 'grad_norm': 0.6593469281304896, 'learning_rate': 9.712193858295657e-06, 'epoch': 0.14} + 14%|█▎ | 1650/12188 [3:25:59<23:25:56, 8.01s/it] 14%|█▎ | 1651/12188 [3:26:06<22:31:47, 7.70s/it] {'loss': 0.3963, 'grad_norm': 0.6232536444548172, 'learning_rate': 9.711749400812966e-06, 'epoch': 0.14} + 14%|█▎ | 1651/12188 [3:26:06<22:31:47, 7.70s/it] 14%|█▎ | 1652/12188 [3:26:14<22:45:50, 7.78s/it] {'loss': 0.4144, 'grad_norm': 0.5951451955354329, 'learning_rate': 9.711304610594104e-06, 'epoch': 0.14} + 14%|█▎ | 1652/12188 [3:26:14<22:45:50, 7.78s/it] 14%|█▎ | 1653/12188 [3:26:21<21:58:51, 7.51s/it] {'loss': 0.4017, 'grad_norm': 0.595148825129877, 'learning_rate': 9.710859487670478e-06, 'epoch': 0.14} + 14%|█▎ | 1653/12188 [3:26:21<21:58:51, 7.51s/it] 14%|█▎ | 1654/12188 [3:26:28<21:46:29, 7.44s/it] {'loss': 0.4063, 'grad_norm': 0.6660713741731098, 'learning_rate': 9.710414032073524e-06, 'epoch': 0.14} + 14%|█▎ | 1654/12188 [3:26:28<21:46:29, 7.44s/it] 14%|█▎ | 1655/12188 [3:26:35<21:17:29, 7.28s/it] {'loss': 0.4318, 'grad_norm': 0.6726252111452041, 'learning_rate': 9.709968243834698e-06, 'epoch': 0.14} + 14%|█▎ | 1655/12188 [3:26:35<21:17:29, 7.28s/it] 14%|█▎ | 1656/12188 [3:26:45<23:37:54, 8.08s/it] {'loss': 0.4077, 'grad_norm': 0.6247854459878368, 'learning_rate': 9.709522122985482e-06, 'epoch': 0.14} + 14%|█▎ | 1656/12188 [3:26:45<23:37:54, 8.08s/it] 14%|█▎ | 1657/12188 [3:26:52<22:17:45, 7.62s/it] {'loss': 0.4168, 'grad_norm': 0.7176238050532752, 'learning_rate': 9.709075669557381e-06, 'epoch': 0.14} + 14%|█▎ | 1657/12188 [3:26:52<22:17:45, 7.62s/it]Invalidate trace cache @ step 2: expected module 1, but got module 364 + 14%|█▎ | 1658/12188 [3:26:58<21:24:00, 7.32s/it] {'loss': 0.7494, 'grad_norm': 0.9698909817935001, 'learning_rate': 9.708628883581921e-06, 'epoch': 0.14} + 14%|█▎ | 1658/12188 [3:26:58<21:24:00, 7.32s/it] 14%|█▎ | 1659/12188 [3:27:06<21:42:13, 7.42s/it] {'loss': 0.3804, 'grad_norm': 0.6520930136761901, 'learning_rate': 9.708181765090654e-06, 'epoch': 0.14} + 14%|█▎ | 1659/12188 [3:27:06<21:42:13, 7.42s/it] 14%|█▎ | 1660/12188 [3:27:13<21:33:19, 7.37s/it] {'loss': 0.4019, 'grad_norm': 0.6785797200845367, 'learning_rate': 9.707734314115157e-06, 'epoch': 0.14} + 14%|█▎ | 1660/12188 [3:27:13<21:33:19, 7.37s/it] 14%|█▎ | 1661/12188 [3:27:20<21:16:43, 7.28s/it] {'loss': 0.3701, 'grad_norm': 0.5973399387471978, 'learning_rate': 9.707286530687025e-06, 'epoch': 0.14} + 14%|█▎ | 1661/12188 [3:27:20<21:16:43, 7.28s/it] 14%|█▎ | 1662/12188 [3:27:28<21:45:12, 7.44s/it] {'loss': 0.3877, 'grad_norm': 0.6373580329061939, 'learning_rate': 9.70683841483788e-06, 'epoch': 0.14} + 14%|█▎ | 1662/12188 [3:27:28<21:45:12, 7.44s/it] 14%|█▎ | 1663/12188 [3:27:35<21:07:42, 7.23s/it] {'loss': 0.3784, 'grad_norm': 0.648951531138014, 'learning_rate': 9.70638996659937e-06, 'epoch': 0.14} + 14%|█▎ | 1663/12188 [3:27:35<21:07:42, 7.23s/it] 14%|█▎ | 1664/12188 [3:27:42<21:05:55, 7.22s/it] {'loss': 0.3964, 'grad_norm': 0.6753928684898055, 'learning_rate': 9.70594118600316e-06, 'epoch': 0.14} + 14%|█▎ | 1664/12188 [3:27:42<21:05:55, 7.22s/it] 14%|█▎ | 1665/12188 [3:27:49<20:51:45, 7.14s/it] {'loss': 0.4197, 'grad_norm': 0.6433674837475958, 'learning_rate': 9.705492073080946e-06, 'epoch': 0.14} + 14%|█▎ | 1665/12188 [3:27:49<20:51:45, 7.14s/it] 14%|█▎ | 1666/12188 [3:27:57<21:24:47, 7.33s/it] {'loss': 0.4447, 'grad_norm': 0.653519472919519, 'learning_rate': 9.70504262786444e-06, 'epoch': 0.14} + 14%|█▎ | 1666/12188 [3:27:57<21:24:47, 7.33s/it] 14%|█▎ | 1667/12188 [3:28:04<21:07:52, 7.23s/it] {'loss': 0.4227, 'grad_norm': 0.7005015051730782, 'learning_rate': 9.704592850385385e-06, 'epoch': 0.14} + 14%|█▎ | 1667/12188 [3:28:04<21:07:52, 7.23s/it] 14%|█▎ | 1668/12188 [3:28:11<21:09:09, 7.24s/it] {'loss': 0.3611, 'grad_norm': 0.6252080681296962, 'learning_rate': 9.704142740675539e-06, 'epoch': 0.14} + 14%|█▎ | 1668/12188 [3:28:11<21:09:09, 7.24s/it] 14%|█▎ | 1669/12188 [3:28:18<20:48:28, 7.12s/it] {'loss': 0.4376, 'grad_norm': 0.6770551783137584, 'learning_rate': 9.703692298766694e-06, 'epoch': 0.14} + 14%|█▎ | 1669/12188 [3:28:18<20:48:28, 7.12s/it] 14%|█▎ | 1670/12188 [3:28:26<21:31:36, 7.37s/it] {'loss': 0.4181, 'grad_norm': 0.6419697702488025, 'learning_rate': 9.703241524690652e-06, 'epoch': 0.14} + 14%|█▎ | 1670/12188 [3:28:26<21:31:36, 7.37s/it] 14%|█▎ | 1671/12188 [3:28:33<21:00:23, 7.19s/it] {'loss': 0.3873, 'grad_norm': 0.646318590508612, 'learning_rate': 9.702790418479252e-06, 'epoch': 0.14} + 14%|█▎ | 1671/12188 [3:28:33<21:00:23, 7.19s/it] 14%|█▎ | 1672/12188 [3:28:40<20:48:15, 7.12s/it] {'loss': 0.4188, 'grad_norm': 0.6494908299913348, 'learning_rate': 9.702338980164347e-06, 'epoch': 0.14} + 14%|█▎ | 1672/12188 [3:28:40<20:48:15, 7.12s/it] 14%|█▎ | 1673/12188 [3:28:47<21:16:45, 7.29s/it] {'loss': 0.4262, 'grad_norm': 0.6730417992048091, 'learning_rate': 9.70188720977782e-06, 'epoch': 0.14} + 14%|█▎ | 1673/12188 [3:28:47<21:16:45, 7.29s/it] 14%|█▎ | 1674/12188 [3:28:59<24:56:44, 8.54s/it] {'loss': 0.4075, 'grad_norm': 0.6436000293832644, 'learning_rate': 9.70143510735157e-06, 'epoch': 0.14} + 14%|█▎ | 1674/12188 [3:28:59<24:56:44, 8.54s/it] 14%|█▎ | 1675/12188 [3:29:06<23:28:49, 8.04s/it] {'loss': 0.3738, 'grad_norm': 0.6493726664551049, 'learning_rate': 9.700982672917529e-06, 'epoch': 0.14} + 14%|█▎ | 1675/12188 [3:29:06<23:28:49, 8.04s/it] 14%|█▍ | 1676/12188 [3:29:13<22:31:39, 7.71s/it] {'loss': 0.4086, 'grad_norm': 0.6342064508158133, 'learning_rate': 9.700529906507642e-06, 'epoch': 0.14} + 14%|█▍ | 1676/12188 [3:29:13<22:31:39, 7.71s/it] 14%|█▍ | 1677/12188 [3:29:20<21:58:21, 7.53s/it] {'loss': 0.4005, 'grad_norm': 0.6588596547150405, 'learning_rate': 9.700076808153886e-06, 'epoch': 0.14} + 14%|█▍ | 1677/12188 [3:29:20<21:58:21, 7.53s/it] 14%|█▍ | 1678/12188 [3:29:26<21:12:54, 7.27s/it] {'loss': 0.4076, 'grad_norm': 0.6715754214652936, 'learning_rate': 9.699623377888256e-06, 'epoch': 0.14} + 14%|█▍ | 1678/12188 [3:29:26<21:12:54, 7.27s/it] 14%|█▍ | 1679/12188 [3:29:33<20:54:33, 7.16s/it] {'loss': 0.4104, 'grad_norm': 0.6622775738942146, 'learning_rate': 9.699169615742777e-06, 'epoch': 0.14} + 14%|█▍ | 1679/12188 [3:29:33<20:54:33, 7.16s/it] 14%|█▍ | 1680/12188 [3:29:40<20:37:56, 7.07s/it] {'loss': 0.3722, 'grad_norm': 0.5864352426916448, 'learning_rate': 9.698715521749485e-06, 'epoch': 0.14} + 14%|█▍ | 1680/12188 [3:29:40<20:37:56, 7.07s/it] 14%|█▍ | 1681/12188 [3:29:47<20:29:14, 7.02s/it] {'loss': 0.3747, 'grad_norm': 0.6262017438008677, 'learning_rate': 9.698261095940454e-06, 'epoch': 0.14} + 14%|█▍ | 1681/12188 [3:29:47<20:29:14, 7.02s/it] 14%|█▍ | 1682/12188 [3:29:54<20:25:08, 7.00s/it] {'loss': 0.3893, 'grad_norm': 0.6617649744167831, 'learning_rate': 9.697806338347774e-06, 'epoch': 0.14} + 14%|█▍ | 1682/12188 [3:29:54<20:25:08, 7.00s/it] 14%|█▍ | 1683/12188 [3:30:01<20:30:58, 7.03s/it] {'loss': 0.4056, 'grad_norm': 0.6471035734939772, 'learning_rate': 9.697351249003555e-06, 'epoch': 0.14} + 14%|█▍ | 1683/12188 [3:30:01<20:30:58, 7.03s/it] 14%|█▍ | 1684/12188 [3:30:09<21:06:24, 7.23s/it] {'loss': 0.3404, 'grad_norm': 0.6231307505526998, 'learning_rate': 9.69689582793994e-06, 'epoch': 0.14} + 14%|█▍ | 1684/12188 [3:30:09<21:06:24, 7.23s/it] 14%|█▍ | 1685/12188 [3:30:16<20:42:03, 7.10s/it] {'loss': 0.431, 'grad_norm': 0.7133051825106753, 'learning_rate': 9.696440075189086e-06, 'epoch': 0.14} + 14%|█▍ | 1685/12188 [3:30:16<20:42:03, 7.10s/it] 14%|█▍ | 1686/12188 [3:30:24<21:55:15, 7.51s/it] {'loss': 0.4002, 'grad_norm': 0.6612415628477052, 'learning_rate': 9.695983990783182e-06, 'epoch': 0.14} + 14%|█▍ | 1686/12188 [3:30:24<21:55:15, 7.51s/it] 14%|█▍ | 1687/12188 [3:30:31<21:51:51, 7.50s/it] {'loss': 0.4308, 'grad_norm': 0.7210108547353837, 'learning_rate': 9.695527574754431e-06, 'epoch': 0.14} + 14%|█▍ | 1687/12188 [3:30:31<21:51:51, 7.50s/it] 14%|█▍ | 1688/12188 [3:30:38<20:56:46, 7.18s/it] {'loss': 0.3973, 'grad_norm': 0.6537386152958304, 'learning_rate': 9.695070827135067e-06, 'epoch': 0.14} + 14%|█▍ | 1688/12188 [3:30:38<20:56:46, 7.18s/it] 14%|█▍ | 1689/12188 [3:30:46<21:42:06, 7.44s/it] {'loss': 0.3737, 'grad_norm': 0.6099174232641416, 'learning_rate': 9.694613747957344e-06, 'epoch': 0.14} + 14%|█▍ | 1689/12188 [3:30:46<21:42:06, 7.44s/it] 14%|█▍ | 1690/12188 [3:30:53<21:04:33, 7.23s/it] {'loss': 0.379, 'grad_norm': 0.6514161090072978, 'learning_rate': 9.694156337253541e-06, 'epoch': 0.14} + 14%|█▍ | 1690/12188 [3:30:53<21:04:33, 7.23s/it] 14%|█▍ | 1691/12188 [3:30:59<20:39:52, 7.09s/it] {'loss': 0.3715, 'grad_norm': 0.6785568772167118, 'learning_rate': 9.693698595055961e-06, 'epoch': 0.14} + 14%|█▍ | 1691/12188 [3:30:59<20:39:52, 7.09s/it] 14%|█▍ | 1692/12188 [3:31:07<20:56:01, 7.18s/it] {'loss': 0.3882, 'grad_norm': 0.6684727279853205, 'learning_rate': 9.693240521396924e-06, 'epoch': 0.14} + 14%|█▍ | 1692/12188 [3:31:07<20:56:01, 7.18s/it] 14%|█▍ | 1693/12188 [3:31:14<20:48:51, 7.14s/it] {'loss': 0.388, 'grad_norm': 0.6824204489363087, 'learning_rate': 9.692782116308783e-06, 'epoch': 0.14} + 14%|█▍ | 1693/12188 [3:31:14<20:48:51, 7.14s/it] 14%|█▍ | 1694/12188 [3:31:22<21:20:09, 7.32s/it] {'loss': 0.3957, 'grad_norm': 0.6617457633346283, 'learning_rate': 9.692323379823909e-06, 'epoch': 0.14} + 14%|█▍ | 1694/12188 [3:31:22<21:20:09, 7.32s/it] 14%|█▍ | 1695/12188 [3:31:28<20:49:52, 7.15s/it] {'loss': 0.3921, 'grad_norm': 0.6706517038511693, 'learning_rate': 9.691864311974697e-06, 'epoch': 0.14} + 14%|█▍ | 1695/12188 [3:31:28<20:49:52, 7.15s/it] 14%|█▍ | 1696/12188 [3:31:36<20:50:55, 7.15s/it] {'loss': 0.3955, 'grad_norm': 0.6371419961443001, 'learning_rate': 9.691404912793566e-06, 'epoch': 0.14} + 14%|█▍ | 1696/12188 [3:31:36<20:50:55, 7.15s/it] 14%|█▍ | 1697/12188 [3:31:43<21:12:02, 7.28s/it] {'loss': 0.3939, 'grad_norm': 0.6301764665548193, 'learning_rate': 9.690945182312955e-06, 'epoch': 0.14} + 14%|█▍ | 1697/12188 [3:31:43<21:12:02, 7.28s/it] 14%|█▍ | 1698/12188 [3:31:51<21:35:35, 7.41s/it] {'loss': 0.3747, 'grad_norm': 0.6520518829499622, 'learning_rate': 9.690485120565332e-06, 'epoch': 0.14} + 14%|█▍ | 1698/12188 [3:31:51<21:35:35, 7.41s/it] 14%|█▍ | 1699/12188 [3:31:58<21:01:52, 7.22s/it] {'loss': 0.4108, 'grad_norm': 0.6622015240661902, 'learning_rate': 9.690024727583186e-06, 'epoch': 0.14} + 14%|█▍ | 1699/12188 [3:31:58<21:01:52, 7.22s/it] 14%|█▍ | 1700/12188 [3:32:05<20:57:57, 7.20s/it] {'loss': 0.3927, 'grad_norm': 0.6292592132706053, 'learning_rate': 9.68956400339903e-06, 'epoch': 0.14} + 14%|█▍ | 1700/12188 [3:32:05<20:57:57, 7.20s/it] 14%|█▍ | 1701/12188 [3:32:12<20:44:59, 7.12s/it] {'loss': 0.3973, 'grad_norm': 0.672776217117184, 'learning_rate': 9.689102948045398e-06, 'epoch': 0.14} + 14%|█▍ | 1701/12188 [3:32:12<20:44:59, 7.12s/it] 14%|█▍ | 1702/12188 [3:32:19<21:14:47, 7.29s/it] {'loss': 0.3908, 'grad_norm': 0.6630569994583568, 'learning_rate': 9.688641561554849e-06, 'epoch': 0.14} + 14%|█▍ | 1702/12188 [3:32:19<21:14:47, 7.29s/it] 14%|█▍ | 1703/12188 [3:32:27<21:26:19, 7.36s/it] {'loss': 0.4148, 'grad_norm': 0.70631596259, 'learning_rate': 9.688179843959967e-06, 'epoch': 0.14} + 14%|█▍ | 1703/12188 [3:32:27<21:26:19, 7.36s/it] 14%|█▍ | 1704/12188 [3:32:34<21:16:01, 7.30s/it] {'loss': 0.4027, 'grad_norm': 0.6663008734514986, 'learning_rate': 9.687717795293354e-06, 'epoch': 0.14} + 14%|█▍ | 1704/12188 [3:32:34<21:16:01, 7.30s/it] 14%|█▍ | 1705/12188 [3:32:42<21:25:02, 7.36s/it] {'loss': 0.4111, 'grad_norm': 0.6495239279353695, 'learning_rate': 9.687255415587644e-06, 'epoch': 0.14} + 14%|█▍ | 1705/12188 [3:32:42<21:25:02, 7.36s/it] 14%|█▍ | 1706/12188 [3:32:49<21:22:25, 7.34s/it] {'loss': 0.3918, 'grad_norm': 0.7105889172836348, 'learning_rate': 9.686792704875487e-06, 'epoch': 0.14} + 14%|█▍ | 1706/12188 [3:32:49<21:22:25, 7.34s/it] 14%|█▍ | 1707/12188 [3:32:57<21:55:07, 7.53s/it] {'loss': 0.3699, 'grad_norm': 0.6287773029086667, 'learning_rate': 9.686329663189557e-06, 'epoch': 0.14} + 14%|█▍ | 1707/12188 [3:32:57<21:55:07, 7.53s/it] 14%|█▍ | 1708/12188 [3:33:04<21:40:39, 7.45s/it] {'loss': 0.4212, 'grad_norm': 0.7722245468513048, 'learning_rate': 9.685866290562557e-06, 'epoch': 0.14} + 14%|█▍ | 1708/12188 [3:33:04<21:40:39, 7.45s/it] 14%|█▍ | 1709/12188 [3:33:11<21:11:08, 7.28s/it] {'loss': 0.402, 'grad_norm': 0.6477937615087848, 'learning_rate': 9.685402587027208e-06, 'epoch': 0.14} + 14%|█▍ | 1709/12188 [3:33:11<21:11:08, 7.28s/it] 14%|█▍ | 1710/12188 [3:33:19<21:58:49, 7.55s/it] {'loss': 0.4222, 'grad_norm': 0.6635672581603141, 'learning_rate': 9.684938552616257e-06, 'epoch': 0.14} + 14%|█▍ | 1710/12188 [3:33:19<21:58:49, 7.55s/it] 14%|█▍ | 1711/12188 [3:33:27<22:15:13, 7.65s/it] {'loss': 0.4006, 'grad_norm': 0.6226620166068294, 'learning_rate': 9.68447418736247e-06, 'epoch': 0.14} + 14%|█▍ | 1711/12188 [3:33:27<22:15:13, 7.65s/it] 14%|█▍ | 1712/12188 [3:33:34<21:57:19, 7.54s/it] {'loss': 0.442, 'grad_norm': 0.711048848837852, 'learning_rate': 9.684009491298644e-06, 'epoch': 0.14} + 14%|█▍ | 1712/12188 [3:33:34<21:57:19, 7.54s/it] 14%|█▍ | 1713/12188 [3:33:41<21:20:29, 7.33s/it] {'loss': 0.4207, 'grad_norm': 0.6787796060834663, 'learning_rate': 9.683544464457593e-06, 'epoch': 0.14} + 14%|█▍ | 1713/12188 [3:33:41<21:20:29, 7.33s/it] 14%|█▍ | 1714/12188 [3:33:48<21:17:50, 7.32s/it] {'loss': 0.4017, 'grad_norm': 0.6186356445924795, 'learning_rate': 9.683079106872156e-06, 'epoch': 0.14} + 14%|█▍ | 1714/12188 [3:33:48<21:17:50, 7.32s/it] 14%|█▍ | 1715/12188 [3:33:56<21:17:37, 7.32s/it] {'loss': 0.4005, 'grad_norm': 0.6505845275733174, 'learning_rate': 9.682613418575197e-06, 'epoch': 0.14} + 14%|█▍ | 1715/12188 [3:33:56<21:17:37, 7.32s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1348, in _get_item + raise ValueError( +ValueError: Number of image tokens ['data/table/other_screenshot/original/ModernInteractiveTable_1739993892.4959967.png'] does not match number of images None +[Try #0] Failed to fetch sample 1871596 in VC:s3://gui-agent/jedi/images/component_v1_130k/component_v1_130k_extracted/. Exception: Number of image tokens ['data/table/other_screenshot/original/ModernInteractiveTable_1739993892.4959967.png'] does not match number of images None +Problematic sample: {'image': 'data/table/other_screenshot/original/ModernInteractiveTable_1739993892.4959967.png', 'conversations': [], 'image_id': 'data/table/other_screenshot/original/ModernInteractiveTable_1739993892.4959967.png'} + 14%|█▍ | 1716/12188 [3:34:03<20:59:39, 7.22s/it] {'loss': 0.4134, 'grad_norm': 0.6398391058151615, 'learning_rate': 9.6821473995996e-06, 'epoch': 0.14} + 14%|█▍ | 1716/12188 [3:34:03<20:59:39, 7.22s/it] 14%|█▍ | 1717/12188 [3:34:10<20:35:20, 7.08s/it] {'loss': 0.4201, 'grad_norm': 0.7249152944311217, 'learning_rate': 9.681681049978278e-06, 'epoch': 0.14} + 14%|█▍ | 1717/12188 [3:34:10<20:35:20, 7.08s/it] 14%|█▍ | 1718/12188 [3:34:17<21:08:09, 7.27s/it] {'loss': 0.3996, 'grad_norm': 0.634641928455588, 'learning_rate': 9.68121436974416e-06, 'epoch': 0.14} + 14%|█▍ | 1718/12188 [3:34:17<21:08:09, 7.27s/it] 14%|█▍ | 1719/12188 [3:34:24<21:01:39, 7.23s/it] {'loss': 0.3955, 'grad_norm': 0.7155963158372529, 'learning_rate': 9.680747358930205e-06, 'epoch': 0.14} + 14%|█▍ | 1719/12188 [3:34:24<21:01:39, 7.23s/it] 14%|█▍ | 1720/12188 [3:34:32<21:21:41, 7.35s/it] {'loss': 0.4103, 'grad_norm': 0.6758010921974377, 'learning_rate': 9.680280017569393e-06, 'epoch': 0.14} + 14%|█▍ | 1720/12188 [3:34:32<21:21:41, 7.35s/it] 14%|█▍ | 1721/12188 [3:34:39<20:59:20, 7.22s/it] {'loss': 0.3997, 'grad_norm': 0.6828476544768984, 'learning_rate': 9.679812345694725e-06, 'epoch': 0.14} + 14%|█▍ | 1721/12188 [3:34:39<20:59:20, 7.22s/it] 14%|█▍ | 1722/12188 [3:34:46<21:15:09, 7.31s/it] {'loss': 0.4054, 'grad_norm': 0.6277646974573768, 'learning_rate': 9.679344343339228e-06, 'epoch': 0.14} + 14%|█▍ | 1722/12188 [3:34:46<21:15:09, 7.31s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 899, in process_image_unified + visual_processed = processor.preprocess(image, return_tensors="pt") + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 432, in preprocess + patches, image_grid_thw = self._preprocess( + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 252, in _preprocess + resized_height, resized_width = smart_resize( + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 69, in smart_resize + raise ValueError(f"height:{height} and width:{width} must be larger than factor:{factor}") +ValueError: height:21 and width:135 must be larger than factor:28 +[Try #0] Failed to fetch sample 2102091 in VC:s3://gui-agent/jedi/images/figma400k/figma400k_extracted/. Exception: height:21 and width:135 must be larger than factor:28 +Problematic sample: {'image': 'b740dccee641dd995e5ce727ca3882efdf31feffa6d5688fe120c85e9c186e93.png', 'conversations': [{'from': 'human', 'value': '\nThis Button is positioned as follows:\nThe button is located in the middle section of the interface, to the right of a green circular play button. It is part of a horizontal control panel that includes other interactive elements. The button is positioned between the play button and a three-dot menu icon.'}, {'from': 'gpt', 'value': '[[0, 0, 1000, 1000]]', 'recipient': 'all', 'end_turn': True}]} + 14%|█▍ | 1723/12188 [3:34:56<22:59:15, 7.91s/it] {'loss': 0.3997, 'grad_norm': 0.6603627347946152, 'learning_rate': 9.678876010535949e-06, 'epoch': 0.14} + 14%|█▍ | 1723/12188 [3:34:56<22:59:15, 7.91s/it] 14%|█▍ | 1724/12188 [3:35:03<22:08:22, 7.62s/it] {'loss': 0.4059, 'grad_norm': 0.7437941080939691, 'learning_rate': 9.678407347317967e-06, 'epoch': 0.14} + 14%|█▍ | 1724/12188 [3:35:03<22:08:22, 7.62s/it] 14%|█▍ | 1725/12188 [3:35:11<22:43:49, 7.82s/it] {'loss': 0.3922, 'grad_norm': 0.6743537465440499, 'learning_rate': 9.677938353718372e-06, 'epoch': 0.14} + 14%|█▍ | 1725/12188 [3:35:11<22:43:49, 7.82s/it] 14%|█▍ | 1726/12188 [3:35:19<22:35:21, 7.77s/it] {'loss': 0.4309, 'grad_norm': 0.6549036919787193, 'learning_rate': 9.677469029770286e-06, 'epoch': 0.14} + 14%|█▍ | 1726/12188 [3:35:19<22:35:21, 7.77s/it] 14%|█▍ | 1727/12188 [3:35:27<22:55:26, 7.89s/it] {'loss': 0.4207, 'grad_norm': 0.7009573379588654, 'learning_rate': 9.676999375506854e-06, 'epoch': 0.14} + 14%|█▍ | 1727/12188 [3:35:27<22:55:26, 7.89s/it] 14%|█▍ | 1728/12188 [3:35:34<22:06:00, 7.61s/it] {'loss': 0.3928, 'grad_norm': 0.6845441925902913, 'learning_rate': 9.676529390961237e-06, 'epoch': 0.14} + 14%|█▍ | 1728/12188 [3:35:34<22:06:00, 7.61s/it] 14%|█▍ | 1729/12188 [3:35:41<22:09:50, 7.63s/it] {'loss': 0.3656, 'grad_norm': 0.6499258243003074, 'learning_rate': 9.676059076166632e-06, 'epoch': 0.14} + 14%|█▍ | 1729/12188 [3:35:41<22:09:50, 7.63s/it] 14%|█▍ | 1730/12188 [3:35:48<21:24:25, 7.37s/it] {'loss': 0.4213, 'grad_norm': 0.6824365474493789, 'learning_rate': 9.675588431156245e-06, 'epoch': 0.14} + 14%|█▍ | 1730/12188 [3:35:48<21:24:25, 7.37s/it] 14%|█▍ | 1731/12188 [3:35:56<22:06:46, 7.61s/it] {'loss': 0.3817, 'grad_norm': 0.6238852895355682, 'learning_rate': 9.675117455963317e-06, 'epoch': 0.14} + 14%|█▍ | 1731/12188 [3:35:56<22:06:46, 7.61s/it] 14%|█▍ | 1732/12188 [3:36:06<23:37:06, 8.13s/it] {'loss': 0.3947, 'grad_norm': 0.6988722704469467, 'learning_rate': 9.674646150621103e-06, 'epoch': 0.14} + 14%|█▍ | 1732/12188 [3:36:06<23:37:06, 8.13s/it] 14%|█▍ | 1733/12188 [3:36:14<23:20:43, 8.04s/it] {'loss': 0.4188, 'grad_norm': 0.6316689640650498, 'learning_rate': 9.674174515162889e-06, 'epoch': 0.14} + 14%|█▍ | 1733/12188 [3:36:14<23:20:43, 8.04s/it] 14%|█▍ | 1734/12188 [3:36:22<23:25:00, 8.06s/it] {'loss': 0.3825, 'grad_norm': 0.7527694544592057, 'learning_rate': 9.673702549621982e-06, 'epoch': 0.14} + 14%|█▍ | 1734/12188 [3:36:22<23:25:00, 8.06s/it] 14%|█▍ | 1735/12188 [3:36:29<22:47:23, 7.85s/it] {'loss': 0.406, 'grad_norm': 0.6498832592112922, 'learning_rate': 9.673230254031708e-06, 'epoch': 0.14} + 14%|█▍ | 1735/12188 [3:36:29<22:47:23, 7.85s/it] 14%|█▍ | 1736/12188 [3:36:36<21:53:24, 7.54s/it] {'loss': 0.4209, 'grad_norm': 0.7018920698467616, 'learning_rate': 9.672757628425421e-06, 'epoch': 0.14} + 14%|█▍ | 1736/12188 [3:36:36<21:53:24, 7.54s/it] 14%|█▍ | 1737/12188 [3:36:43<21:27:06, 7.39s/it] {'loss': 0.3989, 'grad_norm': 0.6595310919546475, 'learning_rate': 9.672284672836498e-06, 'epoch': 0.14} + 14%|█▍ | 1737/12188 [3:36:43<21:27:06, 7.39s/it] 14%|█▍ | 1738/12188 [3:36:50<21:11:23, 7.30s/it] {'loss': 0.4054, 'grad_norm': 0.6429026174372686, 'learning_rate': 9.67181138729834e-06, 'epoch': 0.14} + 14%|█▍ | 1738/12188 [3:36:50<21:11:23, 7.30s/it] 14%|█▍ | 1739/12188 [3:36:57<21:18:27, 7.34s/it] {'loss': 0.4029, 'grad_norm': 0.6347736008827259, 'learning_rate': 9.671337771844366e-06, 'epoch': 0.14} + 14%|█▍ | 1739/12188 [3:36:57<21:18:27, 7.34s/it] 14%|█▍ | 1740/12188 [3:37:05<21:39:24, 7.46s/it] {'loss': 0.3938, 'grad_norm': 0.6878453400054011, 'learning_rate': 9.670863826508024e-06, 'epoch': 0.14} + 14%|█▍ | 1740/12188 [3:37:05<21:39:24, 7.46s/it] 14%|█▍ | 1741/12188 [3:37:13<22:15:54, 7.67s/it] {'loss': 0.3605, 'grad_norm': 0.6861678030259173, 'learning_rate': 9.670389551322783e-06, 'epoch': 0.14} + 14%|█▍ | 1741/12188 [3:37:13<22:15:54, 7.67s/it] 14%|█▍ | 1742/12188 [3:37:20<21:41:42, 7.48s/it] {'loss': 0.4065, 'grad_norm': 0.7586251735867113, 'learning_rate': 9.669914946322134e-06, 'epoch': 0.14} + 14%|█▍ | 1742/12188 [3:37:20<21:41:42, 7.48s/it] 14%|█▍ | 1743/12188 [3:37:29<22:38:27, 7.80s/it] {'loss': 0.3458, 'grad_norm': 0.6357971397754794, 'learning_rate': 9.669440011539598e-06, 'epoch': 0.14} + 14%|█▍ | 1743/12188 [3:37:29<22:38:27, 7.80s/it] 14%|█▍ | 1744/12188 [3:37:36<21:49:20, 7.52s/it] {'loss': 0.4003, 'grad_norm': 0.6584986279337217, 'learning_rate': 9.668964747008707e-06, 'epoch': 0.14} + 14%|█▍ | 1744/12188 [3:37:36<21:49:20, 7.52s/it] 14%|█▍ | 1745/12188 [3:37:43<21:14:02, 7.32s/it] {'loss': 0.4121, 'grad_norm': 0.6858484127143393, 'learning_rate': 9.668489152763028e-06, 'epoch': 0.14} + 14%|█▍ | 1745/12188 [3:37:43<21:14:02, 7.32s/it] 14%|█▍ | 1746/12188 [3:37:50<21:04:58, 7.27s/it] {'loss': 0.3942, 'grad_norm': 0.6722117628100942, 'learning_rate': 9.668013228836144e-06, 'epoch': 0.14} + 14%|█▍ | 1746/12188 [3:37:50<21:04:58, 7.27s/it] 14%|█▍ | 1747/12188 [3:37:57<21:04:37, 7.27s/it] {'loss': 0.4299, 'grad_norm': 0.7011677451638232, 'learning_rate': 9.667536975261667e-06, 'epoch': 0.14} + 14%|█▍ | 1747/12188 [3:37:57<21:04:37, 7.27s/it] 14%|█▍ | 1748/12188 [3:38:04<20:53:27, 7.20s/it] {'loss': 0.3607, 'grad_norm': 0.6201871759706097, 'learning_rate': 9.667060392073228e-06, 'epoch': 0.14} + 14%|█▍ | 1748/12188 [3:38:04<20:53:27, 7.20s/it] 14%|█▍ | 1749/12188 [3:38:12<21:23:03, 7.37s/it] {'loss': 0.3959, 'grad_norm': 0.6979258554084261, 'learning_rate': 9.666583479304483e-06, 'epoch': 0.14} + 14%|█▍ | 1749/12188 [3:38:12<21:23:03, 7.37s/it] 14%|█▍ | 1750/12188 [3:38:20<21:40:04, 7.47s/it] {'loss': 0.4267, 'grad_norm': 0.8193904705920837, 'learning_rate': 9.666106236989107e-06, 'epoch': 0.14} + 14%|█▍ | 1750/12188 [3:38:20<21:40:04, 7.47s/it] 14%|█▍ | 1751/12188 [3:38:27<21:31:58, 7.43s/it] {'loss': 0.4119, 'grad_norm': 0.6404231965230299, 'learning_rate': 9.665628665160809e-06, 'epoch': 0.14} + 14%|█▍ | 1751/12188 [3:38:27<21:31:58, 7.43s/it] 14%|█▍ | 1752/12188 [3:38:36<22:54:22, 7.90s/it] {'loss': 0.3683, 'grad_norm': 0.6650027157106351, 'learning_rate': 9.665150763853307e-06, 'epoch': 0.14} + 14%|█▍ | 1752/12188 [3:38:36<22:54:22, 7.90s/it] 14%|█▍ | 1753/12188 [3:38:43<22:04:25, 7.62s/it] {'loss': 0.4485, 'grad_norm': 0.6180876662121777, 'learning_rate': 9.664672533100356e-06, 'epoch': 0.14} + 14%|█▍ | 1753/12188 [3:38:43<22:04:25, 7.62s/it] 14%|█▍ | 1754/12188 [3:38:49<21:15:19, 7.33s/it] {'loss': 0.4076, 'grad_norm': 0.7126157271517973, 'learning_rate': 9.664193972935725e-06, 'epoch': 0.14} + 14%|█▍ | 1754/12188 [3:38:50<21:15:19, 7.33s/it] 14%|█▍ | 1755/12188 [3:38:56<20:39:44, 7.13s/it] {'loss': 0.4359, 'grad_norm': 0.7670867386860619, 'learning_rate': 9.663715083393209e-06, 'epoch': 0.14} + 14%|█▍ | 1755/12188 [3:38:56<20:39:44, 7.13s/it] 14%|█▍ | 1756/12188 [3:39:03<20:15:28, 6.99s/it] {'loss': 0.3962, 'grad_norm': 0.6196413972544921, 'learning_rate': 9.663235864506625e-06, 'epoch': 0.14} + 14%|█▍ | 1756/12188 [3:39:03<20:15:28, 6.99s/it] 14%|█▍ | 1757/12188 [3:39:11<20:57:23, 7.23s/it] {'loss': 0.4024, 'grad_norm': 0.5985291358578014, 'learning_rate': 9.662756316309817e-06, 'epoch': 0.14} + 14%|█▍ | 1757/12188 [3:39:11<20:57:23, 7.23s/it] 14%|█▍ | 1758/12188 [3:39:18<20:41:28, 7.14s/it] {'loss': 0.4028, 'grad_norm': 0.6000093442977116, 'learning_rate': 9.66227643883665e-06, 'epoch': 0.14} + 14%|█▍ | 1758/12188 [3:39:18<20:41:28, 7.14s/it] 14%|█▍ | 1759/12188 [3:39:25<21:13:52, 7.33s/it] {'loss': 0.4273, 'grad_norm': 0.6677074623666664, 'learning_rate': 9.66179623212101e-06, 'epoch': 0.14} + 14%|█▍ | 1759/12188 [3:39:25<21:13:52, 7.33s/it] 14%|█▍ | 1760/12188 [3:39:32<20:38:57, 7.13s/it] {'loss': 0.4394, 'grad_norm': 0.6287131239746976, 'learning_rate': 9.661315696196811e-06, 'epoch': 0.14} + 14%|█▍ | 1760/12188 [3:39:32<20:38:57, 7.13s/it] 14%|█▍ | 1761/12188 [3:39:39<20:14:41, 6.99s/it] {'loss': 0.4717, 'grad_norm': 0.678570390861692, 'learning_rate': 9.660834831097988e-06, 'epoch': 0.14} + 14%|█▍ | 1761/12188 [3:39:39<20:14:41, 6.99s/it] 14%|█▍ | 1762/12188 [3:39:46<20:35:51, 7.11s/it] {'loss': 0.4476, 'grad_norm': 0.6861028565817904, 'learning_rate': 9.660353636858496e-06, 'epoch': 0.14} + 14%|█▍ | 1762/12188 [3:39:46<20:35:51, 7.11s/it] 14%|█▍ | 1763/12188 [3:39:55<21:51:11, 7.55s/it] {'loss': 0.379, 'grad_norm': 0.6543150530348537, 'learning_rate': 9.659872113512316e-06, 'epoch': 0.14} + 14%|█▍ | 1763/12188 [3:39:55<21:51:11, 7.55s/it] 14%|█▍ | 1764/12188 [3:40:02<22:04:08, 7.62s/it] {'loss': 0.3887, 'grad_norm': 0.6602897603028138, 'learning_rate': 9.659390261093456e-06, 'epoch': 0.14} + 14%|█▍ | 1764/12188 [3:40:02<22:04:08, 7.62s/it] 14%|█▍ | 1765/12188 [3:40:09<21:17:38, 7.35s/it] {'loss': 0.4251, 'grad_norm': 0.7168291464543484, 'learning_rate': 9.65890807963594e-06, 'epoch': 0.14} + 14%|█▍ | 1765/12188 [3:40:09<21:17:38, 7.35s/it] 14%|█▍ | 1766/12188 [3:40:16<21:12:01, 7.32s/it] {'loss': 0.3892, 'grad_norm': 0.6466521788297254, 'learning_rate': 9.658425569173822e-06, 'epoch': 0.14} + 14%|█▍ | 1766/12188 [3:40:16<21:12:01, 7.32s/it] 14%|█▍ | 1767/12188 [3:40:23<20:53:36, 7.22s/it] {'loss': 0.3637, 'grad_norm': 0.6298181834954645, 'learning_rate': 9.657942729741174e-06, 'epoch': 0.14} + 14%|█▍ | 1767/12188 [3:40:23<20:53:36, 7.22s/it] 15%|█▍ | 1768/12188 [3:40:31<21:11:15, 7.32s/it] {'loss': 0.4022, 'grad_norm': 0.7082762343759664, 'learning_rate': 9.657459561372094e-06, 'epoch': 0.15} + 15%|█▍ | 1768/12188 [3:40:31<21:11:15, 7.32s/it] 15%|█▍ | 1769/12188 [3:40:38<20:57:55, 7.24s/it] {'loss': 0.3873, 'grad_norm': 0.6830537250556851, 'learning_rate': 9.656976064100703e-06, 'epoch': 0.15} + 15%|█▍ | 1769/12188 [3:40:38<20:57:55, 7.24s/it] 15%|█▍ | 1770/12188 [3:40:46<21:59:17, 7.60s/it] {'loss': 0.3852, 'grad_norm': 0.6148857806002407, 'learning_rate': 9.656492237961143e-06, 'epoch': 0.15} + 15%|█▍ | 1770/12188 [3:40:46<21:59:17, 7.60s/it] 15%|█▍ | 1771/12188 [3:40:53<21:26:57, 7.41s/it] {'loss': 0.3973, 'grad_norm': 0.6624663264313092, 'learning_rate': 9.656008082987584e-06, 'epoch': 0.15} + 15%|█▍ | 1771/12188 [3:40:53<21:26:57, 7.41s/it] 15%|█▍ | 1772/12188 [3:41:01<21:53:56, 7.57s/it] {'loss': 0.3655, 'grad_norm': 0.5979879900864765, 'learning_rate': 9.655523599214215e-06, 'epoch': 0.15} + 15%|█▍ | 1772/12188 [3:41:01<21:53:56, 7.57s/it] 15%|█▍ | 1773/12188 [3:41:08<21:15:09, 7.35s/it] {'loss': 0.4102, 'grad_norm': 0.6520760542080698, 'learning_rate': 9.655038786675247e-06, 'epoch': 0.15} + 15%|█▍ | 1773/12188 [3:41:08<21:15:09, 7.35s/it] 15%|█▍ | 1774/12188 [3:41:15<20:43:07, 7.16s/it] {'loss': 0.4044, 'grad_norm': 0.7166317240722749, 'learning_rate': 9.65455364540492e-06, 'epoch': 0.15} + 15%|█▍ | 1774/12188 [3:41:15<20:43:07, 7.16s/it] 15%|█▍ | 1775/12188 [3:41:23<21:46:15, 7.53s/it] {'loss': 0.4058, 'grad_norm': 0.6528968878892806, 'learning_rate': 9.654068175437492e-06, 'epoch': 0.15} + 15%|█▍ | 1775/12188 [3:41:23<21:46:15, 7.53s/it] 15%|█▍ | 1776/12188 [3:41:30<21:29:33, 7.43s/it] {'loss': 0.3971, 'grad_norm': 0.7119410303141352, 'learning_rate': 9.653582376807248e-06, 'epoch': 0.15} + 15%|█▍ | 1776/12188 [3:41:30<21:29:33, 7.43s/it] 15%|█▍ | 1777/12188 [3:41:37<20:59:11, 7.26s/it] {'loss': 0.417, 'grad_norm': 0.6453242069290013, 'learning_rate': 9.653096249548493e-06, 'epoch': 0.15} + 15%|█▍ | 1777/12188 [3:41:37<20:59:11, 7.26s/it] 15%|█▍ | 1778/12188 [3:41:45<21:40:42, 7.50s/it] {'loss': 0.4442, 'grad_norm': 0.7337638060073384, 'learning_rate': 9.652609793695555e-06, 'epoch': 0.15} + 15%|█▍ | 1778/12188 [3:41:45<21:40:42, 7.50s/it] 15%|█▍ | 1779/12188 [3:41:52<21:10:55, 7.33s/it] {'loss': 0.423, 'grad_norm': 0.6402599782927512, 'learning_rate': 9.65212300928279e-06, 'epoch': 0.15} + 15%|█▍ | 1779/12188 [3:41:52<21:10:55, 7.33s/it] 15%|█▍ | 1780/12188 [3:42:00<21:28:34, 7.43s/it] {'loss': 0.3764, 'grad_norm': 0.6777987125651905, 'learning_rate': 9.651635896344571e-06, 'epoch': 0.15} + 15%|█▍ | 1780/12188 [3:42:00<21:28:34, 7.43s/it] 15%|█▍ | 1781/12188 [3:42:08<22:04:39, 7.64s/it] {'loss': 0.3996, 'grad_norm': 0.6621402060105367, 'learning_rate': 9.6511484549153e-06, 'epoch': 0.15} + 15%|█▍ | 1781/12188 [3:42:08<22:04:39, 7.64s/it] 15%|█▍ | 1782/12188 [3:42:15<21:48:32, 7.54s/it] {'loss': 0.3912, 'grad_norm': 0.6343833929789129, 'learning_rate': 9.650660685029396e-06, 'epoch': 0.15} + 15%|█▍ | 1782/12188 [3:42:15<21:48:32, 7.54s/it] 15%|█▍ | 1783/12188 [3:42:23<21:27:11, 7.42s/it] {'loss': 0.426, 'grad_norm': 0.6529944516503842, 'learning_rate': 9.650172586721308e-06, 'epoch': 0.15} + 15%|█▍ | 1783/12188 [3:42:23<21:27:11, 7.42s/it] 15%|█▍ | 1784/12188 [3:42:30<21:25:46, 7.42s/it] {'loss': 0.4202, 'grad_norm': 0.7153494145962827, 'learning_rate': 9.649684160025503e-06, 'epoch': 0.15} + 15%|█▍ | 1784/12188 [3:42:30<21:25:46, 7.42s/it] 15%|█▍ | 1785/12188 [3:42:39<23:17:06, 8.06s/it] {'loss': 0.4177, 'grad_norm': 0.6309105266436554, 'learning_rate': 9.649195404976471e-06, 'epoch': 0.15} + 15%|█▍ | 1785/12188 [3:42:40<23:17:06, 8.06s/it] 15%|█▍ | 1786/12188 [3:42:46<22:01:56, 7.63s/it] {'loss': 0.4399, 'grad_norm': 0.6688926430512597, 'learning_rate': 9.64870632160873e-06, 'epoch': 0.15} + 15%|█▍ | 1786/12188 [3:42:46<22:01:56, 7.63s/it] 15%|█▍ | 1787/12188 [3:42:54<21:57:03, 7.60s/it] {'loss': 0.3715, 'grad_norm': 0.6767213274423554, 'learning_rate': 9.648216909956818e-06, 'epoch': 0.15} + 15%|█▍ | 1787/12188 [3:42:54<21:57:03, 7.60s/it] 15%|█▍ | 1788/12188 [3:43:02<22:16:45, 7.71s/it] {'loss': 0.3877, 'grad_norm': 0.8659386219596478, 'learning_rate': 9.647727170055294e-06, 'epoch': 0.15} + 15%|█▍ | 1788/12188 [3:43:02<22:16:45, 7.71s/it] 15%|█▍ | 1789/12188 [3:43:08<21:25:24, 7.42s/it] {'loss': 0.4061, 'grad_norm': 0.6681147541967548, 'learning_rate': 9.647237101938748e-06, 'epoch': 0.15} + 15%|█▍ | 1789/12188 [3:43:08<21:25:24, 7.42s/it] 15%|█▍ | 1790/12188 [3:43:15<20:39:14, 7.15s/it] {'loss': 0.3865, 'grad_norm': 0.6327522875204258, 'learning_rate': 9.64674670564178e-06, 'epoch': 0.15} + 15%|█▍ | 1790/12188 [3:43:15<20:39:14, 7.15s/it] 15%|█▍ | 1791/12188 [3:43:23<21:37:56, 7.49s/it] {'loss': 0.3994, 'grad_norm': 0.6422842129333034, 'learning_rate': 9.646255981199027e-06, 'epoch': 0.15} + 15%|█▍ | 1791/12188 [3:43:23<21:37:56, 7.49s/it] 15%|█▍ | 1792/12188 [3:43:32<22:28:51, 7.78s/it] {'loss': 0.3811, 'grad_norm': 0.6014203982237069, 'learning_rate': 9.64576492864514e-06, 'epoch': 0.15} + 15%|█▍ | 1792/12188 [3:43:32<22:28:51, 7.78s/it] 15%|█▍ | 1793/12188 [3:43:38<21:33:32, 7.47s/it] {'loss': 0.4024, 'grad_norm': 0.6551252819434892, 'learning_rate': 9.6452735480148e-06, 'epoch': 0.15} + 15%|█▍ | 1793/12188 [3:43:38<21:33:32, 7.47s/it] 15%|█▍ | 1794/12188 [3:43:46<21:52:45, 7.58s/it] {'loss': 0.4137, 'grad_norm': 0.6178474541553234, 'learning_rate': 9.644781839342706e-06, 'epoch': 0.15} + 15%|█▍ | 1794/12188 [3:43:46<21:52:45, 7.58s/it] 15%|█▍ | 1795/12188 [3:43:54<21:39:26, 7.50s/it] {'loss': 0.4175, 'grad_norm': 0.6442628465558763, 'learning_rate': 9.644289802663578e-06, 'epoch': 0.15} + 15%|█▍ | 1795/12188 [3:43:54<21:39:26, 7.50s/it] 15%|█▍ | 1796/12188 [3:44:04<24:33:54, 8.51s/it] {'loss': 0.404, 'grad_norm': 1.8524331741457358, 'learning_rate': 9.643797438012166e-06, 'epoch': 0.15} + 15%|█▍ | 1796/12188 [3:44:04<24:33:54, 8.51s/it] 15%|█▍ | 1797/12188 [3:44:11<23:15:45, 8.06s/it] {'loss': 0.3913, 'grad_norm': 0.6706996202233672, 'learning_rate': 9.643304745423241e-06, 'epoch': 0.15} + 15%|█▍ | 1797/12188 [3:44:11<23:15:45, 8.06s/it] 15%|█▍ | 1798/12188 [3:44:18<22:08:32, 7.67s/it] {'loss': 0.4046, 'grad_norm': 0.725673945715908, 'learning_rate': 9.642811724931595e-06, 'epoch': 0.15} + 15%|█▍ | 1798/12188 [3:44:18<22:08:32, 7.67s/it] 15%|█▍ | 1799/12188 [3:44:25<21:33:37, 7.47s/it] {'loss': 0.4349, 'grad_norm': 0.7066561668135817, 'learning_rate': 9.642318376572044e-06, 'epoch': 0.15} + 15%|█▍ | 1799/12188 [3:44:25<21:33:37, 7.47s/it] 15%|█▍ | 1800/12188 [3:44:33<22:03:52, 7.65s/it] {'loss': 0.4058, 'grad_norm': 0.6600143158272705, 'learning_rate': 9.641824700379427e-06, 'epoch': 0.15} + 15%|█▍ | 1800/12188 [3:44:33<22:03:52, 7.65s/it] 15%|█▍ | 1801/12188 [3:44:40<21:41:51, 7.52s/it] {'loss': 0.4123, 'grad_norm': 0.6765257982493408, 'learning_rate': 9.64133069638861e-06, 'epoch': 0.15} + 15%|█▍ | 1801/12188 [3:44:40<21:41:51, 7.52s/it] 15%|█▍ | 1802/12188 [3:44:48<21:48:23, 7.56s/it] {'loss': 0.4351, 'grad_norm': 0.6991802995648302, 'learning_rate': 9.640836364634473e-06, 'epoch': 0.15} + 15%|█▍ | 1802/12188 [3:44:48<21:48:23, 7.56s/it] 15%|█▍ | 1803/12188 [3:44:55<21:02:01, 7.29s/it] {'loss': 0.3812, 'grad_norm': 0.6709754498558876, 'learning_rate': 9.640341705151929e-06, 'epoch': 0.15} + 15%|█▍ | 1803/12188 [3:44:55<21:02:01, 7.29s/it] 15%|█▍ | 1804/12188 [3:45:02<21:05:18, 7.31s/it] {'loss': 0.407, 'grad_norm': 0.6903113714496548, 'learning_rate': 9.639846717975909e-06, 'epoch': 0.15} + 15%|█▍ | 1804/12188 [3:45:02<21:05:18, 7.31s/it] 15%|█▍ | 1805/12188 [3:45:10<21:26:19, 7.43s/it] {'loss': 0.4408, 'grad_norm': 0.6652964021507553, 'learning_rate': 9.639351403141368e-06, 'epoch': 0.15} + 15%|█▍ | 1805/12188 [3:45:10<21:26:19, 7.43s/it] 15%|█▍ | 1806/12188 [3:45:19<23:18:30, 8.08s/it] {'loss': 0.4323, 'grad_norm': 0.699769433439595, 'learning_rate': 9.638855760683285e-06, 'epoch': 0.15} + 15%|█▍ | 1806/12188 [3:45:19<23:18:30, 8.08s/it] 15%|█▍ | 1807/12188 [3:45:26<22:13:44, 7.71s/it] {'loss': 0.3943, 'grad_norm': 0.6422648339841057, 'learning_rate': 9.63835979063666e-06, 'epoch': 0.15} + 15%|█▍ | 1807/12188 [3:45:26<22:13:44, 7.71s/it] 15%|█▍ | 1808/12188 [3:45:33<21:24:22, 7.42s/it] {'loss': 0.3784, 'grad_norm': 0.7378339180262528, 'learning_rate': 9.637863493036522e-06, 'epoch': 0.15} + 15%|█▍ | 1808/12188 [3:45:33<21:24:22, 7.42s/it] 15%|█▍ | 1809/12188 [3:45:40<20:56:29, 7.26s/it] {'loss': 0.463, 'grad_norm': 0.7142607088329871, 'learning_rate': 9.637366867917912e-06, 'epoch': 0.15} + 15%|█▍ | 1809/12188 [3:45:40<20:56:29, 7.26s/it] 15%|█▍ | 1810/12188 [3:45:47<20:32:13, 7.12s/it] {'loss': 0.397, 'grad_norm': 0.7264737187166976, 'learning_rate': 9.636869915315905e-06, 'epoch': 0.15} + 15%|█▍ | 1810/12188 [3:45:47<20:32:13, 7.12s/it] 15%|█▍ | 1811/12188 [3:45:54<20:40:17, 7.17s/it] {'loss': 0.4573, 'grad_norm': 0.7381463155556915, 'learning_rate': 9.636372635265595e-06, 'epoch': 0.15} + 15%|█▍ | 1811/12188 [3:45:54<20:40:17, 7.17s/it] 15%|█▍ | 1812/12188 [3:46:01<20:23:40, 7.08s/it] {'loss': 0.4764, 'grad_norm': 0.65705137928414, 'learning_rate': 9.635875027802097e-06, 'epoch': 0.15} + 15%|█▍ | 1812/12188 [3:46:01<20:23:40, 7.08s/it] 15%|█▍ | 1813/12188 [3:46:08<20:25:43, 7.09s/it] {'loss': 0.3823, 'grad_norm': 0.6566487610245988, 'learning_rate': 9.635377092960554e-06, 'epoch': 0.15} + 15%|█▍ | 1813/12188 [3:46:08<20:25:43, 7.09s/it] 15%|█▍ | 1814/12188 [3:46:15<20:48:51, 7.22s/it] {'loss': 0.36, 'grad_norm': 0.6329750521544278, 'learning_rate': 9.634878830776128e-06, 'epoch': 0.15} + 15%|█▍ | 1814/12188 [3:46:16<20:48:51, 7.22s/it] 15%|█▍ | 1815/12188 [3:46:23<21:15:07, 7.38s/it] {'loss': 0.3671, 'grad_norm': 0.6058107816356366, 'learning_rate': 9.634380241284005e-06, 'epoch': 0.15} + 15%|█▍ | 1815/12188 [3:46:23<21:15:07, 7.38s/it] 15%|█▍ | 1816/12188 [3:46:31<21:20:16, 7.41s/it] {'loss': 0.3944, 'grad_norm': 0.738036296580335, 'learning_rate': 9.633881324519397e-06, 'epoch': 0.15} + 15%|█▍ | 1816/12188 [3:46:31<21:20:16, 7.41s/it] 15%|█▍ | 1817/12188 [3:46:37<20:43:03, 7.19s/it] {'loss': 0.3594, 'grad_norm': 0.7040162399759994, 'learning_rate': 9.633382080517533e-06, 'epoch': 0.15} + 15%|█▍ | 1817/12188 [3:46:37<20:43:03, 7.19s/it] 15%|█▍ | 1818/12188 [3:46:44<20:17:06, 7.04s/it] {'loss': 0.4742, 'grad_norm': 0.7224331403652545, 'learning_rate': 9.63288250931367e-06, 'epoch': 0.15} + 15%|█▍ | 1818/12188 [3:46:44<20:17:06, 7.04s/it] 15%|█▍ | 1819/12188 [3:46:51<19:59:03, 6.94s/it] {'loss': 0.3708, 'grad_norm': 0.7252701211769775, 'learning_rate': 9.632382610943088e-06, 'epoch': 0.15} + 15%|█▍ | 1819/12188 [3:46:51<19:59:03, 6.94s/it] 15%|█▍ | 1820/12188 [3:46:58<19:52:00, 6.90s/it] {'loss': 0.4372, 'grad_norm': 0.7041878827281002, 'learning_rate': 9.631882385441088e-06, 'epoch': 0.15} + 15%|█▍ | 1820/12188 [3:46:58<19:52:00, 6.90s/it] 15%|█▍ | 1821/12188 [3:47:05<19:58:30, 6.94s/it] {'loss': 0.4081, 'grad_norm': 0.6355876792508546, 'learning_rate': 9.631381832842996e-06, 'epoch': 0.15} + 15%|█▍ | 1821/12188 [3:47:05<19:58:30, 6.94s/it] 15%|█▍ | 1822/12188 [3:47:11<19:53:31, 6.91s/it] {'loss': 0.4391, 'grad_norm': 0.7001064516011452, 'learning_rate': 9.630880953184161e-06, 'epoch': 0.15} + 15%|█▍ | 1822/12188 [3:47:11<19:53:31, 6.91s/it] 15%|█▍ | 1823/12188 [3:47:20<21:08:29, 7.34s/it] {'loss': 0.4541, 'grad_norm': 0.722373457072694, 'learning_rate': 9.630379746499951e-06, 'epoch': 0.15} + 15%|█▍ | 1823/12188 [3:47:20<21:08:29, 7.34s/it] 15%|█▍ | 1824/12188 [3:47:28<22:07:23, 7.68s/it] {'loss': 0.3512, 'grad_norm': 0.6498748948699501, 'learning_rate': 9.629878212825763e-06, 'epoch': 0.15} + 15%|█▍ | 1824/12188 [3:47:28<22:07:23, 7.68s/it] 15%|█▍ | 1825/12188 [3:47:35<21:25:14, 7.44s/it] {'loss': 0.3915, 'grad_norm': 0.9635754658889876, 'learning_rate': 9.629376352197017e-06, 'epoch': 0.15} + 15%|█▍ | 1825/12188 [3:47:35<21:25:14, 7.44s/it] 15%|█▍ | 1826/12188 [3:47:43<21:40:32, 7.53s/it] {'loss': 0.3937, 'grad_norm': 0.6493405525469551, 'learning_rate': 9.628874164649147e-06, 'epoch': 0.15} + 15%|█▍ | 1826/12188 [3:47:43<21:40:32, 7.53s/it] 15%|█▍ | 1827/12188 [3:47:50<21:32:23, 7.48s/it] {'loss': 0.3827, 'grad_norm': 0.6654530409628264, 'learning_rate': 9.628371650217623e-06, 'epoch': 0.15} + 15%|█▍ | 1827/12188 [3:47:50<21:32:23, 7.48s/it] 15%|█▍ | 1828/12188 [3:47:57<20:48:56, 7.23s/it] {'loss': 0.4175, 'grad_norm': 0.6455910347418785, 'learning_rate': 9.627868808937927e-06, 'epoch': 0.15} + 15%|█▍ | 1828/12188 [3:47:57<20:48:56, 7.23s/it] 15%|█▌ | 1829/12188 [3:48:05<21:10:08, 7.36s/it] {'loss': 0.3985, 'grad_norm': 0.7116758613133379, 'learning_rate': 9.627365640845571e-06, 'epoch': 0.15} + 15%|█▌ | 1829/12188 [3:48:05<21:10:08, 7.36s/it] 15%|█▌ | 1830/12188 [3:48:11<20:38:15, 7.17s/it] {'loss': 0.4053, 'grad_norm': 0.8186316931604863, 'learning_rate': 9.62686214597609e-06, 'epoch': 0.15} + 15%|█▌ | 1830/12188 [3:48:11<20:38:15, 7.17s/it] 15%|█▌ | 1831/12188 [3:48:19<20:44:57, 7.21s/it] {'loss': 0.3873, 'grad_norm': 1.0202609000165899, 'learning_rate': 9.626358324365036e-06, 'epoch': 0.15} + 15%|█▌ | 1831/12188 [3:48:19<20:44:57, 7.21s/it] 15%|█▌ | 1832/12188 [3:48:25<20:15:12, 7.04s/it] {'loss': 0.3751, 'grad_norm': 0.6350717539560979, 'learning_rate': 9.625854176047991e-06, 'epoch': 0.15} + 15%|█▌ | 1832/12188 [3:48:25<20:15:12, 7.04s/it] 15%|█▌ | 1833/12188 [3:48:32<20:04:36, 6.98s/it] {'loss': 0.4188, 'grad_norm': 0.6728504365355287, 'learning_rate': 9.625349701060556e-06, 'epoch': 0.15} + 15%|█▌ | 1833/12188 [3:48:32<20:04:36, 6.98s/it] 15%|█▌ | 1834/12188 [3:48:39<20:03:50, 6.98s/it] {'loss': 0.3537, 'grad_norm': 2.2083616329065823, 'learning_rate': 9.624844899438356e-06, 'epoch': 0.15} + 15%|█▌ | 1834/12188 [3:48:39<20:03:50, 6.98s/it] 15%|█▌ | 1835/12188 [3:48:46<20:01:59, 6.97s/it] {'loss': 0.4625, 'grad_norm': 0.7334929788608036, 'learning_rate': 9.62433977121704e-06, 'epoch': 0.15} + 15%|█▌ | 1835/12188 [3:48:46<20:01:59, 6.97s/it] 15%|█▌ | 1836/12188 [3:48:54<20:57:43, 7.29s/it] {'loss': 0.4404, 'grad_norm': 0.6828869951091898, 'learning_rate': 9.623834316432279e-06, 'epoch': 0.15} + 15%|█▌ | 1836/12188 [3:48:54<20:57:43, 7.29s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7fcea7585030> +[Try #0] Failed to fetch sample 4567615 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7fcea7585030> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Printable version'"}, {'from': 'gpt', 'value': '\nclick(x=0.865, y=0.429)\n'}]} + 15%|█▌ | 1837/12188 [3:49:01<21:01:23, 7.31s/it] {'loss': 0.4194, 'grad_norm': 0.6644741027399357, 'learning_rate': 9.623328535119766e-06, 'epoch': 0.15} + 15%|█▌ | 1837/12188 [3:49:01<21:01:23, 7.31s/it] 15%|█▌ | 1838/12188 [3:49:09<21:32:30, 7.49s/it] {'loss': 0.4013, 'grad_norm': 0.8682427681674405, 'learning_rate': 9.62282242731522e-06, 'epoch': 0.15} + 15%|█▌ | 1838/12188 [3:49:09<21:32:30, 7.49s/it] 15%|█▌ | 1839/12188 [3:49:16<20:58:28, 7.30s/it] {'loss': 0.3943, 'grad_norm': 0.7849331144475015, 'learning_rate': 9.622315993054384e-06, 'epoch': 0.15} + 15%|█▌ | 1839/12188 [3:49:16<20:58:28, 7.30s/it] 15%|█▌ | 1840/12188 [3:49:23<20:14:19, 7.04s/it] {'loss': 0.4078, 'grad_norm': 0.6791649922155487, 'learning_rate': 9.621809232373016e-06, 'epoch': 0.15} + 15%|█▌ | 1840/12188 [3:49:23<20:14:19, 7.04s/it] 15%|█▌ | 1841/12188 [3:49:30<20:24:33, 7.10s/it] {'loss': 0.4087, 'grad_norm': 0.7717333061775307, 'learning_rate': 9.621302145306906e-06, 'epoch': 0.15} + 15%|█▌ | 1841/12188 [3:49:30<20:24:33, 7.10s/it] 15%|█▌ | 1842/12188 [3:49:37<20:18:27, 7.07s/it] {'loss': 0.3889, 'grad_norm': 0.7599313716690608, 'learning_rate': 9.620794731891862e-06, 'epoch': 0.15} + 15%|█▌ | 1842/12188 [3:49:37<20:18:27, 7.07s/it] 15%|█▌ | 1843/12188 [3:49:44<20:27:12, 7.12s/it] {'loss': 0.3743, 'grad_norm': 0.6480671446379147, 'learning_rate': 9.620286992163722e-06, 'epoch': 0.15} + 15%|█▌ | 1843/12188 [3:49:44<20:27:12, 7.12s/it] 15%|█▌ | 1844/12188 [3:49:52<21:07:21, 7.35s/it] {'loss': 0.3982, 'grad_norm': 0.6341921825821116, 'learning_rate': 9.619778926158334e-06, 'epoch': 0.15} + 15%|█▌ | 1844/12188 [3:49:52<21:07:21, 7.35s/it] 15%|█▌ | 1845/12188 [3:49:59<21:03:40, 7.33s/it] {'loss': 0.4124, 'grad_norm': 0.7934183144315617, 'learning_rate': 9.619270533911583e-06, 'epoch': 0.15} + 15%|█▌ | 1845/12188 [3:49:59<21:03:40, 7.33s/it] 15%|█▌ | 1846/12188 [3:50:06<20:49:09, 7.25s/it] {'loss': 0.4084, 'grad_norm': 0.7257089581445592, 'learning_rate': 9.618761815459366e-06, 'epoch': 0.15} + 15%|█▌ | 1846/12188 [3:50:06<20:49:09, 7.25s/it] 15%|█▌ | 1847/12188 [3:50:14<20:52:48, 7.27s/it] {'loss': 0.4134, 'grad_norm': 0.6669503952731342, 'learning_rate': 9.61825277083761e-06, 'epoch': 0.15} + 15%|█▌ | 1847/12188 [3:50:14<20:52:48, 7.27s/it] 15%|█▌ | 1848/12188 [3:50:21<20:32:47, 7.15s/it] {'loss': 0.4127, 'grad_norm': 0.7200350540924865, 'learning_rate': 9.617743400082264e-06, 'epoch': 0.15} + 15%|█▌ | 1848/12188 [3:50:21<20:32:47, 7.15s/it] 15%|█▌ | 1849/12188 [3:50:27<20:18:39, 7.07s/it] {'loss': 0.4624, 'grad_norm': 0.7789687621364638, 'learning_rate': 9.617233703229298e-06, 'epoch': 0.15} + 15%|█▌ | 1849/12188 [3:50:27<20:18:39, 7.07s/it] 15%|█▌ | 1850/12188 [3:50:34<20:18:00, 7.07s/it] {'loss': 0.3985, 'grad_norm': 0.6477220728483736, 'learning_rate': 9.616723680314708e-06, 'epoch': 0.15} + 15%|█▌ | 1850/12188 [3:50:34<20:18:00, 7.07s/it] 15%|█▌ | 1851/12188 [3:50:41<19:51:00, 6.91s/it] {'loss': 0.3988, 'grad_norm': 0.7313832574885751, 'learning_rate': 9.616213331374507e-06, 'epoch': 0.15} + 15%|█▌ | 1851/12188 [3:50:41<19:51:00, 6.91s/it] 15%|█▌ | 1852/12188 [3:50:48<19:58:27, 6.96s/it] {'loss': 0.4094, 'grad_norm': 0.6859749315589025, 'learning_rate': 9.615702656444736e-06, 'epoch': 0.15} + 15%|█▌ | 1852/12188 [3:50:48<19:58:27, 6.96s/it] 15%|█▌ | 1853/12188 [3:50:55<19:57:22, 6.95s/it] {'loss': 0.3777, 'grad_norm': 0.8889611205508777, 'learning_rate': 9.615191655561462e-06, 'epoch': 0.15} + 15%|█▌ | 1853/12188 [3:50:55<19:57:22, 6.95s/it] 15%|█▌ | 1854/12188 [3:51:02<19:52:22, 6.92s/it] {'loss': 0.4071, 'grad_norm': 0.7412358952295893, 'learning_rate': 9.614680328760766e-06, 'epoch': 0.15} + 15%|█▌ | 1854/12188 [3:51:02<19:52:22, 6.92s/it] 15%|█▌ | 1855/12188 [3:51:09<20:09:25, 7.02s/it] {'loss': 0.398, 'grad_norm': 0.8093300785912457, 'learning_rate': 9.61416867607876e-06, 'epoch': 0.15} + 15%|█▌ | 1855/12188 [3:51:09<20:09:25, 7.02s/it] 15%|█▌ | 1856/12188 [3:51:16<19:48:30, 6.90s/it] {'loss': 0.4161, 'grad_norm': 0.6828719042061052, 'learning_rate': 9.613656697551575e-06, 'epoch': 0.15} + 15%|█▌ | 1856/12188 [3:51:16<19:48:30, 6.90s/it] 15%|█▌ | 1857/12188 [3:51:23<19:53:15, 6.93s/it] {'loss': 0.378, 'grad_norm': 0.7220147270241172, 'learning_rate': 9.613144393215367e-06, 'epoch': 0.15} + 15%|█▌ | 1857/12188 [3:51:23<19:53:15, 6.93s/it] 15%|█▌ | 1858/12188 [3:51:30<20:01:55, 6.98s/it] {'loss': 0.4123, 'grad_norm': 1.019343157328132, 'learning_rate': 9.612631763106312e-06, 'epoch': 0.15} + 15%|█▌ | 1858/12188 [3:51:30<20:01:55, 6.98s/it] 15%|█▌ | 1859/12188 [3:51:38<21:15:28, 7.41s/it] {'loss': 0.3721, 'grad_norm': 0.771487606386382, 'learning_rate': 9.612118807260615e-06, 'epoch': 0.15} + 15%|█▌ | 1859/12188 [3:51:38<21:15:28, 7.41s/it] 15%|█▌ | 1860/12188 [3:51:45<20:56:36, 7.30s/it] {'loss': 0.3883, 'grad_norm': 0.8596852347415576, 'learning_rate': 9.611605525714495e-06, 'epoch': 0.15} + 15%|█▌ | 1860/12188 [3:51:45<20:56:36, 7.30s/it] 15%|█▌ | 1861/12188 [3:51:52<20:24:47, 7.12s/it] {'loss': 0.3826, 'grad_norm': 0.7217531869115089, 'learning_rate': 9.611091918504206e-06, 'epoch': 0.15} + 15%|█▌ | 1861/12188 [3:51:52<20:24:47, 7.12s/it] 15%|█▌ | 1862/12188 [3:51:59<20:07:59, 7.02s/it] {'loss': 0.3767, 'grad_norm': 0.6050684085858685, 'learning_rate': 9.61057798566601e-06, 'epoch': 0.15} + 15%|█▌ | 1862/12188 [3:51:59<20:07:59, 7.02s/it] 15%|█▌ | 1863/12188 [3:52:05<19:48:15, 6.91s/it] {'loss': 0.4076, 'grad_norm': 0.6674442075950593, 'learning_rate': 9.610063727236205e-06, 'epoch': 0.15} + 15%|█▌ | 1863/12188 [3:52:05<19:48:15, 6.91s/it] 15%|█▌ | 1864/12188 [3:52:12<19:56:29, 6.95s/it] {'loss': 0.4325, 'grad_norm': 0.737433006273814, 'learning_rate': 9.609549143251107e-06, 'epoch': 0.15} + 15%|█▌ | 1864/12188 [3:52:12<19:56:29, 6.95s/it] 15%|█▌ | 1865/12188 [3:52:19<19:31:55, 6.81s/it] {'loss': 0.416, 'grad_norm': 0.7883290143524665, 'learning_rate': 9.609034233747055e-06, 'epoch': 0.15} + 15%|█▌ | 1865/12188 [3:52:19<19:31:55, 6.81s/it] 15%|█▌ | 1866/12188 [3:52:26<19:51:22, 6.93s/it] {'loss': 0.4163, 'grad_norm': 1.2490605539637176, 'learning_rate': 9.60851899876041e-06, 'epoch': 0.15} + 15%|█▌ | 1866/12188 [3:52:26<19:51:22, 6.93s/it] 15%|█▌ | 1867/12188 [3:52:33<19:59:47, 6.97s/it] {'loss': 0.3909, 'grad_norm': 0.9313429167704237, 'learning_rate': 9.608003438327556e-06, 'epoch': 0.15} + 15%|█▌ | 1867/12188 [3:52:33<19:59:47, 6.97s/it] 15%|█▌ | 1868/12188 [3:52:40<20:06:54, 7.02s/it] {'loss': 0.402, 'grad_norm': 0.8001806640224577, 'learning_rate': 9.607487552484904e-06, 'epoch': 0.15} + 15%|█▌ | 1868/12188 [3:52:40<20:06:54, 7.02s/it] 15%|█▌ | 1869/12188 [3:52:48<20:32:07, 7.16s/it] {'loss': 0.3721, 'grad_norm': 0.7398905661268993, 'learning_rate': 9.606971341268882e-06, 'epoch': 0.15} + 15%|█▌ | 1869/12188 [3:52:48<20:32:07, 7.16s/it] 15%|█▌ | 1870/12188 [3:52:55<20:37:19, 7.20s/it] {'loss': 0.3965, 'grad_norm': 0.8119327839083773, 'learning_rate': 9.606454804715948e-06, 'epoch': 0.15} + 15%|█▌ | 1870/12188 [3:52:55<20:37:19, 7.20s/it] 15%|█▌ | 1871/12188 [3:53:02<20:28:17, 7.14s/it] {'loss': 0.421, 'grad_norm': 0.766634374854435, 'learning_rate': 9.605937942862574e-06, 'epoch': 0.15} + 15%|█▌ | 1871/12188 [3:53:02<20:28:17, 7.14s/it] 15%|█▌ | 1872/12188 [3:53:10<21:10:42, 7.39s/it] {'loss': 0.4006, 'grad_norm': 0.7143498448980892, 'learning_rate': 9.605420755745263e-06, 'epoch': 0.15} + 15%|█▌ | 1872/12188 [3:53:10<21:10:42, 7.39s/it] 15%|█▌ | 1873/12188 [3:53:17<21:01:08, 7.34s/it] {'loss': 0.3955, 'grad_norm': 0.6709932912971895, 'learning_rate': 9.604903243400538e-06, 'epoch': 0.15} + 15%|█▌ | 1873/12188 [3:53:17<21:01:08, 7.34s/it] 15%|█▌ | 1874/12188 [3:53:24<20:27:37, 7.14s/it] {'loss': 0.3954, 'grad_norm': 0.9725082051373911, 'learning_rate': 9.604385405864944e-06, 'epoch': 0.15} + 15%|█▌ | 1874/12188 [3:53:24<20:27:37, 7.14s/it] 15%|█▌ | 1875/12188 [3:53:31<20:15:15, 7.07s/it] {'loss': 0.3812, 'grad_norm': 0.6831034823191432, 'learning_rate': 9.60386724317505e-06, 'epoch': 0.15} + 15%|█▌ | 1875/12188 [3:53:31<20:15:15, 7.07s/it] 15%|█▌ | 1876/12188 [3:53:38<20:14:29, 7.07s/it] {'loss': 0.4231, 'grad_norm': 0.9279289429372167, 'learning_rate': 9.60334875536745e-06, 'epoch': 0.15} + 15%|█▌ | 1876/12188 [3:53:38<20:14:29, 7.07s/it] 15%|█▌ | 1877/12188 [3:53:45<20:02:59, 7.00s/it] {'loss': 0.3686, 'grad_norm': 0.5682137902193274, 'learning_rate': 9.602829942478756e-06, 'epoch': 0.15} + 15%|█▌ | 1877/12188 [3:53:45<20:02:59, 7.00s/it] 15%|█▌ | 1878/12188 [3:53:52<20:14:17, 7.07s/it] {'loss': 0.4023, 'grad_norm': 0.6609108185261404, 'learning_rate': 9.602310804545605e-06, 'epoch': 0.15} + 15%|█▌ | 1878/12188 [3:53:52<20:14:17, 7.07s/it] 15%|█▌ | 1879/12188 [3:53:59<20:19:53, 7.10s/it] {'loss': 0.3952, 'grad_norm': 0.8081346868549867, 'learning_rate': 9.601791341604658e-06, 'epoch': 0.15} + 15%|█▌ | 1879/12188 [3:53:59<20:19:53, 7.10s/it] 15%|█▌ | 1880/12188 [3:54:06<20:21:52, 7.11s/it] {'loss': 0.3963, 'grad_norm': 0.7868872758547408, 'learning_rate': 9.601271553692603e-06, 'epoch': 0.15} + 15%|█▌ | 1880/12188 [3:54:06<20:21:52, 7.11s/it] 15%|█▌ | 1881/12188 [3:54:14<20:28:43, 7.15s/it] {'loss': 0.369, 'grad_norm': 0.7453360617918365, 'learning_rate': 9.600751440846142e-06, 'epoch': 0.15} + 15%|█▌ | 1881/12188 [3:54:14<20:28:43, 7.15s/it] 15%|█▌ | 1882/12188 [3:54:20<20:09:44, 7.04s/it] {'loss': 0.389, 'grad_norm': 0.7997994362176901, 'learning_rate': 9.600231003102006e-06, 'epoch': 0.15} + 15%|█▌ | 1882/12188 [3:54:20<20:09:44, 7.04s/it] 15%|█▌ | 1883/12188 [3:54:28<20:51:33, 7.29s/it] {'loss': 0.4285, 'grad_norm': 0.8364042279113968, 'learning_rate': 9.599710240496946e-06, 'epoch': 0.15} + 15%|█▌ | 1883/12188 [3:54:28<20:51:33, 7.29s/it] 15%|█▌ | 1884/12188 [3:54:35<20:08:31, 7.04s/it] {'loss': 0.4329, 'grad_norm': 0.8366251544825906, 'learning_rate': 9.59918915306774e-06, 'epoch': 0.15} + 15%|█▌ | 1884/12188 [3:54:35<20:08:31, 7.04s/it] 15%|█▌ | 1885/12188 [3:54:43<21:07:32, 7.38s/it] {'loss': 0.3685, 'grad_norm': 0.8616323462080726, 'learning_rate': 9.598667740851187e-06, 'epoch': 0.15} + 15%|█▌ | 1885/12188 [3:54:43<21:07:32, 7.38s/it] 15%|█▌ | 1886/12188 [3:54:49<20:25:40, 7.14s/it] {'loss': 0.4375, 'grad_norm': 0.7639225136960833, 'learning_rate': 9.598146003884103e-06, 'epoch': 0.15} + 15%|█▌ | 1886/12188 [3:54:49<20:25:40, 7.14s/it] 15%|█▌ | 1887/12188 [3:54:56<20:10:56, 7.05s/it] {'loss': 0.4339, 'grad_norm': 0.676434123459871, 'learning_rate': 9.597623942203337e-06, 'epoch': 0.15} + 15%|█▌ | 1887/12188 [3:54:56<20:10:56, 7.05s/it] 15%|█▌ | 1888/12188 [3:55:03<20:16:22, 7.09s/it] {'loss': 0.4067, 'grad_norm': 0.6568700793947285, 'learning_rate': 9.597101555845755e-06, 'epoch': 0.15} + 15%|█▌ | 1888/12188 [3:55:03<20:16:22, 7.09s/it] 15%|█▌ | 1889/12188 [3:55:12<21:27:40, 7.50s/it] {'loss': 0.4019, 'grad_norm': 0.6898847525327378, 'learning_rate': 9.596578844848248e-06, 'epoch': 0.15} + 15%|█▌ | 1889/12188 [3:55:12<21:27:40, 7.50s/it] 16%|█▌ | 1890/12188 [3:55:19<21:22:03, 7.47s/it] {'loss': 0.4215, 'grad_norm': 0.7230864807115737, 'learning_rate': 9.596055809247727e-06, 'epoch': 0.16} + 16%|█▌ | 1890/12188 [3:55:19<21:22:03, 7.47s/it] 16%|█▌ | 1891/12188 [3:55:26<20:46:45, 7.26s/it] {'loss': 0.3965, 'grad_norm': 0.7121888353139539, 'learning_rate': 9.595532449081128e-06, 'epoch': 0.16} + 16%|█▌ | 1891/12188 [3:55:26<20:46:45, 7.26s/it] 16%|█▌ | 1892/12188 [3:55:34<21:08:38, 7.39s/it] {'loss': 0.4081, 'grad_norm': 0.7329971853674438, 'learning_rate': 9.595008764385412e-06, 'epoch': 0.16} + 16%|█▌ | 1892/12188 [3:55:34<21:08:38, 7.39s/it] 16%|█▌ | 1893/12188 [3:55:41<20:49:27, 7.28s/it] {'loss': 0.3938, 'grad_norm': 0.6991748405418516, 'learning_rate': 9.59448475519756e-06, 'epoch': 0.16} + 16%|█▌ | 1893/12188 [3:55:41<20:49:27, 7.28s/it] 16%|█▌ | 1894/12188 [3:55:48<20:18:29, 7.10s/it] {'loss': 0.3644, 'grad_norm': 0.8053940419421122, 'learning_rate': 9.593960421554576e-06, 'epoch': 0.16} + 16%|█▌ | 1894/12188 [3:55:48<20:18:29, 7.10s/it] 16%|█▌ | 1895/12188 [3:55:55<20:59:36, 7.34s/it] {'loss': 0.3815, 'grad_norm': 1.003890782577942, 'learning_rate': 9.593435763493485e-06, 'epoch': 0.16} + 16%|█▌ | 1895/12188 [3:55:55<20:59:36, 7.34s/it] 16%|█▌ | 1896/12188 [3:56:03<21:06:50, 7.39s/it] {'loss': 0.3713, 'grad_norm': 0.7488656545348362, 'learning_rate': 9.592910781051344e-06, 'epoch': 0.16} + 16%|█▌ | 1896/12188 [3:56:03<21:06:50, 7.39s/it] 16%|█▌ | 1897/12188 [3:56:11<21:38:50, 7.57s/it] {'loss': 0.389, 'grad_norm': 0.8878539374585147, 'learning_rate': 9.592385474265222e-06, 'epoch': 0.16} + 16%|█▌ | 1897/12188 [3:56:11<21:38:50, 7.57s/it] 16%|█▌ | 1898/12188 [3:56:20<23:02:14, 8.06s/it] {'loss': 0.3602, 'grad_norm': 0.8517589865853034, 'learning_rate': 9.591859843172215e-06, 'epoch': 0.16} + 16%|█▌ | 1898/12188 [3:56:20<23:02:14, 8.06s/it] 16%|█▌ | 1899/12188 [3:56:27<22:08:33, 7.75s/it] {'loss': 0.3995, 'grad_norm': 1.0208696141636573, 'learning_rate': 9.591333887809441e-06, 'epoch': 0.16} + 16%|█▌ | 1899/12188 [3:56:27<22:08:33, 7.75s/it] 16%|█▌ | 1900/12188 [3:56:34<21:19:55, 7.46s/it] {'loss': 0.4344, 'grad_norm': 0.9054219441350552, 'learning_rate': 9.59080760821405e-06, 'epoch': 0.16} + 16%|█▌ | 1900/12188 [3:56:34<21:19:55, 7.46s/it] 16%|█▌ | 1901/12188 [3:56:41<20:51:13, 7.30s/it] {'loss': 0.391, 'grad_norm': 0.8634113871953338, 'learning_rate': 9.590281004423196e-06, 'epoch': 0.16} + 16%|█▌ | 1901/12188 [3:56:41<20:51:13, 7.30s/it] 16%|█▌ | 1902/12188 [3:56:48<20:42:03, 7.25s/it] {'loss': 0.4101, 'grad_norm': 0.7641915571996383, 'learning_rate': 9.589754076474077e-06, 'epoch': 0.16} + 16%|█▌ | 1902/12188 [3:56:48<20:42:03, 7.25s/it] 16%|█▌ | 1903/12188 [3:56:55<20:46:45, 7.27s/it] {'loss': 0.3846, 'grad_norm': 0.7413375795144811, 'learning_rate': 9.589226824403895e-06, 'epoch': 0.16} + 16%|█▌ | 1903/12188 [3:56:55<20:46:45, 7.27s/it] 16%|█▌ | 1904/12188 [3:57:02<20:37:54, 7.22s/it] {'loss': 0.3898, 'grad_norm': 0.7542964218527678, 'learning_rate': 9.588699248249889e-06, 'epoch': 0.16} + 16%|█▌ | 1904/12188 [3:57:02<20:37:54, 7.22s/it] 16%|█▌ | 1905/12188 [3:57:10<21:12:12, 7.42s/it] {'loss': 0.3518, 'grad_norm': 0.7647636541827816, 'learning_rate': 9.588171348049316e-06, 'epoch': 0.16} + 16%|█▌ | 1905/12188 [3:57:10<21:12:12, 7.42s/it] 16%|█▌ | 1906/12188 [3:57:17<20:29:02, 7.17s/it] {'loss': 0.3923, 'grad_norm': 1.0625161619101555, 'learning_rate': 9.587643123839451e-06, 'epoch': 0.16} + 16%|█▌ | 1906/12188 [3:57:17<20:29:02, 7.17s/it] 16%|█▌ | 1907/12188 [3:57:25<21:16:01, 7.45s/it] {'loss': 0.3979, 'grad_norm': 1.1278713748856695, 'learning_rate': 9.587114575657603e-06, 'epoch': 0.16} + 16%|█▌ | 1907/12188 [3:57:25<21:16:01, 7.45s/it] 16%|█▌ | 1908/12188 [3:57:33<21:48:57, 7.64s/it] {'loss': 0.4106, 'grad_norm': 0.7843092945758545, 'learning_rate': 9.586585703541092e-06, 'epoch': 0.16} + 16%|█▌ | 1908/12188 [3:57:33<21:48:57, 7.64s/it] 16%|█▌ | 1909/12188 [3:57:40<21:16:55, 7.45s/it] {'loss': 0.4328, 'grad_norm': 0.7049276949793865, 'learning_rate': 9.586056507527266e-06, 'epoch': 0.16} + 16%|█▌ | 1909/12188 [3:57:40<21:16:55, 7.45s/it] 16%|█▌ | 1910/12188 [3:57:48<21:23:18, 7.49s/it] {'loss': 0.4095, 'grad_norm': 0.8602624927156759, 'learning_rate': 9.5855269876535e-06, 'epoch': 0.16} + 16%|█▌ | 1910/12188 [3:57:48<21:23:18, 7.49s/it] 16%|█▌ | 1911/12188 [3:57:55<21:09:25, 7.41s/it] {'loss': 0.3934, 'grad_norm': 1.0080931328601075, 'learning_rate': 9.584997143957182e-06, 'epoch': 0.16} + 16%|█▌ | 1911/12188 [3:57:55<21:09:25, 7.41s/it] 16%|█▌ | 1912/12188 [3:58:02<21:13:02, 7.43s/it] {'loss': 0.395, 'grad_norm': 0.7455851684078676, 'learning_rate': 9.584466976475734e-06, 'epoch': 0.16} + 16%|█▌ | 1912/12188 [3:58:02<21:13:02, 7.43s/it] 16%|█▌ | 1913/12188 [3:58:10<21:10:20, 7.42s/it] {'loss': 0.4208, 'grad_norm': 0.6770224851453617, 'learning_rate': 9.583936485246593e-06, 'epoch': 0.16} + 16%|█▌ | 1913/12188 [3:58:10<21:10:20, 7.42s/it] 16%|█▌ | 1914/12188 [3:58:17<20:36:23, 7.22s/it] {'loss': 0.3612, 'grad_norm': 0.6414189899015822, 'learning_rate': 9.583405670307223e-06, 'epoch': 0.16} + 16%|█▌ | 1914/12188 [3:58:17<20:36:23, 7.22s/it] 16%|█▌ | 1915/12188 [3:58:24<20:30:05, 7.18s/it] {'loss': 0.382, 'grad_norm': 0.7442098524546827, 'learning_rate': 9.582874531695109e-06, 'epoch': 0.16} + 16%|█▌ | 1915/12188 [3:58:24<20:30:05, 7.18s/it] 16%|█▌ | 1916/12188 [3:58:31<20:28:18, 7.17s/it] {'loss': 0.4188, 'grad_norm': 0.9044930523605261, 'learning_rate': 9.582343069447758e-06, 'epoch': 0.16} + 16%|█▌ | 1916/12188 [3:58:31<20:28:18, 7.17s/it] 16%|█▌ | 1917/12188 [3:58:38<20:26:45, 7.17s/it] {'loss': 0.4305, 'grad_norm': 0.9074390520828923, 'learning_rate': 9.5818112836027e-06, 'epoch': 0.16} + 16%|█▌ | 1917/12188 [3:58:38<20:26:45, 7.17s/it] 16%|█▌ | 1918/12188 [3:58:46<21:00:09, 7.36s/it] {'loss': 0.4028, 'grad_norm': 0.7526769108197663, 'learning_rate': 9.581279174197491e-06, 'epoch': 0.16} + 16%|█▌ | 1918/12188 [3:58:46<21:00:09, 7.36s/it] 16%|█▌ | 1919/12188 [3:58:53<21:04:44, 7.39s/it] {'loss': 0.3946, 'grad_norm': 0.7773560750416476, 'learning_rate': 9.580746741269707e-06, 'epoch': 0.16} + 16%|█▌ | 1919/12188 [3:58:53<21:04:44, 7.39s/it] 16%|█▌ | 1920/12188 [3:59:00<20:31:23, 7.20s/it] {'loss': 0.3675, 'grad_norm': 0.6934782680379967, 'learning_rate': 9.580213984856947e-06, 'epoch': 0.16} + 16%|█▌ | 1920/12188 [3:59:00<20:31:23, 7.20s/it] 16%|█▌ | 1921/12188 [3:59:08<21:15:59, 7.46s/it] {'loss': 0.3711, 'grad_norm': 0.7537539249042504, 'learning_rate': 9.579680904996836e-06, 'epoch': 0.16} + 16%|█▌ | 1921/12188 [3:59:08<21:15:59, 7.46s/it] 16%|█▌ | 1922/12188 [3:59:15<20:53:54, 7.33s/it] {'loss': 0.3786, 'grad_norm': 0.6956439661188352, 'learning_rate': 9.579147501727018e-06, 'epoch': 0.16} + 16%|█▌ | 1922/12188 [3:59:15<20:53:54, 7.33s/it] 16%|█▌ | 1923/12188 [3:59:22<20:31:13, 7.20s/it] {'loss': 0.3732, 'grad_norm': 0.7376462050501253, 'learning_rate': 9.578613775085158e-06, 'epoch': 0.16} + 16%|█▌ | 1923/12188 [3:59:22<20:31:13, 7.20s/it] 16%|█▌ | 1924/12188 [3:59:29<20:24:47, 7.16s/it] {'loss': 0.3887, 'grad_norm': 0.649545634175982, 'learning_rate': 9.578079725108951e-06, 'epoch': 0.16} + 16%|█▌ | 1924/12188 [3:59:29<20:24:47, 7.16s/it] 16%|█▌ | 1925/12188 [3:59:36<20:21:09, 7.14s/it] {'loss': 0.3704, 'grad_norm': 0.8313741230397547, 'learning_rate': 9.577545351836108e-06, 'epoch': 0.16} + 16%|█▌ | 1925/12188 [3:59:36<20:21:09, 7.14s/it] 16%|█▌ | 1926/12188 [3:59:43<20:26:48, 7.17s/it] {'loss': 0.3832, 'grad_norm': 1.2730265102510308, 'learning_rate': 9.577010655304367e-06, 'epoch': 0.16} + 16%|█▌ | 1926/12188 [3:59:43<20:26:48, 7.17s/it] 16%|█▌ | 1927/12188 [3:59:50<19:59:15, 7.01s/it] {'loss': 0.4487, 'grad_norm': 0.7024896967487745, 'learning_rate': 9.576475635551486e-06, 'epoch': 0.16} + 16%|█▌ | 1927/12188 [3:59:50<19:59:15, 7.01s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f37eaa4d3a0> +[Try #0] Failed to fetch sample 4799078 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f37eaa4d3a0> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'User contributions'"}, {'from': 'gpt', 'value': '\nclick(x=0.858, y=0.374)\n'}]} + 16%|█▌ | 1928/12188 [3:59:57<19:56:12, 7.00s/it] {'loss': 0.3531, 'grad_norm': 0.7347799339838125, 'learning_rate': 9.575940292615248e-06, 'epoch': 0.16} + 16%|█▌ | 1928/12188 [3:59:57<19:56:12, 7.00s/it] 16%|█▌ | 1929/12188 [4:00:04<20:14:24, 7.10s/it] {'loss': 0.3638, 'grad_norm': 0.7592057417886311, 'learning_rate': 9.575404626533462e-06, 'epoch': 0.16} + 16%|█▌ | 1929/12188 [4:00:04<20:14:24, 7.10s/it] 16%|█▌ | 1930/12188 [4:00:11<19:48:30, 6.95s/it] {'loss': 0.3853, 'grad_norm': 2.6982678429240807, 'learning_rate': 9.574868637343949e-06, 'epoch': 0.16} + 16%|█▌ | 1930/12188 [4:00:11<19:48:30, 6.95s/it] 16%|█▌ | 1931/12188 [4:00:18<19:46:38, 6.94s/it] {'loss': 0.3601, 'grad_norm': 0.6649804021728528, 'learning_rate': 9.574332325084564e-06, 'epoch': 0.16} + 16%|█▌ | 1931/12188 [4:00:18<19:46:38, 6.94s/it] 16%|█▌ | 1932/12188 [4:00:25<19:41:35, 6.91s/it] {'loss': 0.3744, 'grad_norm': 0.6979823958217983, 'learning_rate': 9.573795689793179e-06, 'epoch': 0.16} + 16%|█▌ | 1932/12188 [4:00:25<19:41:35, 6.91s/it] 16%|█▌ | 1933/12188 [4:00:32<19:57:20, 7.01s/it] {'loss': 0.3906, 'grad_norm': 0.7959594244142736, 'learning_rate': 9.573258731507691e-06, 'epoch': 0.16} + 16%|█▌ | 1933/12188 [4:00:32<19:57:20, 7.01s/it] 16%|█▌ | 1934/12188 [4:00:40<21:06:50, 7.41s/it] {'loss': 0.3743, 'grad_norm': 0.9877746457939535, 'learning_rate': 9.572721450266018e-06, 'epoch': 0.16} + 16%|█▌ | 1934/12188 [4:00:40<21:06:50, 7.41s/it] 16%|█▌ | 1935/12188 [4:00:47<20:30:58, 7.20s/it] {'loss': 0.4141, 'grad_norm': 1.3764316004798711, 'learning_rate': 9.572183846106105e-06, 'epoch': 0.16} + 16%|█▌ | 1935/12188 [4:00:47<20:30:58, 7.20s/it] 16%|█▌ | 1936/12188 [4:00:54<20:44:33, 7.28s/it] {'loss': 0.4327, 'grad_norm': 0.7754140548419465, 'learning_rate': 9.571645919065913e-06, 'epoch': 0.16} + 16%|█▌ | 1936/12188 [4:00:54<20:44:33, 7.28s/it] 16%|█▌ | 1937/12188 [4:01:01<20:11:49, 7.09s/it] {'loss': 0.3809, 'grad_norm': 1.2003124492932895, 'learning_rate': 9.57110766918343e-06, 'epoch': 0.16} + 16%|█▌ | 1937/12188 [4:01:01<20:11:49, 7.09s/it] 16%|█��� | 1938/12188 [4:01:08<20:21:20, 7.15s/it] {'loss': 0.3597, 'grad_norm': 1.4499890631415089, 'learning_rate': 9.57056909649667e-06, 'epoch': 0.16} + 16%|█▌ | 1938/12188 [4:01:08<20:21:20, 7.15s/it] 16%|█▌ | 1939/12188 [4:01:15<20:13:08, 7.10s/it] {'loss': 0.4325, 'grad_norm': 0.7119809543584397, 'learning_rate': 9.570030201043662e-06, 'epoch': 0.16} + 16%|█▌ | 1939/12188 [4:01:15<20:13:08, 7.10s/it] 16%|█▌ | 1940/12188 [4:01:22<20:13:54, 7.11s/it] {'loss': 0.4172, 'grad_norm': 0.715413981662324, 'learning_rate': 9.569490982862464e-06, 'epoch': 0.16} + 16%|█▌ | 1940/12188 [4:01:22<20:13:54, 7.11s/it] 16%|█▌ | 1941/12188 [4:01:29<19:44:49, 6.94s/it] {'loss': 0.3994, 'grad_norm': 0.683315246471334, 'learning_rate': 9.568951441991154e-06, 'epoch': 0.16} + 16%|█▌ | 1941/12188 [4:01:29<19:44:49, 6.94s/it] 16%|█▌ | 1942/12188 [4:01:36<19:44:56, 6.94s/it] {'loss': 0.3864, 'grad_norm': 1.0402857033511357, 'learning_rate': 9.568411578467834e-06, 'epoch': 0.16} + 16%|█▌ | 1942/12188 [4:01:36<19:44:56, 6.94s/it] 16%|█▌ | 1943/12188 [4:01:43<20:09:26, 7.08s/it] {'loss': 0.4122, 'grad_norm': 0.9334044394999492, 'learning_rate': 9.567871392330627e-06, 'epoch': 0.16} + 16%|█▌ | 1943/12188 [4:01:43<20:09:26, 7.08s/it] 16%|█▌ | 1944/12188 [4:01:51<20:33:45, 7.23s/it] {'loss': 0.4237, 'grad_norm': 0.9896245713013732, 'learning_rate': 9.567330883617682e-06, 'epoch': 0.16} + 16%|█▌ | 1944/12188 [4:01:51<20:33:45, 7.23s/it] 16%|█▌ | 1945/12188 [4:01:58<20:09:26, 7.08s/it] {'loss': 0.414, 'grad_norm': 0.8477232654325453, 'learning_rate': 9.566790052367167e-06, 'epoch': 0.16} + 16%|█▌ | 1945/12188 [4:01:58<20:09:26, 7.08s/it] 16%|█▌ | 1946/12188 [4:02:04<19:42:03, 6.92s/it] {'loss': 0.4091, 'grad_norm': 1.745163961525163, 'learning_rate': 9.566248898617277e-06, 'epoch': 0.16} + 16%|█▌ | 1946/12188 [4:02:04<19:42:03, 6.92s/it] 16%|█▌ | 1947/12188 [4:02:11<19:32:56, 6.87s/it] {'loss': 0.4262, 'grad_norm': 0.7186767083130713, 'learning_rate': 9.565707422406227e-06, 'epoch': 0.16} + 16%|█▌ | 1947/12188 [4:02:11<19:32:56, 6.87s/it] 16%|█▌ | 1948/12188 [4:02:19<20:19:20, 7.14s/it] {'loss': 0.399, 'grad_norm': 0.9946328572321541, 'learning_rate': 9.565165623772252e-06, 'epoch': 0.16} + 16%|█▌ | 1948/12188 [4:02:19<20:19:20, 7.14s/it] 16%|█▌ | 1949/12188 [4:02:25<19:53:16, 6.99s/it] {'loss': 0.4196, 'grad_norm': 0.9051702325789885, 'learning_rate': 9.564623502753617e-06, 'epoch': 0.16} + 16%|█▌ | 1949/12188 [4:02:25<19:53:16, 6.99s/it] 16%|█▌ | 1950/12188 [4:02:33<20:01:33, 7.04s/it] {'loss': 0.4241, 'grad_norm': 0.7329518808790392, 'learning_rate': 9.5640810593886e-06, 'epoch': 0.16} + 16%|█▌ | 1950/12188 [4:02:33<20:01:33, 7.04s/it] 16%|█▌ | 1951/12188 [4:02:41<21:34:23, 7.59s/it] {'loss': 0.3533, 'grad_norm': 0.6059202459362284, 'learning_rate': 9.563538293715515e-06, 'epoch': 0.16} + 16%|█▌ | 1951/12188 [4:02:41<21:34:23, 7.59s/it] 16%|█▌ | 1952/12188 [4:02:49<21:29:45, 7.56s/it] {'loss': 0.3665, 'grad_norm': 0.6470734947006833, 'learning_rate': 9.562995205772686e-06, 'epoch': 0.16} + 16%|█▌ | 1952/12188 [4:02:49<21:29:45, 7.56s/it] 16%|█▌ | 1953/12188 [4:02:56<21:27:28, 7.55s/it] {'loss': 0.4003, 'grad_norm': 0.6333585598290781, 'learning_rate': 9.562451795598469e-06, 'epoch': 0.16} + 16%|█▌ | 1953/12188 [4:02:56<21:27:28, 7.55s/it] 16%|█▌ | 1954/12188 [4:03:04<21:08:49, 7.44s/it] {'loss': 0.4443, 'grad_norm': 0.6459968971572155, 'learning_rate': 9.561908063231234e-06, 'epoch': 0.16} + 16%|█▌ | 1954/12188 [4:03:04<21:08:49, 7.44s/it] 16%|█▌ | 1955/12188 [4:03:12<21:54:50, 7.71s/it] {'loss': 0.4447, 'grad_norm': 0.6384250451743513, 'learning_rate': 9.561364008709382e-06, 'epoch': 0.16} + 16%|█▌ | 1955/12188 [4:03:12<21:54:50, 7.71s/it] 16%|█▌ | 1956/12188 [4:03:20<22:07:15, 7.78s/it] {'loss': 0.4164, 'grad_norm': 1.4414832387891947, 'learning_rate': 9.560819632071332e-06, 'epoch': 0.16} + 16%|█▌ | 1956/12188 [4:03:20<22:07:15, 7.78s/it] 16%|█▌ | 1957/12188 [4:03:27<21:22:14, 7.52s/it] {'loss': 0.4052, 'grad_norm': 0.8374636712224814, 'learning_rate': 9.560274933355526e-06, 'epoch': 0.16} + 16%|█▌ | 1957/12188 [4:03:27<21:22:14, 7.52s/it] 16%|█▌ | 1958/12188 [4:03:34<21:12:53, 7.47s/it] {'loss': 0.4082, 'grad_norm': 0.7260656505741745, 'learning_rate': 9.559729912600431e-06, 'epoch': 0.16} + 16%|█▌ | 1958/12188 [4:03:34<21:12:53, 7.47s/it] 16%|█▌ | 1959/12188 [4:03:41<20:44:01, 7.30s/it] {'loss': 0.3835, 'grad_norm': 0.6847775676553601, 'learning_rate': 9.559184569844537e-06, 'epoch': 0.16} + 16%|█▌ | 1959/12188 [4:03:41<20:44:01, 7.30s/it] 16%|█▌ | 1960/12188 [4:03:49<20:52:58, 7.35s/it] {'loss': 0.3743, 'grad_norm': 0.9222455941052295, 'learning_rate': 9.558638905126352e-06, 'epoch': 0.16} + 16%|█▌ | 1960/12188 [4:03:49<20:52:58, 7.35s/it] 16%|█▌ | 1961/12188 [4:03:56<20:45:12, 7.31s/it] {'loss': 0.4028, 'grad_norm': 1.4606496656768977, 'learning_rate': 9.558092918484412e-06, 'epoch': 0.16} + 16%|█▌ | 1961/12188 [4:03:56<20:45:12, 7.31s/it] 16%|█▌ | 1962/12188 [4:04:02<20:11:31, 7.11s/it] {'loss': 0.3955, 'grad_norm': 0.8013660666629057, 'learning_rate': 9.557546609957274e-06, 'epoch': 0.16} + 16%|█▌ | 1962/12188 [4:04:02<20:11:31, 7.11s/it] 16%|█▌ | 1963/12188 [4:04:10<20:13:43, 7.12s/it] {'loss': 0.4281, 'grad_norm': 0.7190178220713034, 'learning_rate': 9.556999979583514e-06, 'epoch': 0.16} + 16%|█▌ | 1963/12188 [4:04:10<20:13:43, 7.12s/it] 16%|█▌ | 1964/12188 [4:04:16<19:58:50, 7.04s/it] {'loss': 0.4166, 'grad_norm': 0.7247228636456992, 'learning_rate': 9.55645302740174e-06, 'epoch': 0.16} + 16%|█▌ | 1964/12188 [4:04:16<19:58:50, 7.04s/it] 16%|█▌ | 1965/12188 [4:04:24<20:10:17, 7.10s/it] {'loss': 0.357, 'grad_norm': 0.7074941284200869, 'learning_rate': 9.555905753450572e-06, 'epoch': 0.16} + 16%|█▌ | 1965/12188 [4:04:24<20:10:17, 7.10s/it] 16%|█▌ | 1966/12188 [4:04:30<19:53:49, 7.01s/it] {'loss': 0.3985, 'grad_norm': 0.9809315066710028, 'learning_rate': 9.55535815776866e-06, 'epoch': 0.16} + 16%|█▌ | 1966/12188 [4:04:30<19:53:49, 7.01s/it] 16%|█▌ | 1967/12188 [4:04:38<20:07:52, 7.09s/it] {'loss': 0.3668, 'grad_norm': 0.9563937998682773, 'learning_rate': 9.554810240394674e-06, 'epoch': 0.16} + 16%|█▌ | 1967/12188 [4:04:38<20:07:52, 7.09s/it] 16%|█▌ | 1968/12188 [4:04:45<20:01:54, 7.06s/it] {'loss': 0.354, 'grad_norm': 2.8834831451153793, 'learning_rate': 9.554262001367305e-06, 'epoch': 0.16} + 16%|█▌ | 1968/12188 [4:04:45<20:01:54, 7.06s/it] 16%|█▌ | 1969/12188 [4:04:52<20:10:33, 7.11s/it] {'loss': 0.4132, 'grad_norm': 0.7654506604058557, 'learning_rate': 9.553713440725271e-06, 'epoch': 0.16} + 16%|█▌ | 1969/12188 [4:04:52<20:10:33, 7.11s/it] 16%|█▌ | 1970/12188 [4:05:01<21:40:05, 7.63s/it] {'loss': 0.3702, 'grad_norm': 0.7483664777073545, 'learning_rate': 9.55316455850731e-06, 'epoch': 0.16} + 16%|█▌ | 1970/12188 [4:05:01<21:40:05, 7.63s/it] 16%|█▌ | 1971/12188 [4:05:09<22:16:00, 7.85s/it] {'loss': 0.4018, 'grad_norm': 0.7046036929955434, 'learning_rate': 9.552615354752184e-06, 'epoch': 0.16} + 16%|█▌ | 1971/12188 [4:05:09<22:16:00, 7.85s/it] 16%|█▌ | 1972/12188 [4:05:17<22:38:39, 7.98s/it] {'loss': 0.4123, 'grad_norm': 0.8118892450052483, 'learning_rate': 9.552065829498676e-06, 'epoch': 0.16} + 16%|█▌ | 1972/12188 [4:05:17<22:38:39, 7.98s/it] 16%|█▌ | 1973/12188 [4:05:24<21:34:35, 7.60s/it] {'loss': 0.3889, 'grad_norm': 0.65021370054662, 'learning_rate': 9.55151598278559e-06, 'epoch': 0.16} + 16%|█▌ | 1973/12188 [4:05:24<21:34:35, 7.60s/it] 16%|█▌ | 1974/12188 [4:05:31<21:10:00, 7.46s/it] {'loss': 0.3633, 'grad_norm': 0.8943213531808847, 'learning_rate': 9.550965814651762e-06, 'epoch': 0.16} + 16%|█▌ | 1974/12188 [4:05:31<21:10:00, 7.46s/it] 16%|█▌ | 1975/12188 [4:05:38<20:30:54, 7.23s/it] {'loss': 0.4457, 'grad_norm': 0.7446667355558567, 'learning_rate': 9.550415325136038e-06, 'epoch': 0.16} + 16%|█▌ | 1975/12188 [4:05:38<20:30:54, 7.23s/it] 16%|█▌ | 1976/12188 [4:05:45<20:29:47, 7.23s/it] {'loss': 0.3568, 'grad_norm': 0.6142091809001924, 'learning_rate': 9.549864514277294e-06, 'epoch': 0.16} + 16%|█▌ | 1976/12188 [4:05:45<20:29:47, 7.23s/it] 16%|█▌ | 1977/12188 [4:05:52<20:28:23, 7.22s/it] {'loss': 0.4088, 'grad_norm': 0.6659297831608174, 'learning_rate': 9.549313382114427e-06, 'epoch': 0.16} + 16%|█▌ | 1977/12188 [4:05:52<20:28:23, 7.22s/it] 16%|█▌ | 1978/12188 [4:06:00<20:35:10, 7.26s/it] {'loss': 0.3599, 'grad_norm': 0.6707498096605559, 'learning_rate': 9.54876192868636e-06, 'epoch': 0.16} + 16%|█▌ | 1978/12188 [4:06:00<20:35:10, 7.26s/it] 16%|█▌ | 1979/12188 [4:06:07<20:30:01, 7.23s/it] {'loss': 0.4261, 'grad_norm': 0.711571949258598, 'learning_rate': 9.548210154032032e-06, 'epoch': 0.16} + 16%|█▌ | 1979/12188 [4:06:07<20:30:01, 7.23s/it] 16%|█▌ | 1980/12188 [4:06:14<20:45:07, 7.32s/it] {'loss': 0.3817, 'grad_norm': 0.6908732747200182, 'learning_rate': 9.547658058190412e-06, 'epoch': 0.16} + 16%|█▌ | 1980/12188 [4:06:14<20:45:07, 7.32s/it] 16%|█▋ | 1981/12188 [4:06:22<20:36:27, 7.27s/it] {'loss': 0.3689, 'grad_norm': 0.6305463124520796, 'learning_rate': 9.547105641200484e-06, 'epoch': 0.16} + 16%|█▋ | 1981/12188 [4:06:22<20:36:27, 7.27s/it] 16%|█▋ | 1982/12188 [4:06:29<20:21:56, 7.18s/it] {'loss': 0.3505, 'grad_norm': 0.6179914685371531, 'learning_rate': 9.546552903101263e-06, 'epoch': 0.16} + 16%|█▋ | 1982/12188 [4:06:29<20:21:56, 7.18s/it] 16%|█▋ | 1983/12188 [4:06:35<19:54:56, 7.03s/it] {'loss': 0.3852, 'grad_norm': 0.7049669203361869, 'learning_rate': 9.545999843931782e-06, 'epoch': 0.16} + 16%|█▋ | 1983/12188 [4:06:35<19:54:56, 7.03s/it] 16%|█▋ | 1984/12188 [4:06:43<20:17:42, 7.16s/it] {'loss': 0.3744, 'grad_norm': 0.6228365107223868, 'learning_rate': 9.545446463731092e-06, 'epoch': 0.16} + 16%|█▋ | 1984/12188 [4:06:43<20:17:42, 7.16s/it] 16%|█▋ | 1985/12188 [4:06:50<20:16:16, 7.15s/it] {'loss': 0.4101, 'grad_norm': 0.6367485527420049, 'learning_rate': 9.544892762538279e-06, 'epoch': 0.16} + 16%|█▋ | 1985/12188 [4:06:50<20:16:16, 7.15s/it] 16%|█▋ | 1986/12188 [4:06:57<20:24:15, 7.20s/it] {'loss': 0.438, 'grad_norm': 0.7522629680204149, 'learning_rate': 9.544338740392438e-06, 'epoch': 0.16} + 16%|█▋ | 1986/12188 [4:06:57<20:24:15, 7.20s/it] 16%|█▋ | 1987/12188 [4:07:04<20:00:30, 7.06s/it] {'loss': 0.3621, 'grad_norm': 0.6541765516724902, 'learning_rate': 9.543784397332698e-06, 'epoch': 0.16} + 16%|█▋ | 1987/12188 [4:07:04<20:00:30, 7.06s/it] 16%|█▋ | 1988/12188 [4:07:11<19:59:18, 7.05s/it] {'loss': 0.3893, 'grad_norm': 0.6450138292144433, 'learning_rate': 9.543229733398203e-06, 'epoch': 0.16} + 16%|█▋ | 1988/12188 [4:07:11<19:59:18, 7.05s/it] 16%|█▋ | 1989/12188 [4:07:17<19:26:34, 6.86s/it] {'loss': 0.4026, 'grad_norm': 0.6404737565847795, 'learning_rate': 9.542674748628125e-06, 'epoch': 0.16} + 16%|█▋ | 1989/12188 [4:07:17<19:26:34, 6.86s/it] 16%|█▋ | 1990/12188 [4:07:24<19:19:10, 6.82s/it] {'loss': 0.3734, 'grad_norm': 0.8092023355456673, 'learning_rate': 9.542119443061652e-06, 'epoch': 0.16} + 16%|█▋ | 1990/12188 [4:07:24<19:19:10, 6.82s/it] 16%|█▋ | 1991/12188 [4:07:31<19:30:46, 6.89s/it] {'loss': 0.4017, 'grad_norm': 0.690983312456753, 'learning_rate': 9.541563816738005e-06, 'epoch': 0.16} + 16%|█▋ | 1991/12188 [4:07:31<19:30:46, 6.89s/it] 16%|█▋ | 1992/12188 [4:07:38<19:25:51, 6.86s/it] {'loss': 0.4275, 'grad_norm': 0.6182705725217754, 'learning_rate': 9.541007869696415e-06, 'epoch': 0.16} + 16%|█▋ | 1992/12188 [4:07:38<19:25:51, 6.86s/it] 16%|█▋ | 1993/12188 [4:07:45<19:59:37, 7.06s/it] {'loss': 0.3895, 'grad_norm': 0.6857874932346671, 'learning_rate': 9.540451601976147e-06, 'epoch': 0.16} + 16%|█▋ | 1993/12188 [4:07:45<19:59:37, 7.06s/it] 16%|█▋ | 1994/12188 [4:07:54<21:31:32, 7.60s/it] {'loss': 0.3872, 'grad_norm': 0.7411404060654745, 'learning_rate': 9.53989501361648e-06, 'epoch': 0.16} + 16%|█▋ | 1994/12188 [4:07:54<21:31:32, 7.60s/it] 16%|█▋ | 1995/12188 [4:08:01<20:46:10, 7.34s/it] {'loss': 0.3674, 'grad_norm': 0.6319110773086042, 'learning_rate': 9.539338104656722e-06, 'epoch': 0.16} + 16%|█▋ | 1995/12188 [4:08:01<20:46:10, 7.34s/it] 16%|█▋ | 1996/12188 [4:08:08<20:29:46, 7.24s/it] {'loss': 0.4, 'grad_norm': 0.7146678155642402, 'learning_rate': 9.538780875136197e-06, 'epoch': 0.16} + 16%|█▋ | 1996/12188 [4:08:08<20:29:46, 7.24s/it] 16%|█▋ | 1997/12188 [4:08:16<21:04:07, 7.44s/it] {'loss': 0.4144, 'grad_norm': 0.6481602861396486, 'learning_rate': 9.538223325094263e-06, 'epoch': 0.16} + 16%|█▋ | 1997/12188 [4:08:16<21:04:07, 7.44s/it] 16%|█▋ | 1998/12188 [4:08:23<20:30:56, 7.25s/it] {'loss': 0.3759, 'grad_norm': 0.7141142156759234, 'learning_rate': 9.537665454570287e-06, 'epoch': 0.16} + 16%|█▋ | 1998/12188 [4:08:23<20:30:56, 7.25s/it] 16%|█▋ | 1999/12188 [4:08:29<20:03:07, 7.08s/it] {'loss': 0.3976, 'grad_norm': 0.6643782637755888, 'learning_rate': 9.537107263603666e-06, 'epoch': 0.16} + 16%|█▋ | 1999/12188 [4:08:29<20:03:07, 7.08s/it] 16%|█▋ | 2000/12188 [4:08:37<20:09:04, 7.12s/it] {'loss': 0.3754, 'grad_norm': 0.6615330866869896, 'learning_rate': 9.536548752233822e-06, 'epoch': 0.16} + 16%|█▋ | 2000/12188 [4:08:37<20:09:04, 7.12s/it]/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/utils/checkpoint.py:87: UserWarning: None of the inputs have requires_grad=True. Gradients will be None + warnings.warn( + 16%|█▋ | 2001/12188 [4:09:01<35:01:11, 12.38s/it] {'loss': 0.3779, 'grad_norm': 0.6454987640312385, 'learning_rate': 9.535989920500192e-06, 'epoch': 0.16} + 16%|█▋ | 2001/12188 [4:09:01<35:01:11, 12.38s/it] 16%|█▋ | 2002/12188 [4:09:08<30:31:56, 10.79s/it] {'loss': 0.368, 'grad_norm': 0.666721224712006, 'learning_rate': 9.535430768442243e-06, 'epoch': 0.16} + 16%|█▋ | 2002/12188 [4:09:08<30:31:56, 10.79s/it] 16%|█▋ | 2003/12188 [4:09:16<27:42:46, 9.80s/it] {'loss': 0.3944, 'grad_norm': 0.6389178572387327, 'learning_rate': 9.534871296099457e-06, 'epoch': 0.16} + 16%|█▋ | 2003/12188 [4:09:16<27:42:46, 9.80s/it] 16%|█▋ | 2004/12188 [4:09:23<25:20:51, 8.96s/it] {'loss': 0.4254, 'grad_norm': 0.7317108104882305, 'learning_rate': 9.534311503511349e-06, 'epoch': 0.16} + 16%|█▋ | 2004/12188 [4:09:23<25:20:51, 8.96s/it] 16%|█▋ | 2005/12188 [4:09:30<23:53:57, 8.45s/it] {'loss': 0.413, 'grad_norm': 0.6762410647157744, 'learning_rate': 9.533751390717445e-06, 'epoch': 0.16} + 16%|█▋ | 2005/12188 [4:09:30<23:53:57, 8.45s/it] 16%|█▋ | 2006/12188 [4:09:37<22:33:35, 7.98s/it] {'loss': 0.34, 'grad_norm': 0.6444889923973044, 'learning_rate': 9.533190957757304e-06, 'epoch': 0.16} + 16%|█▋ | 2006/12188 [4:09:37<22:33:35, 7.98s/it] 16%|█▋ | 2007/12188 [4:09:44<21:30:24, 7.60s/it] {'loss': 0.3852, 'grad_norm': 0.6644876794096636, 'learning_rate': 9.532630204670498e-06, 'epoch': 0.16} + 16%|█▋ | 2007/12188 [4:09:44<21:30:24, 7.60s/it] 16%|█▋ | 2008/12188 [4:09:51<21:03:05, 7.44s/it] {'loss': 0.3824, 'grad_norm': 0.6724701353146899, 'learning_rate': 9.53206913149663e-06, 'epoch': 0.16} + 16%|█▋ | 2008/12188 [4:09:51<21:03:05, 7.44s/it] 16%|█▋ | 2009/12188 [4:09:58<20:35:42, 7.28s/it] {'loss': 0.3677, 'grad_norm': 0.8050560220734574, 'learning_rate': 9.531507738275322e-06, 'epoch': 0.16} + 16%|█▋ | 2009/12188 [4:09:58<20:35:42, 7.28s/it] 16%|█▋ | 2010/12188 [4:10:05<20:30:01, 7.25s/it] {'loss': 0.403, 'grad_norm': 0.697766078198689, 'learning_rate': 9.53094602504622e-06, 'epoch': 0.16} + 16%|█▋ | 2010/12188 [4:10:05<20:30:01, 7.25s/it] 16%|█▋ | 2011/12188 [4:10:12<19:59:25, 7.07s/it] {'loss': 0.4098, 'grad_norm': 0.6465449553753231, 'learning_rate': 9.530383991848986e-06, 'epoch': 0.16} + 16%|█▋ | 2011/12188 [4:10:12<19:59:25, 7.07s/it] 17%|█▋ | 2012/12188 [4:10:19<20:12:52, 7.15s/it] {'loss': 0.3874, 'grad_norm': 0.6301280259493615, 'learning_rate': 9.529821638723316e-06, 'epoch': 0.17} + 17%|█▋ | 2012/12188 [4:10:19<20:12:52, 7.15s/it] 17%|█▋ | 2013/12188 [4:10:26<20:11:05, 7.14s/it] {'loss': 0.3944, 'grad_norm': 0.6671963271781229, 'learning_rate': 9.529258965708916e-06, 'epoch': 0.17} + 17%|█▋ | 2013/12188 [4:10:26<20:11:05, 7.14s/it] 17%|█▋ | 2014/12188 [4:10:34<20:36:23, 7.29s/it] {'loss': 0.3976, 'grad_norm': 0.6370267983084557, 'learning_rate': 9.528695972845528e-06, 'epoch': 0.17} + 17%|█▋ | 2014/12188 [4:10:34<20:36:23, 7.29s/it] 17%|█▋ | 2015/12188 [4:10:41<20:35:50, 7.29s/it] {'loss': 0.3799, 'grad_norm': 0.6772344206516141, 'learning_rate': 9.528132660172904e-06, 'epoch': 0.17} + 17%|█▋ | 2015/12188 [4:10:41<20:35:50, 7.29s/it] 17%|█▋ | 2016/12188 [4:10:48<20:15:02, 7.17s/it] {'loss': 0.3982, 'grad_norm': 0.6947358861704559, 'learning_rate': 9.527569027730827e-06, 'epoch': 0.17} + 17%|█▋ | 2016/12188 [4:10:48<20:15:02, 7.17s/it] 17%|█▋ | 2017/12188 [4:10:56<21:20:48, 7.56s/it] {'loss': 0.3853, 'grad_norm': 0.6575675545886158, 'learning_rate': 9.5270050755591e-06, 'epoch': 0.17} + 17%|█▋ | 2017/12188 [4:10:56<21:20:48, 7.56s/it] 17%|█▋ | 2018/12188 [4:11:03<20:59:04, 7.43s/it] {'loss': 0.4106, 'grad_norm': 0.645696202976445, 'learning_rate': 9.526440803697548e-06, 'epoch': 0.17} + 17%|█▋ | 2018/12188 [4:11:03<20:59:04, 7.43s/it] 17%|█▋ | 2019/12188 [4:11:10<20:42:52, 7.33s/it] {'loss': 0.3854, 'grad_norm': 1.0451687351123693, 'learning_rate': 9.525876212186018e-06, 'epoch': 0.17} + 17%|█▋ | 2019/12188 [4:11:10<20:42:52, 7.33s/it] 17%|█▋ | 2020/12188 [4:11:17<20:17:56, 7.19s/it] {'loss': 0.4157, 'grad_norm': 0.6746753921879453, 'learning_rate': 9.525311301064382e-06, 'epoch': 0.17} + 17%|█▋ | 2020/12188 [4:11:17<20:17:56, 7.19s/it] 17%|█▋ | 2021/12188 [4:11:26<21:11:06, 7.50s/it] {'loss': 0.396, 'grad_norm': 0.650482297441164, 'learning_rate': 9.524746070372532e-06, 'epoch': 0.17} + 17%|█▋ | 2021/12188 [4:11:26<21:11:06, 7.50s/it] 17%|█▋ | 2022/12188 [4:11:32<20:28:30, 7.25s/it] {'loss': 0.3997, 'grad_norm': 0.6073364183742793, 'learning_rate': 9.524180520150383e-06, 'epoch': 0.17} + 17%|█▋ | 2022/12188 [4:11:32<20:28:30, 7.25s/it] 17%|█▋ | 2023/12188 [4:11:39<20:17:51, 7.19s/it] {'loss': 0.3648, 'grad_norm': 0.6592839151429709, 'learning_rate': 9.523614650437876e-06, 'epoch': 0.17} + 17%|█▋ | 2023/12188 [4:11:39<20:17:51, 7.19s/it] 17%|█▋ | 2024/12188 [4:11:46<19:52:46, 7.04s/it] {'loss': 0.3719, 'grad_norm': 0.5890399739925504, 'learning_rate': 9.523048461274968e-06, 'epoch': 0.17} + 17%|█▋ | 2024/12188 [4:11:46<19:52:46, 7.04s/it] 17%|█▋ | 2025/12188 [4:11:53<19:45:22, 7.00s/it] {'loss': 0.3832, 'grad_norm': 0.6456595556435875, 'learning_rate': 9.522481952701646e-06, 'epoch': 0.17} + 17%|█▋ | 2025/12188 [4:11:53<19:45:22, 7.00s/it] 17%|█▋ | 2026/12188 [4:12:00<20:13:03, 7.16s/it] {'loss': 0.3805, 'grad_norm': 0.635424696776647, 'learning_rate': 9.521915124757915e-06, 'epoch': 0.17} + 17%|█▋ | 2026/12188 [4:12:00<20:13:03, 7.16s/it] 17%|█▋ | 2027/12188 [4:12:07<19:55:51, 7.06s/it] {'loss': 0.3903, 'grad_norm': 0.7108972816665662, 'learning_rate': 9.521347977483802e-06, 'epoch': 0.17} + 17%|█▋ | 2027/12188 [4:12:07<19:55:51, 7.06s/it] 17%|█▋ | 2028/12188 [4:12:15<20:11:07, 7.15s/it] {'loss': 0.3893, 'grad_norm': 0.6790390324754764, 'learning_rate': 9.520780510919358e-06, 'epoch': 0.17} + 17%|█▋ | 2028/12188 [4:12:15<20:11:07, 7.15s/it] 17%|█▋ | 2029/12188 [4:12:22<20:14:29, 7.17s/it] {'loss': 0.4347, 'grad_norm': 0.6623001563476419, 'learning_rate': 9.52021272510466e-06, 'epoch': 0.17} + 17%|█▋ | 2029/12188 [4:12:22<20:14:29, 7.17s/it] 17%|█▋ | 2030/12188 [4:12:29<20:00:34, 7.09s/it] {'loss': 0.3786, 'grad_norm': 0.6756134243952981, 'learning_rate': 9.5196446200798e-06, 'epoch': 0.17} + 17%|█▋ | 2030/12188 [4:12:29<20:00:34, 7.09s/it] 17%|█▋ | 2031/12188 [4:12:35<19:43:21, 6.99s/it] {'loss': 0.3826, 'grad_norm': 0.6638170965607121, 'learning_rate': 9.519076195884898e-06, 'epoch': 0.17} + 17%|█▋ | 2031/12188 [4:12:35<19:43:21, 6.99s/it] 17%|█▋ | 2032/12188 [4:12:44<20:59:21, 7.44s/it] {'loss': 0.3677, 'grad_norm': 0.6593180172801646, 'learning_rate': 9.518507452560098e-06, 'epoch': 0.17} + 17%|█▋ | 2032/12188 [4:12:44<20:59:21, 7.44s/it] 17%|█▋ | 2033/12188 [4:12:52<21:05:15, 7.48s/it] {'loss': 0.4122, 'grad_norm': 0.6647054837471289, 'learning_rate': 9.517938390145557e-06, 'epoch': 0.17} + 17%|█▋ | 2033/12188 [4:12:52<21:05:15, 7.48s/it] 17%|█▋ | 2034/12188 [4:13:00<21:34:01, 7.65s/it] {'loss': 0.3664, 'grad_norm': 0.5681682591886665, 'learning_rate': 9.51736900868147e-06, 'epoch': 0.17} + 17%|█▋ | 2034/12188 [4:13:00<21:34:01, 7.65s/it]Invalidate trace cache @ step 2: expected module 1, but got module 364 + 17%|█▋ | 2035/12188 [4:13:06<20:51:59, 7.40s/it] {'loss': 0.745, 'grad_norm': 1.4093112600287505, 'learning_rate': 9.516799308208038e-06, 'epoch': 0.17} + 17%|█▋ | 2035/12188 [4:13:06<20:51:59, 7.40s/it] 17%|█▋ | 2036/12188 [4:13:14<20:56:20, 7.43s/it] {'loss': 0.3723, 'grad_norm': 0.6760218521077411, 'learning_rate': 9.516229288765497e-06, 'epoch': 0.17} + 17%|█▋ | 2036/12188 [4:13:14<20:56:20, 7.43s/it] 17%|█▋ | 2037/12188 [4:13:21<20:27:56, 7.26s/it] {'loss': 0.3821, 'grad_norm': 0.6352752496245146, 'learning_rate': 9.515658950394099e-06, 'epoch': 0.17} + 17%|█▋ | 2037/12188 [4:13:21<20:27:56, 7.26s/it] 17%|█▋ | 2038/12188 [4:13:28<20:20:51, 7.22s/it] {'loss': 0.4022, 'grad_norm': 0.690411557528179, 'learning_rate': 9.51508829313412e-06, 'epoch': 0.17} + 17%|█▋ | 2038/12188 [4:13:28<20:20:51, 7.22s/it] 17%|█▋ | 2039/12188 [4:13:36<21:27:43, 7.61s/it] {'loss': 0.3712, 'grad_norm': 0.6465252725924353, 'learning_rate': 9.514517317025862e-06, 'epoch': 0.17} + 17%|█▋ | 2039/12188 [4:13:36<21:27:43, 7.61s/it] 17%|█▋ | 2040/12188 [4:13:44<21:11:07, 7.52s/it] {'loss': 0.3591, 'grad_norm': 0.6041117795907213, 'learning_rate': 9.513946022109643e-06, 'epoch': 0.17} + 17%|█▋ | 2040/12188 [4:13:44<21:11:07, 7.52s/it] 17%|█▋ | 2041/12188 [4:13:51<20:48:20, 7.38s/it] {'loss': 0.3989, 'grad_norm': 0.6371733511640165, 'learning_rate': 9.513374408425809e-06, 'epoch': 0.17} + 17%|█▋ | 2041/12188 [4:13:51<20:48:20, 7.38s/it] 17%|█▋ | 2042/12188 [4:13:59<21:20:00, 7.57s/it] {'loss': 0.3784, 'grad_norm': 0.6645386425129249, 'learning_rate': 9.512802476014724e-06, 'epoch': 0.17} + 17%|█▋ | 2042/12188 [4:13:59<21:20:00, 7.57s/it] 17%|█▋ | 2043/12188 [4:14:07<22:02:14, 7.82s/it] {'loss': 0.3816, 'grad_norm': 0.6806526969839312, 'learning_rate': 9.512230224916779e-06, 'epoch': 0.17} + 17%|█▋ | 2043/12188 [4:14:07<22:02:14, 7.82s/it] 17%|█▋ | 2044/12188 [4:14:14<21:23:53, 7.59s/it] {'loss': 0.3879, 'grad_norm': 0.60840087199747, 'learning_rate': 9.511657655172387e-06, 'epoch': 0.17} + 17%|█▋ | 2044/12188 [4:14:14<21:23:53, 7.59s/it] 17%|█▋ | 2045/12188 [4:14:23<22:15:56, 7.90s/it] {'loss': 0.3946, 'grad_norm': 0.6853069552833762, 'learning_rate': 9.511084766821977e-06, 'epoch': 0.17} + 17%|█▋ | 2045/12188 [4:14:23<22:15:56, 7.90s/it] 17%|█▋ | 2046/12188 [4:14:30<21:45:40, 7.72s/it] {'loss': 0.4033, 'grad_norm': 0.6296207821916263, 'learning_rate': 9.51051155990601e-06, 'epoch': 0.17} + 17%|█▋ | 2046/12188 [4:14:30<21:45:40, 7.72s/it] 17%|█▋ | 2047/12188 [4:14:38<22:08:24, 7.86s/it] {'loss': 0.3913, 'grad_norm': 0.6188812020140116, 'learning_rate': 9.509938034464963e-06, 'epoch': 0.17} + 17%|█▋ | 2047/12188 [4:14:38<22:08:24, 7.86s/it] 17%|█▋ | 2048/12188 [4:14:45<21:28:06, 7.62s/it] {'loss': 0.4168, 'grad_norm': 0.6801244804332838, 'learning_rate': 9.509364190539337e-06, 'epoch': 0.17} + 17%|█▋ | 2048/12188 [4:14:45<21:28:06, 7.62s/it] 17%|█▋ | 2049/12188 [4:14:52<20:51:42, 7.41s/it] {'loss': 0.4011, 'grad_norm': 0.6499705079230528, 'learning_rate': 9.508790028169658e-06, 'epoch': 0.17} + 17%|█▋ | 2049/12188 [4:14:52<20:51:42, 7.41s/it] 17%|█▋ | 2050/12188 [4:14:59<20:25:11, 7.25s/it] {'loss': 0.3923, 'grad_norm': 0.5877648594634254, 'learning_rate': 9.508215547396471e-06, 'epoch': 0.17} + 17%|█▋ | 2050/12188 [4:14:59<20:25:11, 7.25s/it] 17%|█▋ | 2051/12188 [4:15:07<20:48:31, 7.39s/it] {'loss': 0.4277, 'grad_norm': 0.7362608193593966, 'learning_rate': 9.507640748260343e-06, 'epoch': 0.17} + 17%|█▋ | 2051/12188 [4:15:07<20:48:31, 7.39s/it] 17%|█▋ | 2052/12188 [4:15:14<20:53:15, 7.42s/it] {'loss': 0.3793, 'grad_norm': 0.6698426997993757, 'learning_rate': 9.50706563080187e-06, 'epoch': 0.17} + 17%|█▋ | 2052/12188 [4:15:14<20:53:15, 7.42s/it] 17%|█▋ | 2053/12188 [4:15:22<20:38:40, 7.33s/it] {'loss': 0.4107, 'grad_norm': 0.6148110396362916, 'learning_rate': 9.506490195061661e-06, 'epoch': 0.17} + 17%|█▋ | 2053/12188 [4:15:22<20:38:40, 7.33s/it] 17%|█▋ | 2054/12188 [4:15:28<20:18:30, 7.21s/it] {'loss': 0.3866, 'grad_norm': 0.6616398134284277, 'learning_rate': 9.505914441080357e-06, 'epoch': 0.17} + 17%|█▋ | 2054/12188 [4:15:28<20:18:30, 7.21s/it] 17%|█▋ | 2055/12188 [4:15:35<19:55:25, 7.08s/it] {'loss': 0.3608, 'grad_norm': 0.6520781994815675, 'learning_rate': 9.505338368898613e-06, 'epoch': 0.17} + 17%|█▋ | 2055/12188 [4:15:35<19:55:25, 7.08s/it] 17%|█▋ | 2056/12188 [4:15:43<20:41:08, 7.35s/it] {'loss': 0.3479, 'grad_norm': 0.5870593293940386, 'learning_rate': 9.504761978557114e-06, 'epoch': 0.17} + 17%|█▋ | 2056/12188 [4:15:43<20:41:08, 7.35s/it] 17%|█▋ | 2057/12188 [4:15:50<20:10:36, 7.17s/it] {'loss': 0.4008, 'grad_norm': 0.6978838600959042, 'learning_rate': 9.504185270096559e-06, 'epoch': 0.17} + 17%|█▋ | 2057/12188 [4:15:50<20:10:36, 7.17s/it] 17%|█▋ | 2058/12188 [4:15:57<19:54:13, 7.07s/it] {'loss': 0.4062, 'grad_norm': 0.7996099399003268, 'learning_rate': 9.503608243557677e-06, 'epoch': 0.17} + 17%|█▋ | 2058/12188 [4:15:57<19:54:13, 7.07s/it] 17%|█▋ | 2059/12188 [4:16:04<20:08:56, 7.16s/it] {'loss': 0.4023, 'grad_norm': 0.655076642423272, 'learning_rate': 9.503030898981218e-06, 'epoch': 0.17} + 17%|█▋ | 2059/12188 [4:16:04<20:08:56, 7.16s/it] 17%|█▋ | 2060/12188 [4:16:14<22:47:07, 8.10s/it] {'loss': 0.456, 'grad_norm': 0.6682407743629372, 'learning_rate': 9.50245323640795e-06, 'epoch': 0.17} + 17%|█▋ | 2060/12188 [4:16:14<22:47:07, 8.10s/it] 17%|█▋ | 2061/12188 [4:16:21<21:38:12, 7.69s/it] {'loss': 0.4156, 'grad_norm': 0.698400430902904, 'learning_rate': 9.501875255878668e-06, 'epoch': 0.17} + 17%|█▋ | 2061/12188 [4:16:21<21:38:12, 7.69s/it] 17%|█▋ | 2062/12188 [4:16:28<21:03:55, 7.49s/it] {'loss': 0.4155, 'grad_norm': 0.6414445837166823, 'learning_rate': 9.501296957434189e-06, 'epoch': 0.17} + 17%|█▋ | 2062/12188 [4:16:28<21:03:55, 7.49s/it] 17%|█▋ | 2063/12188 [4:16:35<20:32:26, 7.30s/it] {'loss': 0.3927, 'grad_norm': 0.7093097218178649, 'learning_rate': 9.500718341115351e-06, 'epoch': 0.17} + 17%|█▋ | 2063/12188 [4:16:35<20:32:26, 7.30s/it] 17%|█▋ | 2064/12188 [4:16:42<20:04:01, 7.14s/it] {'loss': 0.3605, 'grad_norm': 0.6625421664414551, 'learning_rate': 9.500139406963015e-06, 'epoch': 0.17} + 17%|█▋ | 2064/12188 [4:16:42<20:04:01, 7.14s/it] 17%|█▋ | 2065/12188 [4:16:49<19:50:32, 7.06s/it] {'loss': 0.3838, 'grad_norm': 0.6051510953902565, 'learning_rate': 9.499560155018063e-06, 'epoch': 0.17} + 17%|█▋ | 2065/12188 [4:16:49<19:50:32, 7.06s/it] 17%|█▋ | 2066/12188 [4:16:56<20:24:53, 7.26s/it] {'loss': 0.4194, 'grad_norm': 0.6208827235103149, 'learning_rate': 9.498980585321402e-06, 'epoch': 0.17} + 17%|█▋ | 2066/12188 [4:16:56<20:24:53, 7.26s/it] 17%|█▋ | 2067/12188 [4:17:03<20:12:33, 7.19s/it] {'loss': 0.3624, 'grad_norm': 0.6385513863172552, 'learning_rate': 9.498400697913961e-06, 'epoch': 0.17} + 17%|█▋ | 2067/12188 [4:17:03<20:12:33, 7.19s/it] 17%|█▋ | 2068/12188 [4:17:11<20:36:44, 7.33s/it] {'loss': 0.388, 'grad_norm': 0.6617461944382855, 'learning_rate': 9.497820492836688e-06, 'epoch': 0.17} + 17%|█▋ | 2068/12188 [4:17:11<20:36:44, 7.33s/it] 17%|█▋ | 2069/12188 [4:17:18<20:03:47, 7.14s/it] {'loss': 0.3684, 'grad_norm': 0.7383865685022031, 'learning_rate': 9.497239970130561e-06, 'epoch': 0.17} + 17%|█▋ | 2069/12188 [4:17:18<20:03:47, 7.14s/it] 17%|█▋ | 2070/12188 [4:17:25<19:52:17, 7.07s/it] {'loss': 0.3983, 'grad_norm': 0.6631743716555812, 'learning_rate': 9.496659129836572e-06, 'epoch': 0.17} + 17%|█▋ | 2070/12188 [4:17:25<19:52:17, 7.07s/it] 17%|█▋ | 2071/12188 [4:17:57<40:57:01, 14.57s/it] {'loss': 0.4097, 'grad_norm': 0.6863668626219841, 'learning_rate': 9.49607797199574e-06, 'epoch': 0.17} + 17%|█▋ | 2071/12188 [4:17:57<40:57:01, 14.57s/it] 17%|█▋ | 2072/12188 [4:18:04<34:47:32, 12.38s/it] {'loss': 0.391, 'grad_norm': 0.6754321401413568, 'learning_rate': 9.495496496649103e-06, 'epoch': 0.17} + 17%|█▋ | 2072/12188 [4:18:04<34:47:32, 12.38s/it] 17%|█▋ | 2073/12188 [4:18:11<30:22:40, 10.81s/it] {'loss': 0.4039, 'grad_norm': 0.6535940494846231, 'learning_rate': 9.494914703837727e-06, 'epoch': 0.17} + 17%|█▋ | 2073/12188 [4:18:11<30:22:40, 10.81s/it] 17%|█▋ | 2074/12188 [4:18:33<39:54:49, 14.21s/it] {'loss': 0.3832, 'grad_norm': 0.6671388477676423, 'learning_rate': 9.494332593602698e-06, 'epoch': 0.17} + 17%|█▋ | 2074/12188 [4:18:33<39:54:49, 14.21s/it] 17%|█▋ | 2075/12188 [4:18:59<49:23:31, 17.58s/it] {'loss': 0.3663, 'grad_norm': 0.634333440070248, 'learning_rate': 9.493750165985119e-06, 'epoch': 0.17} + 17%|█▋ | 2075/12188 [4:18:59<49:23:31, 17.58s/it] 17%|█▋ | 2076/12188 [4:19:22<54:23:22, 19.36s/it] {'loss': 0.3835, 'grad_norm': 0.6674061082982202, 'learning_rate': 9.493167421026125e-06, 'epoch': 0.17} + 17%|█▋ | 2076/12188 [4:19:22<54:23:22, 19.36s/it]Traceback (most recent call last): + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/ImageFile.py", line 280, in load + s = read(self.decodermaxblock) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/PngImagePlugin.py", line 989, in load_read + cid, pos, length = self.png.read() + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/PngImagePlugin.py", line 173, in read + length = i32(s) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/_binary.py", line 95, in i32be + return unpack_from(">I", c, o)[0] +struct.error: unpack_from requires a buffer of at least 4 bytes for unpacking 4 bytes at offset 0 (actual buffer size is 0) + +The above exception was the direct cause of the following exception: + +Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 90, in pil_loader + return img.convert("RGB") + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 993, in convert + self.load() + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/ImageFile.py", line 287, in load + raise OSError(msg) from e +OSError: image file is truncated +[Try #0] Failed to fetch sample 329452 in VC:s3://gui/aguvis/aguvis-stage1/omniact/images. Exception: image file is truncated +Problematic sample: {'image': 'train_1500.png', 'conversations': [{'from': 'human', 'value': '\nDistribute the present map location.'}, {'from': 'gpt', 'value': '\nclick(x=0.9646, y=0.0194)\n'}]} + 17%|█▋ | 2077/12188 [4:19:29<43:50:58, 15.61s/it] {'loss': 0.3846, 'grad_norm': 0.6720198206757878, 'learning_rate': 9.492584358766865e-06, 'epoch': 0.17} + 17%|█▋ | 2077/12188 [4:19:29<43:50:58, 15.61s/it] 17%|█▋ | 2078/12188 [4:20:10<65:22:32, 23.28s/it] {'loss': 0.3724, 'grad_norm': 0.6622905305338308, 'learning_rate': 9.492000979248517e-06, 'epoch': 0.17} + 17%|█▋ | 2078/12188 [4:20:10<65:22:32, 23.28s/it] 17%|█▋ | 2079/12188 [4:20:42<72:33:13, 25.84s/it] {'loss': 0.3771, 'grad_norm': 0.6485427453735093, 'learning_rate': 9.491417282512276e-06, 'epoch': 0.17} + 17%|█▋ | 2079/12188 [4:20:42<72:33:13, 25.84s/it] 17%|█▋ | 2080/12188 [4:20:50<57:05:51, 20.34s/it] {'loss': 0.353, 'grad_norm': 0.6374465809153983, 'learning_rate': 9.490833268599363e-06, 'epoch': 0.17} + 17%|█▋ | 2080/12188 [4:20:50<57:05:51, 20.34s/it] 17%|█▋ | 2081/12188 [4:20:57<46:05:35, 16.42s/it] {'loss': 0.3435, 'grad_norm': 0.653688377009864, 'learning_rate': 9.49024893755102e-06, 'epoch': 0.17} + 17%|█▋ | 2081/12188 [4:20:57<46:05:35, 16.42s/it] 17%|█▋ | 2082/12188 [4:21:04<38:09:24, 13.59s/it] {'loss': 0.4117, 'grad_norm': 0.6814514445925304, 'learning_rate': 9.48966428940851e-06, 'epoch': 0.17} + 17%|█▋ | 2082/12188 [4:21:04<38:09:24, 13.59s/it] 17%|█▋ | 2083/12188 [4:21:26<45:23:21, 16.17s/it] {'loss': 0.3967, 'grad_norm': 0.6250403365020534, 'learning_rate': 9.489079324213122e-06, 'epoch': 0.17} + 17%|█▋ | 2083/12188 [4:21:26<45:23:21, 16.17s/it] 17%|█▋ | 2084/12188 [4:22:31<86:34:32, 30.85s/it] {'loss': 0.3759, 'grad_norm': 0.655347769870268, 'learning_rate': 9.488494042006164e-06, 'epoch': 0.17} + 17%|█▋ | 2084/12188 [4:22:31<86:34:32, 30.85s/it] 17%|█▋ | 2085/12188 [4:22:38<66:38:54, 23.75s/it] {'loss': 0.4143, 'grad_norm': 0.6684577211007403, 'learning_rate': 9.487908442828968e-06, 'epoch': 0.17} + 17%|█▋ | 2085/12188 [4:22:38<66:38:54, 23.75s/it] 17%|█▋ | 2086/12188 [4:23:16<78:11:08, 27.86s/it] {'loss': 0.3549, 'grad_norm': 0.6494042171804018, 'learning_rate': 9.487322526722888e-06, 'epoch': 0.17} + 17%|█▋ | 2086/12188 [4:23:16<78:11:08, 27.86s/it] 17%|█▋ | 2087/12188 [4:23:23<60:36:54, 21.60s/it] {'loss': 0.3758, 'grad_norm': 0.6639681353878347, 'learning_rate': 9.486736293729302e-06, 'epoch': 0.17} + 17%|█▋ | 2087/12188 [4:23:23<60:36:54, 21.60s/it] 17%|█▋ | 2088/12188 [4:23:44<60:32:45, 21.58s/it] {'loss': 0.4048, 'grad_norm': 0.6904734339139861, 'learning_rate': 9.486149743889606e-06, 'epoch': 0.17} + 17%|█▋ | 2088/12188 [4:23:44<60:32:45, 21.58s/it] 17%|█▋ | 2089/12188 [4:23:52<48:26:08, 17.27s/it] {'loss': 0.3737, 'grad_norm': 0.6399937429851352, 'learning_rate': 9.485562877245222e-06, 'epoch': 0.17} + 17%|█▋ | 2089/12188 [4:23:52<48:26:08, 17.27s/it] 17%|█▋ | 2090/12188 [4:23:59<40:08:56, 14.31s/it] {'loss': 0.3873, 'grad_norm': 0.6288069678474457, 'learning_rate': 9.484975693837594e-06, 'epoch': 0.17} + 17%|█▋ | 2090/12188 [4:23:59<40:08:56, 14.31s/it] 17%|█▋ | 2091/12188 [4:24:40<62:26:29, 22.26s/it] {'loss': 0.3889, 'grad_norm': 0.6198744146856983, 'learning_rate': 9.48438819370819e-06, 'epoch': 0.17} + 17%|█▋ | 2091/12188 [4:24:40<62:26:29, 22.26s/it] 17%|█▋ | 2092/12188 [4:25:55<106:49:57, 38.09s/it] {'loss': 0.3749, 'grad_norm': 0.6378273296083161, 'learning_rate': 9.483800376898496e-06, 'epoch': 0.17} + 17%|█▋ | 2092/12188 [4:25:55<106:49:57, 38.09s/it] 17%|█▋ | 2093/12188 [4:26:35<108:23:20, 38.65s/it] {'loss': 0.4155, 'grad_norm': 0.7231097687565606, 'learning_rate': 9.483212243450024e-06, 'epoch': 0.17} + 17%|█▋ | 2093/12188 [4:26:35<108:23:20, 38.65s/it] 17%|█▋ | 2094/12188 [4:27:30<122:00:20, 43.51s/it] {'loss': 0.4249, 'grad_norm': 0.7032520856798052, 'learning_rate': 9.482623793404304e-06, 'epoch': 0.17} + 17%|█▋ | 2094/12188 [4:27:30<122:00:20, 43.51s/it] 17%|█▋ | 2095/12188 [4:27:51<103:35:41, 36.95s/it] {'loss': 0.402, 'grad_norm': 0.6243037195802119, 'learning_rate': 9.482035026802896e-06, 'epoch': 0.17} + 17%|█▋ | 2095/12188 [4:27:51<103:35:41, 36.95s/it] 17%|█▋ | 2096/12188 [4:28:53<124:24:24, 44.38s/it] {'loss': 0.4025, 'grad_norm': 0.6718219616355926, 'learning_rate': 9.481445943687372e-06, 'epoch': 0.17} + 17%|█▋ | 2096/12188 [4:28:53<124:24:24, 44.38s/it] 17%|█▋ | 2097/12188 [4:29:01<93:29:21, 33.35s/it] {'loss': 0.3775, 'grad_norm': 0.6486948240579343, 'learning_rate': 9.48085654409934e-06, 'epoch': 0.17} + 17%|█▋ | 2097/12188 [4:29:01<93:29:21, 33.35s/it] 17%|█▋ | 2098/12188 [4:29:46<103:22:14, 36.88s/it] {'loss': 0.3754, 'grad_norm': 0.8442830609225564, 'learning_rate': 9.480266828080414e-06, 'epoch': 0.17} + 17%|█▋ | 2098/12188 [4:29:46<103:22:14, 36.88s/it] 17%|█▋ | 2099/12188 [4:29:54<79:22:32, 28.32s/it] {'loss': 0.3985, 'grad_norm': 0.6240556700854979, 'learning_rate': 9.479676795672243e-06, 'epoch': 0.17} + 17%|█▋ | 2099/12188 [4:29:54<79:22:32, 28.32s/it] 17%|█▋ | 2100/12188 [4:30:01<61:34:11, 21.97s/it] {'loss': 0.3587, 'grad_norm': 0.6103422874355373, 'learning_rate': 9.479086446916496e-06, 'epoch': 0.17} + 17%|█▋ | 2100/12188 [4:30:01<61:34:11, 21.97s/it] 17%|█▋ | 2101/12188 [4:30:25<63:26:56, 22.64s/it] {'loss': 0.3651, 'grad_norm': 0.6459806798684672, 'learning_rate': 9.47849578185486e-06, 'epoch': 0.17} + 17%|█▋ | 2101/12188 [4:30:25<63:26:56, 22.64s/it] 17%|█▋ | 2102/12188 [4:31:26<95:18:43, 34.02s/it] {'loss': 0.3688, 'grad_norm': 0.6284468720633719, 'learning_rate': 9.477904800529045e-06, 'epoch': 0.17} + 17%|█▋ | 2102/12188 [4:31:26<95:18:43, 34.02s/it] 17%|█▋ | 2103/12188 [4:31:50<86:43:44, 30.96s/it] {'loss': 0.377, 'grad_norm': 0.6297547861872408, 'learning_rate': 9.477313502980789e-06, 'epoch': 0.17} + 17%|█▋ | 2103/12188 [4:31:50<86:43:44, 30.96s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f194bf19ee0> +[Try #0] Failed to fetch sample 4392554 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f194bf19ee0> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Special pages'"}, {'from': 'gpt', 'value': '\nclick(x=0.854, y=0.415)\n'}]} + 17%|█▋ | 2104/12188 [4:31:57<66:40:32, 23.80s/it] {'loss': 0.3884, 'grad_norm': 0.6147160731840557, 'learning_rate': 9.476721889251847e-06, 'epoch': 0.17} + 17%|█▋ | 2104/12188 [4:31:57<66:40:32, 23.80s/it] 17%|█▋ | 2105/12188 [4:32:04<52:48:10, 18.85s/it] {'loss': 0.3772, 'grad_norm': 0.6443563456447778, 'learning_rate': 9.476129959383997e-06, 'epoch': 0.17} + 17%|█▋ | 2105/12188 [4:32:04<52:48:10, 18.85s/it] 17%|█▋ | 2106/12188 [4:32:11<42:43:23, 15.26s/it] {'loss': 0.4077, 'grad_norm': 0.6551181382776884, 'learning_rate': 9.47553771341904e-06, 'epoch': 0.17} + 17%|█▋ | 2106/12188 [4:32:11<42:43:23, 15.26s/it] 17%|█▋ | 2107/12188 [4:32:38<52:23:07, 18.71s/it] {'loss': 0.4435, 'grad_norm': 0.6528804253001774, 'learning_rate': 9.474945151398802e-06, 'epoch': 0.17} + 17%|█▋ | 2107/12188 [4:32:38<52:23:07, 18.71s/it] 17%|█▋ | 2108/12188 [4:32:46<43:09:22, 15.41s/it] {'loss': 0.3888, 'grad_norm': 0.653221198244301, 'learning_rate': 9.474352273365126e-06, 'epoch': 0.17} + 17%|█▋ | 2108/12188 [4:32:46<43:09:22, 15.41s/it] 17%|█▋ | 2109/12188 [4:32:52<35:43:05, 12.76s/it] {'loss': 0.3662, 'grad_norm': 0.6765208052350777, 'learning_rate': 9.47375907935988e-06, 'epoch': 0.17} + 17%|█▋ | 2109/12188 [4:32:52<35:43:05, 12.76s/it] 17%|█▋ | 2110/12188 [4:32:59<30:51:52, 11.03s/it] {'loss': 0.4267, 'grad_norm': 0.6940520653946005, 'learning_rate': 9.473165569424957e-06, 'epoch': 0.17} + 17%|█▋ | 2110/12188 [4:32:59<30:51:52, 11.03s/it] 17%|█▋ | 2111/12188 [4:33:06<27:25:19, 9.80s/it] {'loss': 0.3425, 'grad_norm': 0.5960889305488823, 'learning_rate': 9.472571743602268e-06, 'epoch': 0.17} + 17%|█▋ | 2111/12188 [4:33:06<27:25:19, 9.80s/it] 17%|█▋ | 2112/12188 [4:33:14<25:47:08, 9.21s/it] {'loss': 0.377, 'grad_norm': 0.6229313662134756, 'learning_rate': 9.471977601933748e-06, 'epoch': 0.17} + 17%|█▋ | 2112/12188 [4:33:14<25:47:08, 9.21s/it] 17%|█▋ | 2113/12188 [4:33:21<23:51:46, 8.53s/it] {'loss': 0.3672, 'grad_norm': 0.6157005510837348, 'learning_rate': 9.471383144461355e-06, 'epoch': 0.17} + 17%|█▋ | 2113/12188 [4:33:21<23:51:46, 8.53s/it] 17%|█▋ | 2114/12188 [4:33:28<22:51:54, 8.17s/it] {'loss': 0.396, 'grad_norm': 0.6352097445820374, 'learning_rate': 9.470788371227066e-06, 'epoch': 0.17} + 17%|█▋ | 2114/12188 [4:33:28<22:51:54, 8.17s/it] 17%|█▋ | 2115/12188 [4:33:35<22:04:01, 7.89s/it] {'loss': 0.3726, 'grad_norm': 0.6359464422477211, 'learning_rate': 9.470193282272886e-06, 'epoch': 0.17} + 17%|█▋ | 2115/12188 [4:33:35<22:04:01, 7.89s/it] 17%|█▋ | 2116/12188 [4:33:42<21:11:33, 7.57s/it] {'loss': 0.3869, 'grad_norm': 0.6466230855266695, 'learning_rate': 9.46959787764084e-06, 'epoch': 0.17} + 17%|█▋ | 2116/12188 [4:33:42<21:11:33, 7.57s/it] 17%|█▋ | 2117/12188 [4:33:49<20:45:41, 7.42s/it] {'loss': 0.4248, 'grad_norm': 0.6937326723104547, 'learning_rate': 9.469002157372971e-06, 'epoch': 0.17} + 17%|█▋ | 2117/12188 [4:33:49<20:45:41, 7.42s/it] 17%|█▋ | 2118/12188 [4:33:57<20:51:58, 7.46s/it] {'loss': 0.4502, 'grad_norm': 0.7227652417622037, 'learning_rate': 9.46840612151135e-06, 'epoch': 0.17} + 17%|█▋ | 2118/12188 [4:33:57<20:51:58, 7.46s/it] 17%|█▋ | 2119/12188 [4:34:04<20:26:41, 7.31s/it] {'loss': 0.3789, 'grad_norm': 0.9368103418132826, 'learning_rate': 9.467809770098069e-06, 'epoch': 0.17} + 17%|█▋ | 2119/12188 [4:34:04<20:26:41, 7.31s/it] 17%|█▋ | 2120/12188 [4:34:43<47:10:41, 16.87s/it] {'loss': 0.3969, 'grad_norm': 0.6559765320590938, 'learning_rate': 9.467213103175239e-06, 'epoch': 0.17} + 17%|█▋ | 2120/12188 [4:34:43<47:10:41, 16.87s/it] 17%|█▋ | 2121/12188 [4:35:11<56:15:50, 20.12s/it] {'loss': 0.4373, 'grad_norm': 0.7130207267757129, 'learning_rate': 9.466616120784996e-06, 'epoch': 0.17} + 17%|█▋ | 2121/12188 [4:35:11<56:15:50, 20.12s/it] 17%|█▋ | 2122/12188 [4:35:55<76:36:01, 27.40s/it] {'loss': 0.4026, 'grad_norm': 0.6231641242689382, 'learning_rate': 9.466018822969502e-06, 'epoch': 0.17} + 17%|█▋ | 2122/12188 [4:35:55<76:36:01, 27.40s/it] 17%|█▋ | 2123/12188 [4:36:43<93:56:28, 33.60s/it] {'loss': 0.3914, 'grad_norm': 0.6071613195841269, 'learning_rate': 9.465421209770931e-06, 'epoch': 0.17} + 17%|█▋ | 2123/12188 [4:36:43<93:56:28, 33.60s/it] 17%|█▋ | 2124/12188 [4:37:45<117:10:37, 41.92s/it] {'loss': 0.4304, 'grad_norm': 0.7122064943758393, 'learning_rate': 9.464823281231489e-06, 'epoch': 0.17} + 17%|█▋ | 2124/12188 [4:37:45<117:10:37, 41.92s/it] 17%|█▋ | 2125/12188 [4:38:15<107:48:58, 38.57s/it] {'loss': 0.3896, 'grad_norm': 0.6888877215274384, 'learning_rate': 9.4642250373934e-06, 'epoch': 0.17} + 17%|█▋ | 2125/12188 [4:38:15<107:48:58, 38.57s/it] 17%|█▋ | 2126/12188 [4:39:04<116:37:34, 41.73s/it] {'loss': 0.4259, 'grad_norm': 0.6670143879958849, 'learning_rate': 9.463626478298912e-06, 'epoch': 0.17} + 17%|█▋ | 2126/12188 [4:39:04<116:37:34, 41.73s/it] 17%|█▋ | 2127/12188 [4:39:29<102:23:54, 36.64s/it] {'loss': 0.3686, 'grad_norm': 0.6746645275724326, 'learning_rate': 9.463027603990293e-06, 'epoch': 0.17} + 17%|█▋ | 2127/12188 [4:39:29<102:23:54, 36.64s/it] 17%|█▋ | 2128/12188 [4:39:52<91:15:25, 32.66s/it] {'loss': 0.3707, 'grad_norm': 0.6903876575486265, 'learning_rate': 9.462428414509834e-06, 'epoch': 0.17} + 17%|█▋ | 2128/12188 [4:39:53<91:15:25, 32.66s/it] 17%|█▋ | 2129/12188 [4:40:34<98:56:13, 35.41s/it] {'loss': 0.4032, 'grad_norm': 0.6903866236498873, 'learning_rate': 9.461828909899851e-06, 'epoch': 0.17} + 17%|█▋ | 2129/12188 [4:40:34<98:56:13, 35.41s/it] 17%|█▋ | 2130/12188 [4:41:23<109:58:54, 39.37s/it] {'loss': 0.3992, 'grad_norm': 0.7190446531792516, 'learning_rate': 9.46122909020268e-06, 'epoch': 0.17} + 17%|█▋ | 2130/12188 [4:41:23<109:58:54, 39.37s/it] 17%|█▋ | 2131/12188 [4:42:28<131:07:09, 46.94s/it] {'loss': 0.4284, 'grad_norm': 0.6599878379913392, 'learning_rate': 9.460628955460676e-06, 'epoch': 0.17} + 17%|█▋ | 2131/12188 [4:42:28<131:07:09, 46.94s/it] 17%|█▋ | 2132/12188 [4:42:52<112:29:32, 40.27s/it] {'loss': 0.3577, 'grad_norm': 0.7220997676346619, 'learning_rate': 9.460028505716223e-06, 'epoch': 0.17} + 17%|█▋ | 2132/12188 [4:42:52<112:29:32, 40.27s/it] 18%|█▊ | 2133/12188 [4:42:59<84:41:51, 30.32s/it] {'loss': 0.3605, 'grad_norm': 0.6562808994793623, 'learning_rate': 9.459427741011722e-06, 'epoch': 0.18} + 18%|█▊ | 2133/12188 [4:42:59<84:41:51, 30.32s/it] 18%|█▊ | 2134/12188 [4:43:06<65:06:49, 23.32s/it] {'loss': 0.3965, 'grad_norm': 0.6011486287540881, 'learning_rate': 9.458826661389598e-06, 'epoch': 0.18} + 18%|█▊ | 2134/12188 [4:43:06<65:06:49, 23.32s/it] 18%|█▊ | 2135/12188 [4:43:14<51:49:35, 18.56s/it] {'loss': 0.3958, 'grad_norm': 0.6578600105842128, 'learning_rate': 9.4582252668923e-06, 'epoch': 0.18} + 18%|█▊ | 2135/12188 [4:43:14<51:49:35, 18.56s/it] 18%|█▊ | 2136/12188 [4:43:21<42:22:05, 15.17s/it] {'loss': 0.3844, 'grad_norm': 0.6875046296203334, 'learning_rate': 9.457623557562294e-06, 'epoch': 0.18} + 18%|█▊ | 2136/12188 [4:43:21<42:22:05, 15.17s/it] 18%|█▊ | 2137/12188 [4:43:28<35:26:52, 12.70s/it] {'loss': 0.4127, 'grad_norm': 0.6714086925935329, 'learning_rate': 9.457021533442077e-06, 'epoch': 0.18} + 18%|█▊ | 2137/12188 [4:43:28<35:26:52, 12.70s/it] 18%|█▊ | 2138/12188 [4:43:36<31:07:16, 11.15s/it] {'loss': 0.4163, 'grad_norm': 0.6432822982080353, 'learning_rate': 9.456419194574158e-06, 'epoch': 0.18} + 18%|█▊ | 2138/12188 [4:43:36<31:07:16, 11.15s/it] 18%|█▊ | 2139/12188 [4:43:43<27:53:07, 9.99s/it] {'loss': 0.3708, 'grad_norm': 0.6333231796941724, 'learning_rate': 9.455816541001076e-06, 'epoch': 0.18} + 18%|█▊ | 2139/12188 [4:43:43<27:53:07, 9.99s/it] 18%|█▊ | 2140/12188 [4:43:50<25:19:05, 9.07s/it] {'loss': 0.371, 'grad_norm': 0.6589952753261158, 'learning_rate': 9.45521357276539e-06, 'epoch': 0.18} + 18%|█▊ | 2140/12188 [4:43:50<25:19:05, 9.07s/it] 18%|█▊ | 2141/12188 [4:43:57<23:34:49, 8.45s/it] {'loss': 0.3917, 'grad_norm': 0.6704613264384965, 'learning_rate': 9.454610289909677e-06, 'epoch': 0.18} + 18%|█▊ | 2141/12188 [4:43:57<23:34:49, 8.45s/it] 18%|█▊ | 2142/12188 [4:44:04<22:24:55, 8.03s/it] {'loss': 0.369, 'grad_norm': 0.635807791993477, 'learning_rate': 9.454006692476542e-06, 'epoch': 0.18} + 18%|█▊ | 2142/12188 [4:44:04<22:24:55, 8.03s/it] 18%|█▊ | 2143/12188 [4:44:12<22:28:39, 8.06s/it] {'loss': 0.3799, 'grad_norm': 0.6616100436931275, 'learning_rate': 9.453402780508613e-06, 'epoch': 0.18} + 18%|█▊ | 2143/12188 [4:44:12<22:28:39, 8.06s/it] 18%|█▊ | 2144/12188 [4:44:19<21:28:20, 7.70s/it] {'loss': 0.3963, 'grad_norm': 0.6607526549309787, 'learning_rate': 9.452798554048533e-06, 'epoch': 0.18} + 18%|█▊ | 2144/12188 [4:44:19<21:28:20, 7.70s/it] 18%|█▊ | 2145/12188 [4:44:26<21:23:48, 7.67s/it] {'loss': 0.3866, 'grad_norm': 0.6887396052447419, 'learning_rate': 9.452194013138973e-06, 'epoch': 0.18} + 18%|█▊ | 2145/12188 [4:44:26<21:23:48, 7.67s/it] 18%|█▊ | 2146/12188 [4:44:34<20:58:44, 7.52s/it] {'loss': 0.3962, 'grad_norm': 0.7929722795295286, 'learning_rate': 9.451589157822625e-06, 'epoch': 0.18} + 18%|█▊ | 2146/12188 [4:44:34<20:58:44, 7.52s/it] 18%|█▊ | 2147/12188 [4:44:41<21:10:15, 7.59s/it] {'loss': 0.3814, 'grad_norm': 0.6341056491494356, 'learning_rate': 9.450983988142204e-06, 'epoch': 0.18} + 18%|█▊ | 2147/12188 [4:44:41<21:10:15, 7.59s/it] 18%|█▊ | 2148/12188 [4:44:49<20:59:32, 7.53s/it] {'loss': 0.353, 'grad_norm': 0.6542684426685678, 'learning_rate': 9.450378504140442e-06, 'epoch': 0.18} + 18%|█▊ | 2148/12188 [4:44:49<20:59:32, 7.53s/it] 18%|█▊ | 2149/12188 [4:44:56<20:28:25, 7.34s/it] {'loss': 0.3583, 'grad_norm': 0.6509365656240226, 'learning_rate': 9.449772705860103e-06, 'epoch': 0.18} + 18%|█▊ | 2149/12188 [4:44:56<20:28:25, 7.34s/it] 18%|█▊ | 2150/12188 [4:45:02<20:07:32, 7.22s/it] {'loss': 0.3478, 'grad_norm': 0.618367100191869, 'learning_rate': 9.449166593343963e-06, 'epoch': 0.18} + 18%|█▊ | 2150/12188 [4:45:03<20:07:32, 7.22s/it] 18%|█▊ | 2151/12188 [4:45:09<19:42:31, 7.07s/it] {'loss': 0.3607, 'grad_norm': 0.662083463356088, 'learning_rate': 9.448560166634825e-06, 'epoch': 0.18} + 18%|█▊ | 2151/12188 [4:45:09<19:42:31, 7.07s/it] 18%|█▊ | 2152/12188 [4:45:18<21:26:31, 7.69s/it] {'loss': 0.3857, 'grad_norm': 0.655070480347313, 'learning_rate': 9.447953425775517e-06, 'epoch': 0.18} + 18%|█▊ | 2152/12188 [4:45:18<21:26:31, 7.69s/it] 18%|█▊ | 2153/12188 [4:45:25<20:52:58, 7.49s/it] {'loss': 0.3754, 'grad_norm': 0.6623319019243954, 'learning_rate': 9.447346370808886e-06, 'epoch': 0.18} + 18%|█▊ | 2153/12188 [4:45:25<20:52:58, 7.49s/it] 18%|█▊ | 2154/12188 [4:45:36<23:45:04, 8.52s/it] {'loss': 0.3936, 'grad_norm': 0.6269102976922356, 'learning_rate': 9.446739001777797e-06, 'epoch': 0.18} + 18%|█▊ | 2154/12188 [4:45:36<23:45:04, 8.52s/it] 18%|█▊ | 2155/12188 [4:45:43<22:34:13, 8.10s/it] {'loss': 0.4305, 'grad_norm': 0.7102098290008985, 'learning_rate': 9.446131318725144e-06, 'epoch': 0.18} + 18%|█▊ | 2155/12188 [4:45:43<22:34:13, 8.10s/it] 18%|█▊ | 2156/12188 [4:45:50<21:38:35, 7.77s/it] {'loss': 0.3979, 'grad_norm': 0.6228675869218296, 'learning_rate': 9.445523321693841e-06, 'epoch': 0.18} + 18%|█▊ | 2156/12188 [4:45:50<21:38:35, 7.77s/it] 18%|█▊ | 2157/12188 [4:45:57<21:03:30, 7.56s/it] {'loss': 0.4103, 'grad_norm': 0.6466026366331475, 'learning_rate': 9.444915010726823e-06, 'epoch': 0.18} + 18%|█▊ | 2157/12188 [4:45:57<21:03:30, 7.56s/it] 18%|█▊ | 2158/12188 [4:46:05<20:42:48, 7.43s/it] {'loss': 0.3648, 'grad_norm': 0.6033933674811555, 'learning_rate': 9.444306385867048e-06, 'epoch': 0.18} + 18%|█▊ | 2158/12188 [4:46:05<20:42:48, 7.43s/it] 18%|█▊ | 2159/12188 [4:46:12<20:14:58, 7.27s/it] {'loss': 0.4029, 'grad_norm': 0.6566147770884809, 'learning_rate': 9.443697447157498e-06, 'epoch': 0.18} + 18%|█▊ | 2159/12188 [4:46:12<20:14:58, 7.27s/it] 18%|█▊ | 2160/12188 [4:46:19<20:22:56, 7.32s/it] {'loss': 0.4132, 'grad_norm': 0.7060382086212109, 'learning_rate': 9.443088194641173e-06, 'epoch': 0.18} + 18%|█▊ | 2160/12188 [4:46:19<20:22:56, 7.32s/it] 18%|█▊ | 2161/12188 [4:46:26<19:48:06, 7.11s/it] {'loss': 0.3591, 'grad_norm': 0.6485147074556397, 'learning_rate': 9.442478628361098e-06, 'epoch': 0.18} + 18%|█▊ | 2161/12188 [4:46:26<19:48:06, 7.11s/it] 18%|█▊ | 2162/12188 [4:46:33<19:40:02, 7.06s/it] {'loss': 0.3788, 'grad_norm': 0.6344716263267414, 'learning_rate': 9.441868748360318e-06, 'epoch': 0.18} + 18%|█▊ | 2162/12188 [4:46:33<19:40:02, 7.06s/it] 18%|█▊ | 2163/12188 [4:46:39<19:19:07, 6.94s/it] {'loss': 0.4053, 'grad_norm': 0.736493312309787, 'learning_rate': 9.441258554681905e-06, 'epoch': 0.18} + 18%|█▊ | 2163/12188 [4:46:39<19:19:07, 6.94s/it] 18%|█▊ | 2164/12188 [4:46:49<22:08:10, 7.95s/it] {'loss': 0.3871, 'grad_norm': 0.5898464570465856, 'learning_rate': 9.440648047368947e-06, 'epoch': 0.18} + 18%|█▊ | 2164/12188 [4:46:49<22:08:10, 7.95s/it] 18%|█▊ | 2165/12188 [4:46:56<21:16:24, 7.64s/it] {'loss': 0.3933, 'grad_norm': 0.6842687449141741, 'learning_rate': 9.44003722646456e-06, 'epoch': 0.18} + 18%|█▊ | 2165/12188 [4:46:56<21:16:24, 7.64s/it] 18%|█▊ | 2166/12188 [4:47:04<21:35:35, 7.76s/it] {'loss': 0.4111, 'grad_norm': 0.6376928603520313, 'learning_rate': 9.439426092011877e-06, 'epoch': 0.18} + 18%|█▊ | 2166/12188 [4:47:04<21:35:35, 7.76s/it] 18%|█▊ | 2167/12188 [4:47:12<21:05:12, 7.58s/it] {'loss': 0.4367, 'grad_norm': 0.6949546092846008, 'learning_rate': 9.438814644054055e-06, 'epoch': 0.18} + 18%|█▊ | 2167/12188 [4:47:12<21:05:12, 7.58s/it] 18%|█▊ | 2168/12188 [4:47:19<20:47:36, 7.47s/it] {'loss': 0.3709, 'grad_norm': 0.6211866752560201, 'learning_rate': 9.438202882634275e-06, 'epoch': 0.18} + 18%|█▊ | 2168/12188 [4:47:19<20:47:36, 7.47s/it] 18%|█▊ | 2169/12188 [4:47:26<20:25:09, 7.34s/it] {'loss': 0.368, 'grad_norm': 0.6129535788611599, 'learning_rate': 9.437590807795739e-06, 'epoch': 0.18} + 18%|█▊ | 2169/12188 [4:47:26<20:25:09, 7.34s/it] 18%|█▊ | 2170/12188 [4:47:34<21:03:43, 7.57s/it] {'loss': 0.3937, 'grad_norm': 0.6605104676798506, 'learning_rate': 9.436978419581668e-06, 'epoch': 0.18} + 18%|█▊ | 2170/12188 [4:47:34<21:03:43, 7.57s/it] 18%|█▊ | 2171/12188 [4:47:42<21:12:45, 7.62s/it] {'loss': 0.3722, 'grad_norm': 0.6647572239361348, 'learning_rate': 9.436365718035309e-06, 'epoch': 0.18} + 18%|█▊ | 2171/12188 [4:47:42<21:12:45, 7.62s/it] 18%|█▊ | 2172/12188 [4:47:49<21:00:07, 7.55s/it] {'loss': 0.3793, 'grad_norm': 0.6277074233442591, 'learning_rate': 9.435752703199933e-06, 'epoch': 0.18} + 18%|█▊ | 2172/12188 [4:47:49<21:00:07, 7.55s/it]Invalidate trace cache @ step 2: expected module 1, but got module 364 + 18%|█▊ | 2173/12188 [4:47:55<19:53:17, 7.15s/it] {'loss': 0.7329, 'grad_norm': 1.824599905107891, 'learning_rate': 9.435139375118828e-06, 'epoch': 0.18} + 18%|█▊ | 2173/12188 [4:47:55<19:53:17, 7.15s/it] 18%|█▊ | 2174/12188 [4:48:03<20:43:42, 7.45s/it] {'loss': 0.4478, 'grad_norm': 0.7229824637649559, 'learning_rate': 9.434525733835303e-06, 'epoch': 0.18} + 18%|█▊ | 2174/12188 [4:48:03<20:43:42, 7.45s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f07f22a8ae0> +[Try #0] Failed to fetch sample 4736977 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f07f22a8ae0> +Problematic sample: {'image': '20240823_021250_before_screenshot_sub0.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'See all eligible items and terms'"}, {'from': 'gpt', 'value': '\nclick(x=0.428, y=0.326)\n'}]} + 18%|█▊ | 2175/12188 [4:48:10<20:03:15, 7.21s/it] {'loss': 0.4268, 'grad_norm': 0.6637279868888083, 'learning_rate': 9.433911779392697e-06, 'epoch': 0.18} + 18%|█▊ | 2175/12188 [4:48:10<20:03:15, 7.21s/it] 18%|█▊ | 2176/12188 [4:48:18<20:44:02, 7.46s/it] {'loss': 0.4217, 'grad_norm': 0.6900268003365817, 'learning_rate': 9.433297511834365e-06, 'epoch': 0.18} + 18%|█▊ | 2176/12188 [4:48:18<20:44:02, 7.46s/it] 18%|█▊ | 2177/12188 [4:48:25<20:04:00, 7.22s/it] {'loss': 0.3694, 'grad_norm': 0.6622097783072212, 'learning_rate': 9.432682931203685e-06, 'epoch': 0.18} + 18%|█▊ | 2177/12188 [4:48:25<20:04:00, 7.22s/it] 18%|█▊ | 2178/12188 [4:48:33<20:37:05, 7.42s/it] {'loss': 0.446, 'grad_norm': 0.6724116575076883, 'learning_rate': 9.432068037544056e-06, 'epoch': 0.18} + 18%|█▊ | 2178/12188 [4:48:33<20:37:05, 7.42s/it] 18%|█▊ | 2179/12188 [4:48:39<20:04:10, 7.22s/it] {'loss': 0.3767, 'grad_norm': 0.6439994823881346, 'learning_rate': 9.431452830898906e-06, 'epoch': 0.18} + 18%|█▊ | 2179/12188 [4:48:39<20:04:10, 7.22s/it] 18%|█▊ | 2180/12188 [4:48:47<20:45:03, 7.46s/it] {'loss': 0.3905, 'grad_norm': 0.7168670934860905, 'learning_rate': 9.430837311311675e-06, 'epoch': 0.18} + 18%|█▊ | 2180/12188 [4:48:47<20:45:03, 7.46s/it] 18%|█▊ | 2181/12188 [4:48:54<20:10:34, 7.26s/it] {'loss': 0.4314, 'grad_norm': 0.7205890888156126, 'learning_rate': 9.430221478825832e-06, 'epoch': 0.18} + 18%|█▊ | 2181/12188 [4:48:54<20:10:34, 7.26s/it] 18%|█▊ | 2182/12188 [4:49:01<20:09:44, 7.25s/it] {'loss': 0.4139, 'grad_norm': 0.723417091544534, 'learning_rate': 9.429605333484866e-06, 'epoch': 0.18} + 18%|█▊ | 2182/12188 [4:49:01<20:09:44, 7.25s/it] 18%|█▊ | 2183/12188 [4:49:10<20:56:11, 7.53s/it] {'loss': 0.3668, 'grad_norm': 0.6645417683186658, 'learning_rate': 9.428988875332287e-06, 'epoch': 0.18} + 18%|█▊ | 2183/12188 [4:49:10<20:56:11, 7.53s/it] 18%|█▊ | 2184/12188 [4:49:17<20:56:27, 7.54s/it] {'loss': 0.3724, 'grad_norm': 0.6882091372275944, 'learning_rate': 9.428372104411632e-06, 'epoch': 0.18} + 18%|█▊ | 2184/12188 [4:49:17<20:56:27, 7.54s/it] 18%|█▊ | 2185/12188 [4:49:25<21:28:31, 7.73s/it] {'loss': 0.3494, 'grad_norm': 0.5792270665112401, 'learning_rate': 9.427755020766451e-06, 'epoch': 0.18} + 18%|█▊ | 2185/12188 [4:49:25<21:28:31, 7.73s/it] 18%|█▊ | 2186/12188 [4:49:32<20:51:14, 7.51s/it] {'loss': 0.4406, 'grad_norm': 0.6295093796940383, 'learning_rate': 9.427137624440325e-06, 'epoch': 0.18} + 18%|█▊ | 2186/12188 [4:49:32<20:51:14, 7.51s/it] 18%|█▊ | 2187/12188 [4:49:40<20:32:42, 7.40s/it] {'loss': 0.3901, 'grad_norm': 0.630594763016925, 'learning_rate': 9.426519915476854e-06, 'epoch': 0.18} + 18%|█▊ | 2187/12188 [4:49:40<20:32:42, 7.40s/it] 18%|█▊ | 2188/12188 [4:49:46<20:01:35, 7.21s/it] {'loss': 0.4095, 'grad_norm': 0.6791929483766356, 'learning_rate': 9.425901893919658e-06, 'epoch': 0.18} + 18%|█▊ | 2188/12188 [4:49:46<20:01:35, 7.21s/it] 18%|█▊ | 2189/12188 [4:49:53<19:34:47, 7.05s/it] {'loss': 0.3679, 'grad_norm': 0.6178873734258511, 'learning_rate': 9.425283559812379e-06, 'epoch': 0.18} + 18%|█▊ | 2189/12188 [4:49:53<19:34:47, 7.05s/it] 18%|█▊ | 2190/12188 [4:50:01<20:16:36, 7.30s/it] {'loss': 0.4019, 'grad_norm': 0.6370518953313021, 'learning_rate': 9.424664913198687e-06, 'epoch': 0.18} + 18%|█▊ | 2190/12188 [4:50:01<20:16:36, 7.30s/it] 18%|█▊ | 2191/12188 [4:50:08<20:19:00, 7.32s/it] {'loss': 0.3964, 'grad_norm': 0.6632717450535445, 'learning_rate': 9.424045954122267e-06, 'epoch': 0.18} + 18%|█▊ | 2191/12188 [4:50:08<20:19:00, 7.32s/it] 18%|█▊ | 2192/12188 [4:50:15<20:08:30, 7.25s/it] {'loss': 0.3768, 'grad_norm': 0.6920225652980718, 'learning_rate': 9.423426682626828e-06, 'epoch': 0.18} + 18%|█▊ | 2192/12188 [4:50:15<20:08:30, 7.25s/it] 18%|█▊ | 2193/12188 [4:50:23<20:48:41, 7.50s/it] {'loss': 0.374, 'grad_norm': 0.6499908122947389, 'learning_rate': 9.422807098756105e-06, 'epoch': 0.18} + 18%|█▊ | 2193/12188 [4:50:23<20:48:41, 7.50s/it] 18%|█▊ | 2194/12188 [4:50:30<20:26:37, 7.36s/it] {'loss': 0.4067, 'grad_norm': 0.6196275280355542, 'learning_rate': 9.42218720255385e-06, 'epoch': 0.18} + 18%|█▊ | 2194/12188 [4:50:30<20:26:37, 7.36s/it] 18%|█▊ | 2195/12188 [4:50:38<20:12:44, 7.28s/it] {'loss': 0.3716, 'grad_norm': 0.6190436076150186, 'learning_rate': 9.42156699406384e-06, 'epoch': 0.18} + 18%|█▊ | 2195/12188 [4:50:38<20:12:44, 7.28s/it] 18%|█▊ | 2196/12188 [4:50:44<19:45:43, 7.12s/it] {'loss': 0.3987, 'grad_norm': 0.6346424346874966, 'learning_rate': 9.420946473329872e-06, 'epoch': 0.18} + 18%|█▊ | 2196/12188 [4:50:44<19:45:43, 7.12s/it] 18%|█▊ | 2197/12188 [4:50:51<19:29:00, 7.02s/it] {'loss': 0.3642, 'grad_norm': 0.67746073696769, 'learning_rate': 9.420325640395767e-06, 'epoch': 0.18} + 18%|█▊ | 2197/12188 [4:50:51<19:29:00, 7.02s/it] 18%|█▊ | 2198/12188 [4:50:58<19:24:01, 6.99s/it] {'loss': 0.3832, 'grad_norm': 0.6017433317658702, 'learning_rate': 9.419704495305367e-06, 'epoch': 0.18} + 18%|█▊ | 2198/12188 [4:50:58<19:24:01, 6.99s/it] 18%|█▊ | 2199/12188 [4:51:05<19:20:54, 6.97s/it] {'loss': 0.3594, 'grad_norm': 0.7753335315988652, 'learning_rate': 9.419083038102537e-06, 'epoch': 0.18} + 18%|█▊ | 2199/12188 [4:51:05<19:20:54, 6.97s/it] 18%|█▊ | 2200/12188 [4:51:12<19:07:53, 6.90s/it] {'loss': 0.396, 'grad_norm': 0.6837616948780358, 'learning_rate': 9.418461268831162e-06, 'epoch': 0.18} + 18%|█▊ | 2200/12188 [4:51:12<19:07:53, 6.90s/it] 18%|█▊ | 2201/12188 [4:51:19<19:30:08, 7.03s/it] {'loss': 0.3588, 'grad_norm': 0.6502643563205313, 'learning_rate': 9.417839187535151e-06, 'epoch': 0.18} + 18%|█▊ | 2201/12188 [4:51:19<19:30:08, 7.03s/it] 18%|█▊ | 2202/12188 [4:51:26<19:12:25, 6.92s/it] {'loss': 0.3561, 'grad_norm': 0.6523448146213707, 'learning_rate': 9.417216794258435e-06, 'epoch': 0.18} + 18%|█▊ | 2202/12188 [4:51:26<19:12:25, 6.92s/it] 18%|█▊ | 2203/12188 [4:51:33<19:18:52, 6.96s/it] {'loss': 0.3936, 'grad_norm': 0.6685912226017888, 'learning_rate': 9.416594089044966e-06, 'epoch': 0.18} + 18%|█▊ | 2203/12188 [4:51:33<19:18:52, 6.96s/it] 18%|█▊ | 2204/12188 [4:51:39<18:59:11, 6.85s/it] {'loss': 0.4101, 'grad_norm': 0.6547343545914671, 'learning_rate': 9.415971071938717e-06, 'epoch': 0.18} + 18%|█▊ | 2204/12188 [4:51:39<18:59:11, 6.85s/it] 18%|█▊ | 2205/12188 [4:51:46<19:09:36, 6.91s/it] {'loss': 0.3808, 'grad_norm': 1.0548170795569114, 'learning_rate': 9.415347742983686e-06, 'epoch': 0.18} + 18%|█▊ | 2205/12188 [4:51:46<19:09:36, 6.91s/it] 18%|█▊ | 2206/12188 [4:51:54<19:26:31, 7.01s/it] {'loss': 0.4106, 'grad_norm': 0.7316710816740809, 'learning_rate': 9.41472410222389e-06, 'epoch': 0.18} + 18%|█▊ | 2206/12188 [4:51:54<19:26:31, 7.01s/it] 18%|█▊ | 2207/12188 [4:52:01<20:07:08, 7.26s/it] {'loss': 0.3663, 'grad_norm': 0.6603312746399724, 'learning_rate': 9.414100149703373e-06, 'epoch': 0.18} + 18%|█▊ | 2207/12188 [4:52:01<20:07:08, 7.26s/it] 18%|█▊ | 2208/12188 [4:52:08<19:31:11, 7.04s/it] {'loss': 0.4044, 'grad_norm': 0.7073172738802865, 'learning_rate': 9.413475885466195e-06, 'epoch': 0.18} + 18%|█▊ | 2208/12188 [4:52:08<19:31:11, 7.04s/it] 18%|█▊ | 2209/12188 [4:52:15<19:55:15, 7.19s/it] {'loss': 0.3783, 'grad_norm': 0.6663641540504266, 'learning_rate': 9.41285130955644e-06, 'epoch': 0.18} + 18%|█▊ | 2209/12188 [4:52:15<19:55:15, 7.19s/it] 18%|█▊ | 2210/12188 [4:52:23<19:49:24, 7.15s/it] {'loss': 0.3783, 'grad_norm': 0.6881091429795879, 'learning_rate': 9.412226422018215e-06, 'epoch': 0.18} + 18%|█▊ | 2210/12188 [4:52:23<19:49:24, 7.15s/it] 18%|█▊ | 2211/12188 [4:52:30<19:58:38, 7.21s/it] {'loss': 0.394, 'grad_norm': 0.6733045992143493, 'learning_rate': 9.41160122289565e-06, 'epoch': 0.18} + 18%|█▊ | 2211/12188 [4:52:30<19:58:38, 7.21s/it] 18%|█▊ | 2212/12188 [4:52:40<22:41:30, 8.19s/it] {'loss': 0.3714, 'grad_norm': 0.6146932072189493, 'learning_rate': 9.410975712232894e-06, 'epoch': 0.18} + 18%|█▊ | 2212/12188 [4:52:40<22:41:30, 8.19s/it] 18%|█▊ | 2213/12188 [4:52:47<21:48:12, 7.87s/it] {'loss': 0.3502, 'grad_norm': 0.632481681787621, 'learning_rate': 9.41034989007412e-06, 'epoch': 0.18} + 18%|█▊ | 2213/12188 [4:52:47<21:48:12, 7.87s/it] 18%|█▊ | 2214/12188 [4:52:55<21:18:03, 7.69s/it] {'loss': 0.374, 'grad_norm': 0.6569777079337834, 'learning_rate': 9.409723756463521e-06, 'epoch': 0.18} + 18%|█▊ | 2214/12188 [4:52:55<21:18:03, 7.69s/it] 18%|█▊ | 2215/12188 [4:53:04<22:23:06, 8.08s/it] {'loss': 0.4016, 'grad_norm': 0.6611920731383617, 'learning_rate': 9.409097311445317e-06, 'epoch': 0.18} + 18%|█▊ | 2215/12188 [4:53:04<22:23:06, 8.08s/it] 18%|█▊ | 2216/12188 [4:53:11<21:49:17, 7.88s/it] {'loss': 0.3624, 'grad_norm': 0.6467827985571128, 'learning_rate': 9.408470555063744e-06, 'epoch': 0.18} + 18%|█▊ | 2216/12188 [4:53:11<21:49:17, 7.88s/it] 18%|█▊ | 2217/12188 [4:53:18<21:06:07, 7.62s/it] {'loss': 0.3792, 'grad_norm': 0.646655663101853, 'learning_rate': 9.407843487363064e-06, 'epoch': 0.18} + 18%|█▊ | 2217/12188 [4:53:18<21:06:07, 7.62s/it] 18%|█▊ | 2218/12188 [4:53:25<20:40:09, 7.46s/it] {'loss': 0.3488, 'grad_norm': 0.6274145628844251, 'learning_rate': 9.407216108387556e-06, 'epoch': 0.18} + 18%|█▊ | 2218/12188 [4:53:25<20:40:09, 7.46s/it] 18%|█▊ | 2219/12188 [4:53:32<20:01:09, 7.23s/it] {'loss': 0.3752, 'grad_norm': 0.6220082768281531, 'learning_rate': 9.406588418181531e-06, 'epoch': 0.18} + 18%|█▊ | 2219/12188 [4:53:32<20:01:09, 7.23s/it] 18%|█▊ | 2220/12188 [4:53:39<19:37:14, 7.09s/it] {'loss': 0.3534, 'grad_norm': 0.6155965760256709, 'learning_rate': 9.405960416789307e-06, 'epoch': 0.18} + 18%|█▊ | 2220/12188 [4:53:39<19:37:14, 7.09s/it] 18%|█▊ | 2221/12188 [4:53:49<22:08:06, 8.00s/it] {'loss': 0.4078, 'grad_norm': 0.6823560486937664, 'learning_rate': 9.40533210425524e-06, 'epoch': 0.18} + 18%|█▊ | 2221/12188 [4:53:49<22:08:06, 8.00s/it] 18%|█▊ | 2222/12188 [4:53:55<20:57:49, 7.57s/it] {'loss': 0.323, 'grad_norm': 0.6457696871254243, 'learning_rate': 9.404703480623697e-06, 'epoch': 0.18} + 18%|█▊ | 2222/12188 [4:53:55<20:57:49, 7.57s/it] 18%|█▊ | 2223/12188 [4:54:03<20:47:34, 7.51s/it] {'loss': 0.3688, 'grad_norm': 0.6896538390069588, 'learning_rate': 9.404074545939073e-06, 'epoch': 0.18} + 18%|█▊ | 2223/12188 [4:54:03<20:47:34, 7.51s/it] 18%|█▊ | 2224/12188 [4:54:10<20:41:28, 7.48s/it] {'loss': 0.3777, 'grad_norm': 0.6195564738461724, 'learning_rate': 9.403445300245777e-06, 'epoch': 0.18} + 18%|█▊ | 2224/12188 [4:54:10<20:41:28, 7.48s/it] 18%|█▊ | 2225/12188 [4:54:17<20:15:00, 7.32s/it] {'loss': 0.3837, 'grad_norm': 0.6848346227998103, 'learning_rate': 9.40281574358825e-06, 'epoch': 0.18} + 18%|█▊ | 2225/12188 [4:54:17<20:15:00, 7.32s/it] 18%|█▊ | 2226/12188 [4:54:24<20:19:00, 7.34s/it] {'loss': 0.4466, 'grad_norm': 0.6576468898410852, 'learning_rate': 9.402185876010949e-06, 'epoch': 0.18} + 18%|█▊ | 2226/12188 [4:54:25<20:19:00, 7.34s/it] 18%|█▊ | 2227/12188 [4:54:31<19:56:33, 7.21s/it] {'loss': 0.3872, 'grad_norm': 0.6528249401591845, 'learning_rate': 9.401555697558353e-06, 'epoch': 0.18} + 18%|█▊ | 2227/12188 [4:54:31<19:56:33, 7.21s/it] 18%|█▊ | 2228/12188 [4:54:38<19:28:00, 7.04s/it] {'loss': 0.388, 'grad_norm': 0.6796720253416653, 'learning_rate': 9.400925208274966e-06, 'epoch': 0.18} + 18%|█▊ | 2228/12188 [4:54:38<19:28:00, 7.04s/it] 18%|█▊ | 2229/12188 [4:54:45<19:33:18, 7.07s/it] {'loss': 0.3669, 'grad_norm': 0.5902597180970633, 'learning_rate': 9.400294408205312e-06, 'epoch': 0.18} + 18%|█▊ | 2229/12188 [4:54:45<19:33:18, 7.07s/it] 18%|█▊ | 2230/12188 [4:54:52<19:29:00, 7.04s/it] {'loss': 0.3968, 'grad_norm': 0.6834302477887529, 'learning_rate': 9.399663297393937e-06, 'epoch': 0.18} + 18%|█▊ | 2230/12188 [4:54:52<19:29:00, 7.04s/it] 18%|█▊ | 2231/12188 [4:54:59<19:07:32, 6.91s/it] {'loss': 0.3809, 'grad_norm': 0.7071145276524656, 'learning_rate': 9.399031875885408e-06, 'epoch': 0.18} + 18%|█▊ | 2231/12188 [4:54:59<19:07:32, 6.91s/it] 18%|█▊ | 2232/12188 [4:55:06<19:19:35, 6.99s/it] {'loss': 0.3522, 'grad_norm': 0.6554898320757157, 'learning_rate': 9.398400143724313e-06, 'epoch': 0.18} + 18%|█▊ | 2232/12188 [4:55:06<19:19:35, 6.99s/it] 18%|█▊ | 2233/12188 [4:55:13<19:02:57, 6.89s/it] {'loss': 0.4006, 'grad_norm': 0.6567173127897523, 'learning_rate': 9.39776810095527e-06, 'epoch': 0.18} + 18%|█▊ | 2233/12188 [4:55:13<19:02:57, 6.89s/it] 18%|█▊ | 2234/12188 [4:55:20<19:40:01, 7.11s/it] {'loss': 0.3469, 'grad_norm': 0.5944488131242966, 'learning_rate': 9.397135747622907e-06, 'epoch': 0.18} + 18%|█▊ | 2234/12188 [4:55:20<19:40:01, 7.11s/it] 18%|█▊ | 2235/12188 [4:55:27<19:04:21, 6.90s/it] {'loss': 0.3798, 'grad_norm': 0.6637165438887703, 'learning_rate': 9.396503083771883e-06, 'epoch': 0.18} + 18%|█▊ | 2235/12188 [4:55:27<19:04:21, 6.90s/it] 18%|█▊ | 2236/12188 [4:55:33<18:56:36, 6.85s/it] {'loss': 0.3495, 'grad_norm': 0.6279468266552583, 'learning_rate': 9.395870109446874e-06, 'epoch': 0.18} + 18%|█▊ | 2236/12188 [4:55:33<18:56:36, 6.85s/it] 18%|█▊ | 2237/12188 [4:55:42<20:12:44, 7.31s/it] {'loss': 0.4, 'grad_norm': 0.6158115677833944, 'learning_rate': 9.395236824692582e-06, 'epoch': 0.18} + 18%|█▊ | 2237/12188 [4:55:42<20:12:44, 7.31s/it] 18%|█▊ | 2238/12188 [4:55:48<19:34:10, 7.08s/it] {'loss': 0.3646, 'grad_norm': 0.6619265761041224, 'learning_rate': 9.394603229553724e-06, 'epoch': 0.18} + 18%|█▊ | 2238/12188 [4:55:48<19:34:10, 7.08s/it] 18%|█▊ | 2239/12188 [4:55:55<19:12:38, 6.95s/it] {'loss': 0.397, 'grad_norm': 0.6076963455303073, 'learning_rate': 9.39396932407505e-06, 'epoch': 0.18} + 18%|█▊ | 2239/12188 [4:55:55<19:12:38, 6.95s/it] 18%|█▊ | 2240/12188 [4:56:02<19:01:57, 6.89s/it] {'loss': 0.3688, 'grad_norm': 0.699870375233772, 'learning_rate': 9.393335108301319e-06, 'epoch': 0.18} + 18%|█▊ | 2240/12188 [4:56:02<19:01:57, 6.89s/it] 18%|█▊ | 2241/12188 [4:56:09<19:30:08, 7.06s/it] {'loss': 0.38, 'grad_norm': 0.6770508207131117, 'learning_rate': 9.392700582277322e-06, 'epoch': 0.18} + 18%|█▊ | 2241/12188 [4:56:09<19:30:08, 7.06s/it] 18%|█▊ | 2242/12188 [4:56:16<19:22:59, 7.02s/it] {'loss': 0.3692, 'grad_norm': 0.6935104706403638, 'learning_rate': 9.392065746047867e-06, 'epoch': 0.18} + 18%|█▊ | 2242/12188 [4:56:16<19:22:59, 7.02s/it] 18%|█▊ | 2243/12188 [4:56:23<19:06:14, 6.92s/it] {'loss': 0.3927, 'grad_norm': 0.6858485591562076, 'learning_rate': 9.391430599657786e-06, 'epoch': 0.18} + 18%|█▊ | 2243/12188 [4:56:23<19:06:14, 6.92s/it] 18%|█▊ | 2244/12188 [4:56:29<18:43:32, 6.78s/it] {'loss': 0.4174, 'grad_norm': 0.7478212998293969, 'learning_rate': 9.390795143151932e-06, 'epoch': 0.18} + 18%|█▊ | 2244/12188 [4:56:29<18:43:32, 6.78s/it] 18%|█▊ | 2245/12188 [4:56:36<18:28:28, 6.69s/it] {'loss': 0.3991, 'grad_norm': 0.6434551146699471, 'learning_rate': 9.390159376575178e-06, 'epoch': 0.18} + 18%|█▊ | 2245/12188 [4:56:36<18:28:28, 6.69s/it] 18%|█▊ | 2246/12188 [4:56:43<18:41:56, 6.77s/it] {'loss': 0.3999, 'grad_norm': 0.6727178614128846, 'learning_rate': 9.389523299972422e-06, 'epoch': 0.18} + 18%|█▊ | 2246/12188 [4:56:43<18:41:56, 6.77s/it] 18%|█▊ | 2247/12188 [4:56:49<18:40:28, 6.76s/it] {'loss': 0.3895, 'grad_norm': 0.6407423966574375, 'learning_rate': 9.388886913388584e-06, 'epoch': 0.18} + 18%|█▊ | 2247/12188 [4:56:49<18:40:28, 6.76s/it] 18%|█▊ | 2248/12188 [4:56:56<18:50:14, 6.82s/it] {'loss': 0.3763, 'grad_norm': 0.6421328858081942, 'learning_rate': 9.388250216868605e-06, 'epoch': 0.18} + 18%|█▊ | 2248/12188 [4:56:56<18:50:14, 6.82s/it] 18%|█▊ | 2249/12188 [4:57:05<20:11:08, 7.31s/it] {'loss': 0.3918, 'grad_norm': 0.6595486341259511, 'learning_rate': 9.387613210457442e-06, 'epoch': 0.18} + 18%|█▊ | 2249/12188 [4:57:05<20:11:08, 7.31s/it] 18%|��▊ | 2250/12188 [4:57:12<19:44:26, 7.15s/it] {'loss': 0.3681, 'grad_norm': 0.6248532575258672, 'learning_rate': 9.386975894200085e-06, 'epoch': 0.18} + 18%|█▊ | 2250/12188 [4:57:12<19:44:26, 7.15s/it] 18%|█▊ | 2251/12188 [4:57:18<19:20:26, 7.01s/it] {'loss': 0.4039, 'grad_norm': 0.6479105870233962, 'learning_rate': 9.38633826814154e-06, 'epoch': 0.18} + 18%|█▊ | 2251/12188 [4:57:18<19:20:26, 7.01s/it] 18%|█▊ | 2252/12188 [4:57:25<19:28:46, 7.06s/it] {'loss': 0.3808, 'grad_norm': 0.9624248925461987, 'learning_rate': 9.385700332326833e-06, 'epoch': 0.18} + 18%|█▊ | 2252/12188 [4:57:25<19:28:46, 7.06s/it] 18%|█▊ | 2253/12188 [4:57:33<19:35:35, 7.10s/it] {'loss': 0.4139, 'grad_norm': 0.856002467069955, 'learning_rate': 9.385062086801013e-06, 'epoch': 0.18} + 18%|█▊ | 2253/12188 [4:57:33<19:35:35, 7.10s/it] 18%|█▊ | 2254/12188 [4:57:39<19:23:44, 7.03s/it] {'loss': 0.3781, 'grad_norm': 0.6268356099297339, 'learning_rate': 9.384423531609154e-06, 'epoch': 0.18} + 18%|█▊ | 2254/12188 [4:57:39<19:23:44, 7.03s/it] 19%|█▊ | 2255/12188 [4:57:47<19:52:36, 7.20s/it] {'loss': 0.3924, 'grad_norm': 0.6955185856448675, 'learning_rate': 9.383784666796351e-06, 'epoch': 0.19} + 19%|█▊ | 2255/12188 [4:57:47<19:52:36, 7.20s/it] 19%|█▊ | 2256/12188 [4:57:54<19:34:26, 7.09s/it] {'loss': 0.3746, 'grad_norm': 0.6887244874005455, 'learning_rate': 9.383145492407716e-06, 'epoch': 0.19} + 19%|█▊ | 2256/12188 [4:57:54<19:34:26, 7.09s/it] 19%|█▊ | 2257/12188 [4:58:02<20:17:18, 7.35s/it] {'loss': 0.4097, 'grad_norm': 0.7884194179745071, 'learning_rate': 9.38250600848839e-06, 'epoch': 0.19} + 19%|█▊ | 2257/12188 [4:58:02<20:17:18, 7.35s/it] 19%|█▊ | 2258/12188 [4:58:09<19:52:32, 7.21s/it] {'loss': 0.3694, 'grad_norm': 0.6243083294238491, 'learning_rate': 9.38186621508353e-06, 'epoch': 0.19} + 19%|█▊ | 2258/12188 [4:58:09<19:52:32, 7.21s/it] 19%|█▊ | 2259/12188 [4:58:16<19:32:20, 7.08s/it] {'loss': 0.4328, 'grad_norm': 0.8161845516123343, 'learning_rate': 9.38122611223832e-06, 'epoch': 0.19} + 19%|█▊ | 2259/12188 [4:58:16<19:32:20, 7.08s/it] 19%|█▊ | 2260/12188 [4:58:24<20:21:32, 7.38s/it] {'loss': 0.386, 'grad_norm': 0.6605651214233957, 'learning_rate': 9.380585699997957e-06, 'epoch': 0.19} + 19%|█▊ | 2260/12188 [4:58:24<20:21:32, 7.38s/it] 19%|█▊ | 2261/12188 [4:58:31<20:06:56, 7.29s/it] {'loss': 0.3917, 'grad_norm': 0.6252312026972461, 'learning_rate': 9.379944978407673e-06, 'epoch': 0.19} + 19%|█▊ | 2261/12188 [4:58:31<20:06:56, 7.29s/it] 19%|█▊ | 2262/12188 [4:58:38<19:55:56, 7.23s/it] {'loss': 0.3685, 'grad_norm': 0.6145930999132546, 'learning_rate': 9.37930394751271e-06, 'epoch': 0.19} + 19%|█▊ | 2262/12188 [4:58:38<19:55:56, 7.23s/it] 19%|█▊ | 2263/12188 [4:58:47<21:26:40, 7.78s/it] {'loss': 0.388, 'grad_norm': 0.6367830376133443, 'learning_rate': 9.378662607358337e-06, 'epoch': 0.19} + 19%|█▊ | 2263/12188 [4:58:47<21:26:40, 7.78s/it] 19%|█▊ | 2264/12188 [4:58:54<20:41:44, 7.51s/it] {'loss': 0.4639, 'grad_norm': 0.7091900224133999, 'learning_rate': 9.378020957989849e-06, 'epoch': 0.19} + 19%|█▊ | 2264/12188 [4:58:54<20:41:44, 7.51s/it] 19%|█▊ | 2265/12188 [4:59:00<19:51:03, 7.20s/it] {'loss': 0.3668, 'grad_norm': 0.6675615097627332, 'learning_rate': 9.377378999452553e-06, 'epoch': 0.19} + 19%|█▊ | 2265/12188 [4:59:00<19:51:03, 7.20s/it] 19%|█▊ | 2266/12188 [4:59:07<19:40:07, 7.14s/it] {'loss': 0.401, 'grad_norm': 0.7273131848756691, 'learning_rate': 9.376736731791785e-06, 'epoch': 0.19} + 19%|█▊ | 2266/12188 [4:59:07<19:40:07, 7.14s/it] 19%|█▊ | 2267/12188 [4:59:15<19:48:16, 7.19s/it] {'loss': 0.3752, 'grad_norm': 0.6356609381217574, 'learning_rate': 9.3760941550529e-06, 'epoch': 0.19} + 19%|█▊ | 2267/12188 [4:59:15<19:48:16, 7.19s/it] 19%|█▊ | 2268/12188 [4:59:21<19:30:07, 7.08s/it] {'loss': 0.3688, 'grad_norm': 0.7384666900295588, 'learning_rate': 9.375451269281276e-06, 'epoch': 0.19} + 19%|█▊ | 2268/12188 [4:59:21<19:30:07, 7.08s/it] 19%|█▊ | 2269/12188 [4:59:28<19:31:03, 7.08s/it] {'loss': 0.3995, 'grad_norm': 0.6932047602509236, 'learning_rate': 9.374808074522313e-06, 'epoch': 0.19} + 19%|█▊ | 2269/12188 [4:59:28<19:31:03, 7.08s/it] 19%|█▊ | 2270/12188 [4:59:36<19:40:47, 7.14s/it] {'loss': 0.3805, 'grad_norm': 0.6834944352460937, 'learning_rate': 9.374164570821433e-06, 'epoch': 0.19} + 19%|█▊ | 2270/12188 [4:59:36<19:40:47, 7.14s/it] 19%|█▊ | 2271/12188 [4:59:42<19:08:00, 6.95s/it] {'loss': 0.4145, 'grad_norm': 0.7277940075119779, 'learning_rate': 9.37352075822408e-06, 'epoch': 0.19} + 19%|█▊ | 2271/12188 [4:59:42<19:08:00, 6.95s/it] 19%|█▊ | 2272/12188 [4:59:50<19:30:34, 7.08s/it] {'loss': 0.3835, 'grad_norm': 0.6531854889773601, 'learning_rate': 9.372876636775716e-06, 'epoch': 0.19} + 19%|█▊ | 2272/12188 [4:59:50<19:30:34, 7.08s/it] 19%|█▊ | 2273/12188 [4:59:56<19:11:24, 6.97s/it] {'loss': 0.3808, 'grad_norm': 0.7024824429776769, 'learning_rate': 9.372232206521832e-06, 'epoch': 0.19} + 19%|█▊ | 2273/12188 [4:59:56<19:11:24, 6.97s/it] 19%|█▊ | 2274/12188 [5:00:03<18:51:18, 6.85s/it] {'loss': 0.3643, 'grad_norm': 1.000322208161805, 'learning_rate': 9.37158746750793e-06, 'epoch': 0.19} + 19%|█▊ | 2274/12188 [5:00:03<18:51:18, 6.85s/it] 19%|█▊ | 2275/12188 [5:00:11<20:08:01, 7.31s/it] {'loss': 0.3461, 'grad_norm': 0.6118338887591332, 'learning_rate': 9.370942419779547e-06, 'epoch': 0.19} + 19%|█▊ | 2275/12188 [5:00:11<20:08:01, 7.31s/it] 19%|█▊ | 2276/12188 [5:00:18<20:03:21, 7.28s/it] {'loss': 0.3838, 'grad_norm': 0.6173971655520525, 'learning_rate': 9.370297063382235e-06, 'epoch': 0.19} + 19%|█▊ | 2276/12188 [5:00:18<20:03:21, 7.28s/it] 19%|█▊ | 2277/12188 [5:00:27<20:55:17, 7.60s/it] {'loss': 0.4239, 'grad_norm': 0.6511222196648068, 'learning_rate': 9.369651398361562e-06, 'epoch': 0.19} + 19%|█▊ | 2277/12188 [5:00:27<20:55:17, 7.60s/it] 19%|█▊ | 2278/12188 [5:00:34<20:44:09, 7.53s/it] {'loss': 0.4515, 'grad_norm': 0.678468967577286, 'learning_rate': 9.369005424763128e-06, 'epoch': 0.19} + 19%|█▊ | 2278/12188 [5:00:34<20:44:09, 7.53s/it] 19%|█▊ | 2279/12188 [5:00:41<20:12:40, 7.34s/it] {'loss': 0.4065, 'grad_norm': 0.6392780539789913, 'learning_rate': 9.368359142632553e-06, 'epoch': 0.19} + 19%|█▊ | 2279/12188 [5:00:41<20:12:40, 7.34s/it] 19%|█▊ | 2280/12188 [5:00:48<19:54:49, 7.24s/it] {'loss': 0.3816, 'grad_norm': 0.6127404658287932, 'learning_rate': 9.367712552015471e-06, 'epoch': 0.19} + 19%|█▊ | 2280/12188 [5:00:48<19:54:49, 7.24s/it] 19%|█▊ | 2281/12188 [5:00:58<22:26:26, 8.15s/it] {'loss': 0.3731, 'grad_norm': 0.6449515390467405, 'learning_rate': 9.367065652957546e-06, 'epoch': 0.19} + 19%|█▊ | 2281/12188 [5:00:58<22:26:26, 8.15s/it] 19%|█▊ | 2282/12188 [5:01:06<22:14:10, 8.08s/it] {'loss': 0.4252, 'grad_norm': 0.670470114390444, 'learning_rate': 9.366418445504461e-06, 'epoch': 0.19} + 19%|█▊ | 2282/12188 [5:01:06<22:14:10, 8.08s/it] 19%|█▊ | 2283/12188 [5:01:14<21:33:24, 7.83s/it] {'loss': 0.3954, 'grad_norm': 0.677813016240396, 'learning_rate': 9.365770929701924e-06, 'epoch': 0.19} + 19%|█▊ | 2283/12188 [5:01:14<21:33:24, 7.83s/it] 19%|█▊ | 2284/12188 [5:01:21<21:01:17, 7.64s/it] {'loss': 0.3835, 'grad_norm': 0.636291147168594, 'learning_rate': 9.365123105595655e-06, 'epoch': 0.19} + 19%|█▊ | 2284/12188 [5:01:21<21:01:17, 7.64s/it] 19%|█▊ | 2285/12188 [5:01:29<21:37:15, 7.86s/it] {'loss': 0.3433, 'grad_norm': 0.6837155224780271, 'learning_rate': 9.364474973231404e-06, 'epoch': 0.19} + 19%|█▊ | 2285/12188 [5:01:29<21:37:15, 7.86s/it] 19%|█▉ | 2286/12188 [5:01:36<21:01:23, 7.64s/it] {'loss': 0.3896, 'grad_norm': 0.6220463901083001, 'learning_rate': 9.363826532654945e-06, 'epoch': 0.19} + 19%|█▉ | 2286/12188 [5:01:36<21:01:23, 7.64s/it] 19%|█▉ | 2287/12188 [5:01:44<20:56:15, 7.61s/it] {'loss': 0.3947, 'grad_norm': 0.6691651982497119, 'learning_rate': 9.363177783912065e-06, 'epoch': 0.19} + 19%|█▉ | 2287/12188 [5:01:44<20:56:15, 7.61s/it] 19%|█▉ | 2288/12188 [5:01:51<20:28:29, 7.45s/it] {'loss': 0.4017, 'grad_norm': 0.6240319823007835, 'learning_rate': 9.362528727048581e-06, 'epoch': 0.19} + 19%|█▉ | 2288/12188 [5:01:51<20:28:29, 7.45s/it] 19%|█▉ | 2289/12188 [5:01:58<19:57:38, 7.26s/it] {'loss': 0.3781, 'grad_norm': 0.6493122935859134, 'learning_rate': 9.361879362110329e-06, 'epoch': 0.19} + 19%|█▉ | 2289/12188 [5:01:58<19:57:38, 7.26s/it] 19%|█▉ | 2290/12188 [5:02:06<20:34:04, 7.48s/it] {'loss': 0.3995, 'grad_norm': 0.6149076413458776, 'learning_rate': 9.361229689143162e-06, 'epoch': 0.19} + 19%|█▉ | 2290/12188 [5:02:06<20:34:04, 7.48s/it] 19%|█▉ | 2291/12188 [5:02:13<20:49:38, 7.58s/it] {'loss': 0.3527, 'grad_norm': 0.6733927035094591, 'learning_rate': 9.360579708192962e-06, 'epoch': 0.19} + 19%|█▉ | 2291/12188 [5:02:13<20:49:38, 7.58s/it] 19%|█▉ | 2292/12188 [5:02:20<20:07:26, 7.32s/it] {'loss': 0.4123, 'grad_norm': 0.6866269485961236, 'learning_rate': 9.359929419305628e-06, 'epoch': 0.19} + 19%|█▉ | 2292/12188 [5:02:20<20:07:26, 7.32s/it] 19%|█▉ | 2293/12188 [5:02:28<20:24:21, 7.42s/it] {'loss': 0.4001, 'grad_norm': 0.7172255950777638, 'learning_rate': 9.359278822527085e-06, 'epoch': 0.19} + 19%|█▉ | 2293/12188 [5:02:28<20:24:21, 7.42s/it] 19%|█▉ | 2294/12188 [5:02:35<20:05:24, 7.31s/it] {'loss': 0.3792, 'grad_norm': 0.656060969524315, 'learning_rate': 9.358627917903273e-06, 'epoch': 0.19} + 19%|█▉ | 2294/12188 [5:02:35<20:05:24, 7.31s/it] 19%|█▉ | 2295/12188 [5:02:41<19:28:08, 7.08s/it] {'loss': 0.3411, 'grad_norm': 0.6836726409283856, 'learning_rate': 9.357976705480162e-06, 'epoch': 0.19} + 19%|█▉ | 2295/12188 [5:02:41<19:28:08, 7.08s/it] 19%|█▉ | 2296/12188 [5:02:48<19:00:13, 6.92s/it] {'loss': 0.4449, 'grad_norm': 0.6703446505008837, 'learning_rate': 9.357325185303737e-06, 'epoch': 0.19} + 19%|█▉ | 2296/12188 [5:02:48<19:00:13, 6.92s/it] 19%|█▉ | 2297/12188 [5:02:55<18:45:30, 6.83s/it] {'loss': 0.3811, 'grad_norm': 0.7571872760995528, 'learning_rate': 9.356673357420007e-06, 'epoch': 0.19} + 19%|█▉ | 2297/12188 [5:02:55<18:45:30, 6.83s/it] 19%|█▉ | 2298/12188 [5:03:03<20:06:02, 7.32s/it] {'loss': 0.386, 'grad_norm': 0.617408696652861, 'learning_rate': 9.356021221875008e-06, 'epoch': 0.19} + 19%|█▉ | 2298/12188 [5:03:03<20:06:02, 7.32s/it] 19%|█▉ | 2299/12188 [5:03:09<19:22:55, 7.06s/it] {'loss': 0.3741, 'grad_norm': 0.6564785386615719, 'learning_rate': 9.355368778714784e-06, 'epoch': 0.19} + 19%|█▉ | 2299/12188 [5:03:10<19:22:55, 7.06s/it] 19%|█▉ | 2300/12188 [5:03:17<19:28:07, 7.09s/it] {'loss': 0.3598, 'grad_norm': 0.5501074663949519, 'learning_rate': 9.354716027985415e-06, 'epoch': 0.19} + 19%|█▉ | 2300/12188 [5:03:17<19:28:07, 7.09s/it] 19%|█▉ | 2301/12188 [5:03:24<19:32:00, 7.11s/it] {'loss': 0.3644, 'grad_norm': 0.6452070966471581, 'learning_rate': 9.354062969732998e-06, 'epoch': 0.19} + 19%|█▉ | 2301/12188 [5:03:24<19:32:00, 7.11s/it] 19%|█▉ | 2302/12188 [5:03:31<19:11:02, 6.99s/it] {'loss': 0.4031, 'grad_norm': 0.6993197176344447, 'learning_rate': 9.353409604003649e-06, 'epoch': 0.19} + 19%|█▉ | 2302/12188 [5:03:31<19:11:02, 6.99s/it] 19%|█▉ | 2303/12188 [5:03:37<19:01:18, 6.93s/it] {'loss': 0.3776, 'grad_norm': 0.6750433056768211, 'learning_rate': 9.35275593084351e-06, 'epoch': 0.19} + 19%|█▉ | 2303/12188 [5:03:37<19:01:18, 6.93s/it] 19%|█▉ | 2304/12188 [5:03:45<20:02:20, 7.30s/it] {'loss': 0.4258, 'grad_norm': 0.6521842431324694, 'learning_rate': 9.352101950298738e-06, 'epoch': 0.19} + 19%|█▉ | 2304/12188 [5:03:45<20:02:20, 7.30s/it] 19%|█▉ | 2305/12188 [5:03:52<19:35:56, 7.14s/it] {'loss': 0.4106, 'grad_norm': 0.6692348451482892, 'learning_rate': 9.351447662415518e-06, 'epoch': 0.19} + 19%|█▉ | 2305/12188 [5:03:52<19:35:56, 7.14s/it] 19%|█▉ | 2306/12188 [5:03:59<19:39:48, 7.16s/it] {'loss': 0.3824, 'grad_norm': 0.6219592900216891, 'learning_rate': 9.350793067240057e-06, 'epoch': 0.19} + 19%|█▉ | 2306/12188 [5:03:59<19:39:48, 7.16s/it] 19%|█▉ | 2307/12188 [5:04:07<20:15:58, 7.38s/it] {'loss': 0.3978, 'grad_norm': 0.676789578999462, 'learning_rate': 9.350138164818579e-06, 'epoch': 0.19} + 19%|█▉ | 2307/12188 [5:04:07<20:15:58, 7.38s/it] 19%|█▉ | 2308/12188 [5:04:14<19:49:42, 7.22s/it] {'loss': 0.3579, 'grad_norm': 0.6263081341237644, 'learning_rate': 9.349482955197332e-06, 'epoch': 0.19} + 19%|█▉ | 2308/12188 [5:04:14<19:49:42, 7.22s/it] 19%|█▉ | 2309/12188 [5:04:21<19:22:58, 7.06s/it] {'loss': 0.4295, 'grad_norm': 0.6162221441845724, 'learning_rate': 9.348827438422588e-06, 'epoch': 0.19} + 19%|█▉ | 2309/12188 [5:04:21<19:22:58, 7.06s/it] 19%|█▉ | 2310/12188 [5:04:28<19:45:57, 7.20s/it] {'loss': 0.3619, 'grad_norm': 0.6318641776423526, 'learning_rate': 9.348171614540635e-06, 'epoch': 0.19} + 19%|█▉ | 2310/12188 [5:04:28<19:45:57, 7.20s/it] 19%|█▉ | 2311/12188 [5:04:35<19:13:12, 7.01s/it] {'loss': 0.4025, 'grad_norm': 0.6654406810458581, 'learning_rate': 9.34751548359779e-06, 'epoch': 0.19} + 19%|█▉ | 2311/12188 [5:04:35<19:13:12, 7.01s/it] 19%|█▉ | 2312/12188 [5:04:42<19:02:23, 6.94s/it] {'loss': 0.3862, 'grad_norm': 0.6344833745203432, 'learning_rate': 9.346859045640386e-06, 'epoch': 0.19} + 19%|█▉ | 2312/12188 [5:04:42<19:02:23, 6.94s/it] 19%|█▉ | 2313/12188 [5:04:49<18:53:19, 6.89s/it] {'loss': 0.3531, 'grad_norm': 0.6446421225136829, 'learning_rate': 9.346202300714781e-06, 'epoch': 0.19} + 19%|█▉ | 2313/12188 [5:04:49<18:53:19, 6.89s/it] 19%|█▉ | 2314/12188 [5:04:55<18:43:51, 6.83s/it] {'loss': 0.3829, 'grad_norm': 0.6313985964154566, 'learning_rate': 9.345545248867352e-06, 'epoch': 0.19} + 19%|█▉ | 2314/12188 [5:04:55<18:43:51, 6.83s/it] 19%|█▉ | 2315/12188 [5:05:02<18:38:09, 6.80s/it] {'loss': 0.3748, 'grad_norm': 0.6200295342403026, 'learning_rate': 9.344887890144499e-06, 'epoch': 0.19} + 19%|█▉ | 2315/12188 [5:05:02<18:38:09, 6.80s/it] 19%|█▉ | 2316/12188 [5:05:09<18:41:43, 6.82s/it] {'loss': 0.3631, 'grad_norm': 1.105055352895138, 'learning_rate': 9.344230224592642e-06, 'epoch': 0.19} + 19%|█▉ | 2316/12188 [5:05:09<18:41:43, 6.82s/it] 19%|█▉ | 2317/12188 [5:05:16<19:17:03, 7.03s/it] {'loss': 0.3969, 'grad_norm': 0.6157647470344825, 'learning_rate': 9.343572252258228e-06, 'epoch': 0.19} + 19%|█▉ | 2317/12188 [5:05:16<19:17:03, 7.03s/it] 19%|█▉ | 2318/12188 [5:05:23<19:11:46, 7.00s/it] {'loss': 0.4063, 'grad_norm': 0.6661407728938531, 'learning_rate': 9.34291397318772e-06, 'epoch': 0.19} + 19%|█▉ | 2318/12188 [5:05:23<19:11:46, 7.00s/it] 19%|█▉ | 2319/12188 [5:05:31<19:28:24, 7.10s/it] {'loss': 0.3727, 'grad_norm': 0.6703460683806264, 'learning_rate': 9.342255387427604e-06, 'epoch': 0.19} + 19%|█▉ | 2319/12188 [5:05:31<19:28:24, 7.10s/it] 19%|█▉ | 2320/12188 [5:05:41<21:49:39, 7.96s/it] {'loss': 0.379, 'grad_norm': 0.6617965736360988, 'learning_rate': 9.34159649502439e-06, 'epoch': 0.19} + 19%|█▉ | 2320/12188 [5:05:41<21:49:39, 7.96s/it] 19%|█▉ | 2321/12188 [5:05:47<20:53:47, 7.62s/it] {'loss': 0.3582, 'grad_norm': 0.6466904126275137, 'learning_rate': 9.340937296024605e-06, 'epoch': 0.19} + 19%|█▉ | 2321/12188 [5:05:47<20:53:47, 7.62s/it] 19%|█▉ | 2322/12188 [5:05:55<20:43:25, 7.56s/it] {'loss': 0.3548, 'grad_norm': 0.6241798643953657, 'learning_rate': 9.340277790474804e-06, 'epoch': 0.19} + 19%|█▉ | 2322/12188 [5:05:55<20:43:25, 7.56s/it] 19%|█▉ | 2323/12188 [5:06:02<20:47:57, 7.59s/it] {'loss': 0.4049, 'grad_norm': 0.6142666862581382, 'learning_rate': 9.339617978421558e-06, 'epoch': 0.19} + 19%|█▉ | 2323/12188 [5:06:02<20:47:57, 7.59s/it] 19%|█▉ | 2324/12188 [5:06:09<19:53:02, 7.26s/it] {'loss': 0.4203, 'grad_norm': 0.6103447822310806, 'learning_rate': 9.338957859911462e-06, 'epoch': 0.19} + 19%|█▉ | 2324/12188 [5:06:09<19:53:02, 7.26s/it] 19%|█▉ | 2325/12188 [5:06:18<21:18:35, 7.78s/it] {'loss': 0.3641, 'grad_norm': 0.7373588364415218, 'learning_rate': 9.338297434991134e-06, 'epoch': 0.19} + 19%|█▉ | 2325/12188 [5:06:18<21:18:35, 7.78s/it] 19%|█▉ | 2326/12188 [5:06:25<20:33:10, 7.50s/it] {'loss': 0.3988, 'grad_norm': 0.6487574283133665, 'learning_rate': 9.33763670370721e-06, 'epoch': 0.19} + 19%|█▉ | 2326/12188 [5:06:25<20:33:10, 7.50s/it] 19%|█▉ | 2327/12188 [5:06:33<21:12:22, 7.74s/it] {'loss': 0.3709, 'grad_norm': 0.6258202448480539, 'learning_rate': 9.336975666106353e-06, 'epoch': 0.19} + 19%|█▉ | 2327/12188 [5:06:33<21:12:22, 7.74s/it] 19%|█▉ | 2328/12188 [5:06:40<20:42:35, 7.56s/it] {'loss': 0.3848, 'grad_norm': 0.6526014575649657, 'learning_rate': 9.33631432223524e-06, 'epoch': 0.19} + 19%|█▉ | 2328/12188 [5:06:40<20:42:35, 7.56s/it] 19%|█▉ | 2329/12188 [5:06:48<20:43:13, 7.57s/it] {'loss': 0.3999, 'grad_norm': 0.6077719393611979, 'learning_rate': 9.335652672140577e-06, 'epoch': 0.19} + 19%|█▉ | 2329/12188 [5:06:48<20:43:13, 7.57s/it] 19%|█▉ | 2330/12188 [5:06:55<20:31:54, 7.50s/it] {'loss': 0.435, 'grad_norm': 0.6530432971866037, 'learning_rate': 9.33499071586909e-06, 'epoch': 0.19} + 19%|█▉ | 2330/12188 [5:06:55<20:31:54, 7.50s/it] 19%|█▉ | 2331/12188 [5:07:02<19:48:57, 7.24s/it] {'loss': 0.3653, 'grad_norm': 0.6852933491871285, 'learning_rate': 9.334328453467521e-06, 'epoch': 0.19} + 19%|█▉ | 2331/12188 [5:07:02<19:48:57, 7.24s/it] 19%|█▉ | 2332/12188 [5:07:09<20:05:22, 7.34s/it] {'loss': 0.3554, 'grad_norm': 0.5951626098623948, 'learning_rate': 9.333665884982642e-06, 'epoch': 0.19} + 19%|█▉ | 2332/12188 [5:07:09<20:05:22, 7.34s/it] 19%|█▉ | 2333/12188 [5:07:18<20:51:30, 7.62s/it] {'loss': 0.4332, 'grad_norm': 0.6894792253496951, 'learning_rate': 9.333003010461238e-06, 'epoch': 0.19} + 19%|█▉ | 2333/12188 [5:07:18<20:51:30, 7.62s/it] 19%|█▉ | 2334/12188 [5:07:25<20:35:47, 7.52s/it] {'loss': 0.4233, 'grad_norm': 0.6670601335952702, 'learning_rate': 9.332339829950126e-06, 'epoch': 0.19} + 19%|█▉ | 2334/12188 [5:07:25<20:35:47, 7.52s/it] 19%|█▉ | 2335/12188 [5:07:32<19:47:23, 7.23s/it] {'loss': 0.4109, 'grad_norm': 0.6337882942508766, 'learning_rate': 9.331676343496135e-06, 'epoch': 0.19} + 19%|█▉ | 2335/12188 [5:07:32<19:47:23, 7.23s/it] 19%|█▉ | 2336/12188 [5:07:39<20:22:27, 7.44s/it] {'loss': 0.4293, 'grad_norm': 0.6501163921099293, 'learning_rate': 9.331012551146118e-06, 'epoch': 0.19} + 19%|█▉ | 2336/12188 [5:07:39<20:22:27, 7.44s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7fcb29e5c130> +[Try #0] Failed to fetch sample 4867840 in VC:s3://gui/OS-Atlas/desktop_domain/linux_images. Exception: cannot identify image file <_io.BytesIO object at 0x7fcb29e5c130> +Problematic sample: {'image': 'output_20240912_153123_original_screenshot_sub3.png', 'conversations': [{'from': 'human', 'value': "\nClick on '跳至本页面的“合而为一”部分'"}, {'from': 'gpt', 'value': '\nclick(x=0.0934, y=0.3285)\n'}]} + 19%|█▉ | 2337/12188 [5:07:47<20:30:43, 7.50s/it] {'loss': 0.3442, 'grad_norm': 0.6429138918948967, 'learning_rate': 9.330348452946954e-06, 'epoch': 0.19} + 19%|█▉ | 2337/12188 [5:07:47<20:30:43, 7.50s/it] 19%|█▉ | 2338/12188 [5:07:54<20:09:38, 7.37s/it] {'loss': 0.3874, 'grad_norm': 0.6513355939967174, 'learning_rate': 9.329684048945539e-06, 'epoch': 0.19} + 19%|█▉ | 2338/12188 [5:07:54<20:09:38, 7.37s/it] 19%|█▉ | 2339/12188 [5:08:02<20:56:09, 7.65s/it] {'loss': 0.431, 'grad_norm': 0.6791273565491414, 'learning_rate': 9.329019339188794e-06, 'epoch': 0.19} + 19%|█▉ | 2339/12188 [5:08:02<20:56:09, 7.65s/it] 19%|█▉ | 2340/12188 [5:08:10<20:29:34, 7.49s/it] {'loss': 0.425, 'grad_norm': 0.6672639844251973, 'learning_rate': 9.328354323723659e-06, 'epoch': 0.19} + 19%|█▉ | 2340/12188 [5:08:10<20:29:34, 7.49s/it] 19%|█▉ | 2341/12188 [5:08:17<20:48:46, 7.61s/it] {'loss': 0.371, 'grad_norm': 0.6235551051809817, 'learning_rate': 9.327689002597093e-06, 'epoch': 0.19} + 19%|█▉ | 2341/12188 [5:08:17<20:48:46, 7.61s/it] 19%|█▉ | 2342/12188 [5:08:24<20:15:22, 7.41s/it] {'loss': 0.3625, 'grad_norm': 0.6012436315626537, 'learning_rate': 9.327023375856085e-06, 'epoch': 0.19} + 19%|█▉ | 2342/12188 [5:08:24<20:15:22, 7.41s/it] 19%|█▉ | 2343/12188 [5:08:31<19:39:53, 7.19s/it] {'loss': 0.3514, 'grad_norm': 0.645288267973353, 'learning_rate': 9.326357443547636e-06, 'epoch': 0.19} + 19%|█▉ | 2343/12188 [5:08:31<19:39:53, 7.19s/it] 19%|█▉ | 2344/12188 [5:08:38<19:35:40, 7.17s/it] {'loss': 0.3767, 'grad_norm': 0.6363989557059667, 'learning_rate': 9.325691205718777e-06, 'epoch': 0.19} + 19%|█▉ | 2344/12188 [5:08:38<19:35:40, 7.17s/it] 19%|█▉ | 2345/12188 [5:08:45<19:40:43, 7.20s/it] {'loss': 0.3657, 'grad_norm': 0.660560340225214, 'learning_rate': 9.325024662416553e-06, 'epoch': 0.19} + 19%|█▉ | 2345/12188 [5:08:45<19:40:43, 7.20s/it] 19%|█▉ | 2346/12188 [5:08:52<19:28:11, 7.12s/it] {'loss': 0.37, 'grad_norm': 0.6721760596889514, 'learning_rate': 9.324357813688037e-06, 'epoch': 0.19} + 19%|█▉ | 2346/12188 [5:08:52<19:28:11, 7.12s/it] 19%|█▉ | 2347/12188 [5:08:59<19:03:44, 6.97s/it] {'loss': 0.4077, 'grad_norm': 0.6834240895159485, 'learning_rate': 9.32369065958032e-06, 'epoch': 0.19} + 19%|█▉ | 2347/12188 [5:08:59<19:03:44, 6.97s/it] 19%|█▉ | 2348/12188 [5:09:07<19:53:51, 7.28s/it] {'loss': 0.3588, 'grad_norm': 0.6075540351343719, 'learning_rate': 9.323023200140516e-06, 'epoch': 0.19} + 19%|█▉ | 2348/12188 [5:09:07<19:53:51, 7.28s/it] 19%|█▉ | 2349/12188 [5:09:14<19:53:42, 7.28s/it] {'loss': 0.3796, 'grad_norm': 0.5984516730170012, 'learning_rate': 9.322355435415758e-06, 'epoch': 0.19} + 19%|█▉ | 2349/12188 [5:09:14<19:53:42, 7.28s/it] 19%|█▉ | 2350/12188 [5:09:22<20:02:34, 7.33s/it] {'loss': 0.4025, 'grad_norm': 0.7086851352381045, 'learning_rate': 9.321687365453202e-06, 'epoch': 0.19} + 19%|█▉ | 2350/12188 [5:09:22<20:02:34, 7.33s/it] 19%|█▉ | 2351/12188 [5:09:29<19:42:37, 7.21s/it] {'loss': 0.384, 'grad_norm': 0.6986547697987913, 'learning_rate': 9.32101899030003e-06, 'epoch': 0.19} + 19%|█▉ | 2351/12188 [5:09:29<19:42:37, 7.21s/it] 19%|█▉ | 2352/12188 [5:09:36<19:23:09, 7.10s/it] {'loss': 0.4185, 'grad_norm': 0.6773073365420988, 'learning_rate': 9.320350310003438e-06, 'epoch': 0.19} + 19%|█▉ | 2352/12188 [5:09:36<19:23:09, 7.10s/it] 19%|█▉ | 2353/12188 [5:09:43<19:30:19, 7.14s/it] {'loss': 0.3788, 'grad_norm': 0.6432950407371643, 'learning_rate': 9.31968132461065e-06, 'epoch': 0.19} + 19%|█▉ | 2353/12188 [5:09:43<19:30:19, 7.14s/it] 19%|█▉ | 2354/12188 [5:09:49<19:08:37, 7.01s/it] {'loss': 0.3801, 'grad_norm': 0.6075904401925407, 'learning_rate': 9.319012034168905e-06, 'epoch': 0.19} + 19%|█▉ | 2354/12188 [5:09:49<19:08:37, 7.01s/it] 19%|█▉ | 2355/12188 [5:09:56<19:01:15, 6.96s/it] {'loss': 0.3972, 'grad_norm': 0.6439171969927543, 'learning_rate': 9.318342438725469e-06, 'epoch': 0.19} + 19%|█▉ | 2355/12188 [5:09:56<19:01:15, 6.96s/it] 19%|█▉ | 2356/12188 [5:10:03<18:55:12, 6.93s/it] {'loss': 0.4119, 'grad_norm': 0.6469349463673484, 'learning_rate': 9.31767253832763e-06, 'epoch': 0.19} + 19%|█▉ | 2356/12188 [5:10:03<18:55:12, 6.93s/it] 19%|█▉ | 2357/12188 [5:10:10<18:54:37, 6.92s/it] {'loss': 0.4078, 'grad_norm': 0.6343675366354676, 'learning_rate': 9.317002333022692e-06, 'epoch': 0.19} + 19%|█▉ | 2357/12188 [5:10:10<18:54:37, 6.92s/it] 19%|█▉ | 2358/12188 [5:10:17<18:58:57, 6.95s/it] {'loss': 0.3951, 'grad_norm': 0.5995391965320391, 'learning_rate': 9.316331822857984e-06, 'epoch': 0.19} + 19%|█▉ | 2358/12188 [5:10:17<18:58:57, 6.95s/it] 19%|█▉ | 2359/12188 [5:10:24<18:54:57, 6.93s/it] {'loss': 0.3713, 'grad_norm': 0.6118923486469612, 'learning_rate': 9.315661007880858e-06, 'epoch': 0.19} + 19%|█▉ | 2359/12188 [5:10:24<18:54:57, 6.93s/it] 19%|█▉ | 2360/12188 [5:10:31<18:50:08, 6.90s/it] {'loss': 0.3838, 'grad_norm': 0.6073245728495696, 'learning_rate': 9.314989888138687e-06, 'epoch': 0.19} + 19%|█▉ | 2360/12188 [5:10:31<18:50:08, 6.90s/it] 19%|█▉ | 2361/12188 [5:10:38<19:19:58, 7.08s/it] {'loss': 0.3575, 'grad_norm': 0.6018354857740531, 'learning_rate': 9.31431846367886e-06, 'epoch': 0.19} + 19%|█▉ | 2361/12188 [5:10:38<19:19:58, 7.08s/it] 19%|█▉ | 2362/12188 [5:10:46<20:07:41, 7.37s/it] {'loss': 0.3992, 'grad_norm': 0.6245305670153241, 'learning_rate': 9.313646734548797e-06, 'epoch': 0.19} + 19%|█▉ | 2362/12188 [5:10:46<20:07:41, 7.37s/it] 19%|█▉ | 2363/12188 [5:10:53<19:54:59, 7.30s/it] {'loss': 0.3795, 'grad_norm': 0.666903812854981, 'learning_rate': 9.312974700795932e-06, 'epoch': 0.19} + 19%|█▉ | 2363/12188 [5:10:53<19:54:59, 7.30s/it] 19%|█▉ | 2364/12188 [5:11:01<20:02:23, 7.34s/it] {'loss': 0.3955, 'grad_norm': 0.6582225024748427, 'learning_rate': 9.31230236246772e-06, 'epoch': 0.19} + 19%|█▉ | 2364/12188 [5:11:01<20:02:23, 7.34s/it] 19%|█▉ | 2365/12188 [5:11:08<19:35:16, 7.18s/it] {'loss': 0.3921, 'grad_norm': 0.6691318717690604, 'learning_rate': 9.311629719611645e-06, 'epoch': 0.19} + 19%|█▉ | 2365/12188 [5:11:08<19:35:16, 7.18s/it] 19%|█▉ | 2366/12188 [5:11:14<19:07:38, 7.01s/it] {'loss': 0.3727, 'grad_norm': 0.6072426756093179, 'learning_rate': 9.310956772275208e-06, 'epoch': 0.19} + 19%|█▉ | 2366/12188 [5:11:14<19:07:38, 7.01s/it] 19%|█▉ | 2367/12188 [5:11:21<18:54:35, 6.93s/it] {'loss': 0.3929, 'grad_norm': 0.66605317716138, 'learning_rate': 9.310283520505927e-06, 'epoch': 0.19} + 19%|█▉ | 2367/12188 [5:11:21<18:54:35, 6.93s/it] 19%|█▉ | 2368/12188 [5:11:28<18:57:24, 6.95s/it] {'loss': 0.3729, 'grad_norm': 0.636609286105985, 'learning_rate': 9.30960996435135e-06, 'epoch': 0.19} + 19%|█▉ | 2368/12188 [5:11:28<18:57:24, 6.95s/it] 19%|█▉ | 2369/12188 [5:11:36<19:21:36, 7.10s/it] {'loss': 0.3946, 'grad_norm': 0.6751185362791333, 'learning_rate': 9.308936103859041e-06, 'epoch': 0.19} + 19%|█▉ | 2369/12188 [5:11:36<19:21:36, 7.10s/it] 19%|█▉ | 2370/12188 [5:11:43<19:44:42, 7.24s/it] {'loss': 0.3851, 'grad_norm': 0.6366011292783129, 'learning_rate': 9.308261939076587e-06, 'epoch': 0.19} + 19%|█▉ | 2370/12188 [5:11:43<19:44:42, 7.24s/it] 19%|█▉ | 2371/12188 [5:11:50<19:29:42, 7.15s/it] {'loss': 0.3858, 'grad_norm': 0.6875852001728366, 'learning_rate': 9.3075874700516e-06, 'epoch': 0.19} + 19%|█▉ | 2371/12188 [5:11:50<19:29:42, 7.15s/it] 19%|█▉ | 2372/12188 [5:11:57<19:28:07, 7.14s/it] {'loss': 0.393, 'grad_norm': 0.6399271283474355, 'learning_rate': 9.306912696831702e-06, 'epoch': 0.19} + 19%|█▉ | 2372/12188 [5:11:57<19:28:07, 7.14s/it] 19%|█▉ | 2373/12188 [5:12:04<19:08:00, 7.02s/it] {'loss': 0.3749, 'grad_norm': 0.6527654766933841, 'learning_rate': 9.306237619464551e-06, 'epoch': 0.19} + 19%|█▉ | 2373/12188 [5:12:04<19:08:00, 7.02s/it] 19%|█▉ | 2374/12188 [5:12:11<19:26:04, 7.13s/it] {'loss': 0.3581, 'grad_norm': 0.6206295270930102, 'learning_rate': 9.305562237997819e-06, 'epoch': 0.19} + 19%|█▉ | 2374/12188 [5:12:11<19:26:04, 7.13s/it] 19%|█▉ | 2375/12188 [5:12:18<19:26:57, 7.14s/it] {'loss': 0.3984, 'grad_norm': 0.684269813194962, 'learning_rate': 9.304886552479196e-06, 'epoch': 0.19} + 19%|█▉ | 2375/12188 [5:12:18<19:26:57, 7.14s/it] 19%|█▉ | 2376/12188 [5:12:26<19:34:31, 7.18s/it] {'loss': 0.4198, 'grad_norm': 0.5669441430237632, 'learning_rate': 9.304210562956403e-06, 'epoch': 0.19} + 19%|█▉ | 2376/12188 [5:12:26<19:34:31, 7.18s/it] 20%|█▉ | 2377/12188 [5:12:33<19:22:22, 7.11s/it] {'loss': 0.3938, 'grad_norm': 0.6005508642710212, 'learning_rate': 9.303534269477174e-06, 'epoch': 0.2} + 20%|█▉ | 2377/12188 [5:12:33<19:22:22, 7.11s/it] 20%|█▉ | 2378/12188 [5:12:39<18:58:53, 6.97s/it] {'loss': 0.3942, 'grad_norm': 0.6283032062849251, 'learning_rate': 9.302857672089272e-06, 'epoch': 0.2} + 20%|█▉ | 2378/12188 [5:12:39<18:58:53, 6.97s/it] 20%|█▉ | 2379/12188 [5:12:47<19:25:46, 7.13s/it] {'loss': 0.3635, 'grad_norm': 0.6652222170410326, 'learning_rate': 9.302180770840471e-06, 'epoch': 0.2} + 20%|█▉ | 2379/12188 [5:12:47<19:25:46, 7.13s/it] 20%|█▉ | 2380/12188 [5:12:56<20:59:31, 7.71s/it] {'loss': 0.3863, 'grad_norm': 0.7343818972158671, 'learning_rate': 9.301503565778578e-06, 'epoch': 0.2} + 20%|█▉ | 2380/12188 [5:12:56<20:59:31, 7.71s/it] 20%|█▉ | 2381/12188 [5:13:03<20:41:18, 7.59s/it] {'loss': 0.402, 'grad_norm': 0.6742437382952083, 'learning_rate': 9.300826056951412e-06, 'epoch': 0.2} + 20%|█▉ | 2381/12188 [5:13:03<20:41:18, 7.59s/it] 20%|█▉ | 2382/12188 [5:13:11<20:39:54, 7.59s/it] {'loss': 0.3632, 'grad_norm': 0.6968039245131766, 'learning_rate': 9.30014824440682e-06, 'epoch': 0.2} + 20%|█▉ | 2382/12188 [5:13:11<20:39:54, 7.59s/it] 20%|█▉ | 2383/12188 [5:13:18<20:32:08, 7.54s/it] {'loss': 0.3414, 'grad_norm': 0.6279828665406602, 'learning_rate': 9.299470128192671e-06, 'epoch': 0.2} + 20%|█▉ | 2383/12188 [5:13:18<20:32:08, 7.54s/it] 20%|█▉ | 2384/12188 [5:13:25<20:04:06, 7.37s/it] {'loss': 0.3907, 'grad_norm': 0.6983501511485233, 'learning_rate': 9.298791708356845e-06, 'epoch': 0.2} + 20%|█▉ | 2384/12188 [5:13:25<20:04:06, 7.37s/it] 20%|█▉ | 2385/12188 [5:13:32<19:55:11, 7.32s/it] {'loss': 0.3933, 'grad_norm': 0.6623804825395393, 'learning_rate': 9.298112984947258e-06, 'epoch': 0.2} + 20%|█▉ | 2385/12188 [5:13:32<19:55:11, 7.32s/it] 20%|█▉ | 2386/12188 [5:13:40<20:07:46, 7.39s/it] {'loss': 0.377, 'grad_norm': 0.589850216098204, 'learning_rate': 9.297433958011836e-06, 'epoch': 0.2} + 20%|█▉ | 2386/12188 [5:13:40<20:07:46, 7.39s/it] 20%|█▉ | 2387/12188 [5:13:48<21:03:37, 7.74s/it] {'loss': 0.351, 'grad_norm': 0.6997192198506152, 'learning_rate': 9.296754627598533e-06, 'epoch': 0.2} + 20%|█▉ | 2387/12188 [5:13:48<21:03:37, 7.74s/it] 20%|█▉ | 2388/12188 [5:13:55<20:23:18, 7.49s/it] {'loss': 0.4118, 'grad_norm': 0.6148879349719744, 'learning_rate': 9.296074993755321e-06, 'epoch': 0.2} + 20%|█▉ | 2388/12188 [5:13:55<20:23:18, 7.49s/it] 20%|█▉ | 2389/12188 [5:14:02<19:51:45, 7.30s/it] {'loss': 0.3765, 'grad_norm': 0.6140273467884046, 'learning_rate': 9.295395056530195e-06, 'epoch': 0.2} + 20%|█▉ | 2389/12188 [5:14:02<19:51:45, 7.30s/it] 20%|█▉ | 2390/12188 [5:14:09<19:32:01, 7.18s/it] {'loss': 0.3664, 'grad_norm': 0.6433561116710074, 'learning_rate': 9.294714815971171e-06, 'epoch': 0.2} + 20%|█▉ | 2390/12188 [5:14:09<19:32:01, 7.18s/it] 20%|█▉ | 2391/12188 [5:14:16<19:11:10, 7.05s/it] {'loss': 0.393, 'grad_norm': 0.6878520688476624, 'learning_rate': 9.294034272126286e-06, 'epoch': 0.2} + 20%|█▉ | 2391/12188 [5:14:16<19:11:10, 7.05s/it] 20%|█▉ | 2392/12188 [5:14:24<19:40:37, 7.23s/it] {'loss': 0.3712, 'grad_norm': 0.6625706750801742, 'learning_rate': 9.293353425043602e-06, 'epoch': 0.2} + 20%|█▉ | 2392/12188 [5:14:24<19:40:37, 7.23s/it] 20%|█▉ | 2393/12188 [5:14:31<19:36:26, 7.21s/it] {'loss': 0.3489, 'grad_norm': 0.6302656156108761, 'learning_rate': 9.292672274771195e-06, 'epoch': 0.2} + 20%|█▉ | 2393/12188 [5:14:31<19:36:26, 7.21s/it] 20%|█▉ | 2394/12188 [5:14:37<19:12:14, 7.06s/it] {'loss': 0.3996, 'grad_norm': 0.6205742578498861, 'learning_rate': 9.291990821357169e-06, 'epoch': 0.2} + 20%|█▉ | 2394/12188 [5:14:37<19:12:14, 7.06s/it] 20%|█▉ | 2395/12188 [5:14:44<19:04:46, 7.01s/it] {'loss': 0.3935, 'grad_norm': 0.6851257416146276, 'learning_rate': 9.291309064849647e-06, 'epoch': 0.2} + 20%|█▉ | 2395/12188 [5:14:44<19:04:46, 7.01s/it] 20%|█▉ | 2396/12188 [5:14:51<18:59:15, 6.98s/it] {'loss': 0.3346, 'grad_norm': 0.6407958944446134, 'learning_rate': 9.290627005296774e-06, 'epoch': 0.2} + 20%|█▉ | 2396/12188 [5:14:51<18:59:15, 6.98s/it] 20%|█▉ | 2397/12188 [5:14:58<18:51:17, 6.93s/it] {'loss': 0.3925, 'grad_norm': 0.6542314536246738, 'learning_rate': 9.289944642746716e-06, 'epoch': 0.2} + 20%|█▉ | 2397/12188 [5:14:58<18:51:17, 6.93s/it] 20%|█▉ | 2398/12188 [5:15:05<18:56:05, 6.96s/it] {'loss': 0.38, 'grad_norm': 0.6741693286973981, 'learning_rate': 9.289261977247657e-06, 'epoch': 0.2} + 20%|█▉ | 2398/12188 [5:15:05<18:56:05, 6.96s/it] 20%|█▉ | 2399/12188 [5:15:12<18:40:51, 6.87s/it] {'loss': 0.3942, 'grad_norm': 0.6600185965876197, 'learning_rate': 9.288579008847808e-06, 'epoch': 0.2} + 20%|█▉ | 2399/12188 [5:15:12<18:40:51, 6.87s/it] 20%|█▉ | 2400/12188 [5:15:19<18:59:50, 6.99s/it] {'loss': 0.3668, 'grad_norm': 0.6500004989772018, 'learning_rate': 9.287895737595402e-06, 'epoch': 0.2} + 20%|█▉ | 2400/12188 [5:15:19<18:59:50, 6.99s/it] 20%|█▉ | 2401/12188 [5:15:26<19:04:45, 7.02s/it] {'loss': 0.3987, 'grad_norm': 0.6578815761528988, 'learning_rate': 9.287212163538688e-06, 'epoch': 0.2} + 20%|█▉ | 2401/12188 [5:15:26<19:04:45, 7.02s/it] 20%|█▉ | 2402/12188 [5:15:33<19:04:30, 7.02s/it] {'loss': 0.3674, 'grad_norm': 0.597979129506302, 'learning_rate': 9.286528286725938e-06, 'epoch': 0.2} + 20%|█▉ | 2402/12188 [5:15:33<19:04:30, 7.02s/it] 20%|█▉ | 2403/12188 [5:15:40<18:49:55, 6.93s/it] {'loss': 0.3876, 'grad_norm': 0.7644720592103794, 'learning_rate': 9.285844107205449e-06, 'epoch': 0.2} + 20%|█▉ | 2403/12188 [5:15:40<18:49:55, 6.93s/it]Invalidate trace cache @ step 2: expected module 1, but got module 364 + 20%|█▉ | 2404/12188 [5:15:45<17:41:40, 6.51s/it] {'loss': 0.7827, 'grad_norm': 1.6390271640978227, 'learning_rate': 9.285159625025532e-06, 'epoch': 0.2} + 20%|█▉ | 2404/12188 [5:15:45<17:41:40, 6.51s/it] 20%|█▉ | 2405/12188 [5:15:53<18:56:04, 6.97s/it] {'loss': 0.3266, 'grad_norm': 0.5894560510348452, 'learning_rate': 9.284474840234528e-06, 'epoch': 0.2} + 20%|█▉ | 2405/12188 [5:15:53<18:56:04, 6.97s/it] 20%|█▉ | 2406/12188 [5:16:00<18:49:44, 6.93s/it] {'loss': 0.3875, 'grad_norm': 0.6591760439526413, 'learning_rate': 9.283789752880794e-06, 'epoch': 0.2} + 20%|█▉ | 2406/12188 [5:16:00<18:49:44, 6.93s/it] 20%|█▉ | 2407/12188 [5:16:08<19:18:32, 7.11s/it] {'loss': 0.389, 'grad_norm': 0.6727596787251703, 'learning_rate': 9.283104363012712e-06, 'epoch': 0.2} + 20%|█▉ | 2407/12188 [5:16:08<19:18:32, 7.11s/it] 20%|█▉ | 2408/12188 [5:16:15<19:22:27, 7.13s/it] {'loss': 0.3925, 'grad_norm': 0.6894187916010971, 'learning_rate': 9.282418670678679e-06, 'epoch': 0.2} + 20%|█▉ | 2408/12188 [5:16:15<19:22:27, 7.13s/it] 20%|█▉ | 2409/12188 [5:16:22<19:03:06, 7.01s/it] {'loss': 0.4233, 'grad_norm': 0.9338308791000948, 'learning_rate': 9.28173267592712e-06, 'epoch': 0.2} + 20%|█▉ | 2409/12188 [5:16:22<19:03:06, 7.01s/it] 20%|█▉ | 2410/12188 [5:16:29<19:34:26, 7.21s/it] {'loss': 0.3758, 'grad_norm': 0.6866554370071849, 'learning_rate': 9.28104637880648e-06, 'epoch': 0.2} + 20%|█▉ | 2410/12188 [5:16:29<19:34:26, 7.21s/it] 20%|█▉ | 2411/12188 [5:16:37<19:36:48, 7.22s/it] {'loss': 0.3763, 'grad_norm': 0.8298722722337927, 'learning_rate': 9.280359779365223e-06, 'epoch': 0.2} + 20%|█▉ | 2411/12188 [5:16:37<19:36:48, 7.22s/it] 20%|█▉ | 2412/12188 [5:16:43<19:14:54, 7.09s/it] {'loss': 0.3843, 'grad_norm': 0.6099956718290087, 'learning_rate': 9.279672877651833e-06, 'epoch': 0.2} + 20%|█▉ | 2412/12188 [5:16:43<19:14:54, 7.09s/it] 20%|█▉ | 2413/12188 [5:16:50<19:03:36, 7.02s/it] {'loss': 0.3958, 'grad_norm': 0.7471474555886637, 'learning_rate': 9.278985673714822e-06, 'epoch': 0.2} + 20%|█▉ | 2413/12188 [5:16:50<19:03:36, 7.02s/it] 20%|█▉ | 2414/12188 [5:16:57<18:52:45, 6.95s/it] {'loss': 0.4429, 'grad_norm': 0.7018924458043543, 'learning_rate': 9.278298167602716e-06, 'epoch': 0.2} + 20%|█▉ | 2414/12188 [5:16:57<18:52:45, 6.95s/it] 20%|█▉ | 2415/12188 [5:17:04<18:55:18, 6.97s/it] {'loss': 0.3703, 'grad_norm': 0.634952444708645, 'learning_rate': 9.277610359364068e-06, 'epoch': 0.2} + 20%|█▉ | 2415/12188 [5:17:04<18:55:18, 6.97s/it] 20%|█▉ | 2416/12188 [5:17:11<19:01:30, 7.01s/it] {'loss': 0.4016, 'grad_norm': 0.6768139062394007, 'learning_rate': 9.276922249047449e-06, 'epoch': 0.2} + 20%|█▉ | 2416/12188 [5:17:11<19:01:30, 7.01s/it] 20%|█▉ | 2417/12188 [5:17:18<19:12:53, 7.08s/it] {'loss': 0.4129, 'grad_norm': 0.7026517447161629, 'learning_rate': 9.276233836701452e-06, 'epoch': 0.2} + 20%|█▉ | 2417/12188 [5:17:18<19:12:53, 7.08s/it] 20%|█▉ | 2418/12188 [5:17:26<19:46:46, 7.29s/it] {'loss': 0.3915, 'grad_norm': 0.7045179946729204, 'learning_rate': 9.275545122374691e-06, 'epoch': 0.2} + 20%|█▉ | 2418/12188 [5:17:26<19:46:46, 7.29s/it] 20%|█▉ | 2419/12188 [5:17:33<19:19:33, 7.12s/it] {'loss': 0.419, 'grad_norm': 0.6301978495651493, 'learning_rate': 9.274856106115804e-06, 'epoch': 0.2} + 20%|█▉ | 2419/12188 [5:17:33<19:19:33, 7.12s/it] 20%|█▉ | 2420/12188 [5:17:40<19:42:10, 7.26s/it] {'loss': 0.397, 'grad_norm': 0.6612030207215627, 'learning_rate': 9.274166787973447e-06, 'epoch': 0.2} + 20%|█▉ | 2420/12188 [5:17:40<19:42:10, 7.26s/it] 20%|█▉ | 2421/12188 [5:17:48<19:59:43, 7.37s/it] {'loss': 0.3711, 'grad_norm': 0.6404455848892218, 'learning_rate': 9.273477167996297e-06, 'epoch': 0.2} + 20%|█▉ | 2421/12188 [5:17:48<19:59:43, 7.37s/it] 20%|█▉ | 2422/12188 [5:17:55<19:45:58, 7.29s/it] {'loss': 0.3486, 'grad_norm': 0.6372516432575472, 'learning_rate': 9.272787246233055e-06, 'epoch': 0.2} + 20%|█▉ | 2422/12188 [5:17:55<19:45:58, 7.29s/it] 20%|█▉ | 2423/12188 [5:18:02<19:35:38, 7.22s/it] {'loss': 0.431, 'grad_norm': 0.6923049597965895, 'learning_rate': 9.272097022732444e-06, 'epoch': 0.2} + 20%|█▉ | 2423/12188 [5:18:02<19:35:38, 7.22s/it] 20%|█▉ | 2424/12188 [5:18:09<19:15:28, 7.10s/it] {'loss': 0.4186, 'grad_norm': 0.6595526316435263, 'learning_rate': 9.271406497543203e-06, 'epoch': 0.2} + 20%|█▉ | 2424/12188 [5:18:09<19:15:28, 7.10s/it] 20%|█▉ | 2425/12188 [5:18:16<19:27:06, 7.17s/it] {'loss': 0.3756, 'grad_norm': 0.640709629763413, 'learning_rate': 9.2707156707141e-06, 'epoch': 0.2} + 20%|█▉ | 2425/12188 [5:18:16<19:27:06, 7.17s/it] 20%|█▉ | 2426/12188 [5:18:24<19:39:36, 7.25s/it] {'loss': 0.4083, 'grad_norm': 0.7600656638226995, 'learning_rate': 9.270024542293917e-06, 'epoch': 0.2} + 20%|█▉ | 2426/12188 [5:18:24<19:39:36, 7.25s/it] 20%|█▉ | 2427/12188 [5:18:32<20:05:24, 7.41s/it] {'loss': 0.3588, 'grad_norm': 0.5768666273826845, 'learning_rate': 9.26933311233146e-06, 'epoch': 0.2} + 20%|█▉ | 2427/12188 [5:18:32<20:05:24, 7.41s/it] 20%|█▉ | 2428/12188 [5:18:39<19:50:44, 7.32s/it] {'loss': 0.4078, 'grad_norm': 0.6530369856226637, 'learning_rate': 9.268641380875558e-06, 'epoch': 0.2} + 20%|█▉ | 2428/12188 [5:18:39<19:50:44, 7.32s/it] 20%|█▉ | 2429/12188 [5:18:45<19:23:26, 7.15s/it] {'loss': 0.3885, 'grad_norm': 0.7542791332480379, 'learning_rate': 9.26794934797506e-06, 'epoch': 0.2} + 20%|█▉ | 2429/12188 [5:18:45<19:23:26, 7.15s/it] 20%|█▉ | 2430/12188 [5:18:53<19:42:57, 7.27s/it] {'loss': 0.3776, 'grad_norm': 0.6093758525387879, 'learning_rate': 9.267257013678838e-06, 'epoch': 0.2} + 20%|█▉ | 2430/12188 [5:18:53<19:42:57, 7.27s/it] 20%|█▉ | 2431/12188 [5:19:00<19:36:31, 7.23s/it] {'loss': 0.4059, 'grad_norm': 0.728821713930413, 'learning_rate': 9.266564378035778e-06, 'epoch': 0.2} + 20%|█▉ | 2431/12188 [5:19:00<19:36:31, 7.23s/it] 20%|█▉ | 2432/12188 [5:19:07<19:22:22, 7.15s/it] {'loss': 0.3757, 'grad_norm': 0.7797850821447573, 'learning_rate': 9.2658714410948e-06, 'epoch': 0.2} + 20%|█▉ | 2432/12188 [5:19:07<19:22:22, 7.15s/it] 20%|█▉ | 2433/12188 [5:19:14<19:19:17, 7.13s/it] {'loss': 0.4198, 'grad_norm': 0.76934715671421, 'learning_rate': 9.265178202904832e-06, 'epoch': 0.2} + 20%|█▉ | 2433/12188 [5:19:14<19:19:17, 7.13s/it] 20%|█▉ | 2434/12188 [5:19:22<19:31:13, 7.20s/it] {'loss': 0.3661, 'grad_norm': 0.9146614702592466, 'learning_rate': 9.264484663514832e-06, 'epoch': 0.2} + 20%|█▉ | 2434/12188 [5:19:22<19:31:13, 7.20s/it] 20%|█▉ | 2435/12188 [5:19:30<20:09:28, 7.44s/it] {'loss': 0.3776, 'grad_norm': 1.6643746278538945, 'learning_rate': 9.263790822973778e-06, 'epoch': 0.2} + 20%|█▉ | 2435/12188 [5:19:30<20:09:28, 7.44s/it] 20%|█▉ | 2436/12188 [5:19:38<21:17:05, 7.86s/it] {'loss': 0.3903, 'grad_norm': 7.341172149670472, 'learning_rate': 9.263096681330665e-06, 'epoch': 0.2} + 20%|█▉ | 2436/12188 [5:19:38<21:17:05, 7.86s/it] 20%|█▉ | 2437/12188 [5:19:45<20:34:54, 7.60s/it] {'loss': 0.3742, 'grad_norm': 2.723318094343984, 'learning_rate': 9.262402238634514e-06, 'epoch': 0.2} + 20%|█▉ | 2437/12188 [5:19:45<20:34:54, 7.60s/it] 20%|██ | 2438/12188 [5:19:52<20:09:01, 7.44s/it] {'loss': 0.3904, 'grad_norm': 0.6625932061133696, 'learning_rate': 9.261707494934365e-06, 'epoch': 0.2} + 20%|██ | 2438/12188 [5:19:52<20:09:01, 7.44s/it] 20%|██ | 2439/12188 [5:19:59<19:44:26, 7.29s/it] {'loss': 0.431, 'grad_norm': 0.68402025879098, 'learning_rate': 9.26101245027928e-06, 'epoch': 0.2} + 20%|██ | 2439/12188 [5:19:59<19:44:26, 7.29s/it] 20%|██ | 2440/12188 [5:20:07<19:37:00, 7.24s/it] {'loss': 0.3804, 'grad_norm': 1.4648379743506206, 'learning_rate': 9.260317104718341e-06, 'epoch': 0.2} + 20%|██ | 2440/12188 [5:20:07<19:37:00, 7.24s/it] 20%|██ | 2441/12188 [5:20:14<19:35:15, 7.23s/it] {'loss': 0.3737, 'grad_norm': 0.6333649909430112, 'learning_rate': 9.259621458300653e-06, 'epoch': 0.2} + 20%|██ | 2441/12188 [5:20:14<19:35:15, 7.23s/it] 20%|██ | 2442/12188 [5:20:21<19:27:04, 7.18s/it] {'loss': 0.4065, 'grad_norm': 0.6133769772102693, 'learning_rate': 9.258925511075342e-06, 'epoch': 0.2} + 20%|██ | 2442/12188 [5:20:21<19:27:04, 7.18s/it] 20%|██ | 2443/12188 [5:20:28<19:32:43, 7.22s/it] {'loss': 0.3661, 'grad_norm': 0.6167993518472847, 'learning_rate': 9.258229263091554e-06, 'epoch': 0.2} + 20%|██ | 2443/12188 [5:20:28<19:32:43, 7.22s/it] 20%|██ | 2444/12188 [5:20:35<19:26:50, 7.18s/it] {'loss': 0.3607, 'grad_norm': 0.5962270933531933, 'learning_rate': 9.257532714398457e-06, 'epoch': 0.2} + 20%|██ | 2444/12188 [5:20:35<19:26:50, 7.18s/it] 20%|██ | 2445/12188 [5:20:42<18:57:41, 7.01s/it] {'loss': 0.3544, 'grad_norm': 0.6119506230590925, 'learning_rate': 9.256835865045237e-06, 'epoch': 0.2} + 20%|██ | 2445/12188 [5:20:42<18:57:41, 7.01s/it] 20%|██ | 2446/12188 [5:20:49<18:46:06, 6.94s/it] {'loss': 0.3635, 'grad_norm': 0.6984681789431609, 'learning_rate': 9.25613871508111e-06, 'epoch': 0.2} + 20%|██ | 2446/12188 [5:20:49<18:46:06, 6.94s/it] 20%|██ | 2447/12188 [5:20:56<19:01:33, 7.03s/it] {'loss': 0.4047, 'grad_norm': 0.6734108091549181, 'learning_rate': 9.255441264555303e-06, 'epoch': 0.2} + 20%|██ | 2447/12188 [5:20:56<19:01:33, 7.03s/it] 20%|██ | 2448/12188 [5:21:03<18:56:40, 7.00s/it] {'loss': 0.3714, 'grad_norm': 0.7021954445146551, 'learning_rate': 9.254743513517072e-06, 'epoch': 0.2} + 20%|██ | 2448/12188 [5:21:03<18:56:40, 7.00s/it] 20%|██ | 2449/12188 [5:21:10<18:56:22, 7.00s/it] {'loss': 0.4064, 'grad_norm': 0.6817109934966334, 'learning_rate': 9.254045462015691e-06, 'epoch': 0.2} + 20%|██ | 2449/12188 [5:21:10<18:56:22, 7.00s/it] 20%|██ | 2450/12188 [5:21:17<18:50:45, 6.97s/it] {'loss': 0.3578, 'grad_norm': 0.6599785957237947, 'learning_rate': 9.253347110100453e-06, 'epoch': 0.2} + 20%|██ | 2450/12188 [5:21:17<18:50:45, 6.97s/it] 20%|██ | 2451/12188 [5:21:25<20:10:11, 7.46s/it] {'loss': 0.4133, 'grad_norm': 0.650214581742734, 'learning_rate': 9.252648457820676e-06, 'epoch': 0.2} + 20%|██ | 2451/12188 [5:21:25<20:10:11, 7.46s/it] 20%|██ | 2452/12188 [5:21:32<19:48:12, 7.32s/it] {'loss': 0.3503, 'grad_norm': 0.6743089185953098, 'learning_rate': 9.251949505225698e-06, 'epoch': 0.2} + 20%|██ | 2452/12188 [5:21:32<19:48:12, 7.32s/it] 20%|██ | 2453/12188 [5:21:40<20:06:08, 7.43s/it] {'loss': 0.3698, 'grad_norm': 0.6210238181287334, 'learning_rate': 9.251250252364873e-06, 'epoch': 0.2} + 20%|██ | 2453/12188 [5:21:40<20:06:08, 7.43s/it] 20%|██ | 2454/12188 [5:21:47<19:47:08, 7.32s/it] {'loss': 0.4017, 'grad_norm': 0.6748403622548241, 'learning_rate': 9.25055069928759e-06, 'epoch': 0.2} + 20%|██ | 2454/12188 [5:21:47<19:47:08, 7.32s/it] 20%|██ | 2455/12188 [5:21:54<19:47:10, 7.32s/it] {'loss': 0.3748, 'grad_norm': 0.619584121209874, 'learning_rate': 9.249850846043244e-06, 'epoch': 0.2} + 20%|██ | 2455/12188 [5:21:54<19:47:10, 7.32s/it] 20%|██ | 2456/12188 [5:22:01<19:15:15, 7.12s/it] {'loss': 0.3908, 'grad_norm': 0.6702978661475986, 'learning_rate': 9.24915069268126e-06, 'epoch': 0.2} + 20%|██ | 2456/12188 [5:22:01<19:15:15, 7.12s/it] 20%|██ | 2457/12188 [5:22:09<20:02:46, 7.42s/it] {'loss': 0.3639, 'grad_norm': 0.6149596959206236, 'learning_rate': 9.24845023925108e-06, 'epoch': 0.2} + 20%|██ | 2457/12188 [5:22:09<20:02:46, 7.42s/it] 20%|██ | 2458/12188 [5:22:16<19:26:23, 7.19s/it] {'loss': 0.3702, 'grad_norm': 0.6870720441157786, 'learning_rate': 9.24774948580217e-06, 'epoch': 0.2} + 20%|██ | 2458/12188 [5:22:16<19:26:23, 7.19s/it] 20%|██ | 2459/12188 [5:22:24<20:25:05, 7.56s/it] {'loss': 0.3689, 'grad_norm': 0.603141165689622, 'learning_rate': 9.247048432384015e-06, 'epoch': 0.2} + 20%|██ | 2459/12188 [5:22:24<20:25:05, 7.56s/it] 20%|██ | 2460/12188 [5:22:31<20:00:02, 7.40s/it] {'loss': 0.4183, 'grad_norm': 0.8007614036681425, 'learning_rate': 9.246347079046124e-06, 'epoch': 0.2} + 20%|██ | 2460/12188 [5:22:31<20:00:02, 7.40s/it] 20%|██ | 2461/12188 [5:22:39<20:18:12, 7.51s/it] {'loss': 0.4201, 'grad_norm': 0.6421298279910205, 'learning_rate': 9.245645425838025e-06, 'epoch': 0.2} + 20%|██ | 2461/12188 [5:22:39<20:18:12, 7.51s/it] 20%|██ | 2462/12188 [5:22:47<20:59:42, 7.77s/it] {'loss': 0.3631, 'grad_norm': 0.6448477097517213, 'learning_rate': 9.244943472809267e-06, 'epoch': 0.2} + 20%|██ | 2462/12188 [5:22:47<20:59:42, 7.77s/it] 20%|██ | 2463/12188 [5:22:55<20:38:07, 7.64s/it] {'loss': 0.3648, 'grad_norm': 0.5993308014050052, 'learning_rate': 9.24424122000942e-06, 'epoch': 0.2} + 20%|██ | 2463/12188 [5:22:55<20:38:07, 7.64s/it] 20%|██ | 2464/12188 [5:23:02<20:29:45, 7.59s/it] {'loss': 0.3703, 'grad_norm': 0.6428438234885919, 'learning_rate': 9.243538667488076e-06, 'epoch': 0.2} + 20%|██ | 2464/12188 [5:23:02<20:29:45, 7.59s/it] 20%|██ | 2465/12188 [5:23:09<19:57:21, 7.39s/it] {'loss': 0.3719, 'grad_norm': 0.6198728433843121, 'learning_rate': 9.242835815294851e-06, 'epoch': 0.2} + 20%|██ | 2465/12188 [5:23:09<19:57:21, 7.39s/it] 20%|██ | 2466/12188 [5:23:17<20:18:48, 7.52s/it] {'loss': 0.3663, 'grad_norm': 0.599305199704726, 'learning_rate': 9.242132663479378e-06, 'epoch': 0.2} + 20%|██ | 2466/12188 [5:23:17<20:18:48, 7.52s/it] 20%|██ | 2467/12188 [5:23:24<19:49:50, 7.34s/it] {'loss': 0.3765, 'grad_norm': 0.6643431379381497, 'learning_rate': 9.24142921209131e-06, 'epoch': 0.2} + 20%|██ | 2467/12188 [5:23:24<19:49:50, 7.34s/it] 20%|██ | 2468/12188 [5:23:31<19:57:31, 7.39s/it] {'loss': 0.3914, 'grad_norm': 0.6445376697161935, 'learning_rate': 9.240725461180327e-06, 'epoch': 0.2} + 20%|██ | 2468/12188 [5:23:31<19:57:31, 7.39s/it] 20%|██ | 2469/12188 [5:23:38<19:22:04, 7.17s/it] {'loss': 0.3351, 'grad_norm': 0.5847350731032054, 'learning_rate': 9.240021410796125e-06, 'epoch': 0.2} + 20%|██ | 2469/12188 [5:23:38<19:22:04, 7.17s/it] 20%|██ | 2470/12188 [5:23:45<19:31:28, 7.23s/it] {'loss': 0.3793, 'grad_norm': 0.6145280301010878, 'learning_rate': 9.239317060988422e-06, 'epoch': 0.2} + 20%|██ | 2470/12188 [5:23:45<19:31:28, 7.23s/it] 20%|██ | 2471/12188 [5:23:52<19:20:49, 7.17s/it] {'loss': 0.3767, 'grad_norm': 0.6004232800873174, 'learning_rate': 9.238612411806959e-06, 'epoch': 0.2} + 20%|██ | 2471/12188 [5:23:52<19:20:49, 7.17s/it] 20%|██ | 2472/12188 [5:24:00<19:18:41, 7.16s/it] {'loss': 0.3914, 'grad_norm': 0.6358736405047657, 'learning_rate': 9.237907463301499e-06, 'epoch': 0.2} + 20%|██ | 2472/12188 [5:24:00<19:18:41, 7.16s/it] 20%|██ | 2473/12188 [5:24:07<19:30:07, 7.23s/it] {'loss': 0.4055, 'grad_norm': 0.7328805760166421, 'learning_rate': 9.237202215521822e-06, 'epoch': 0.2} + 20%|██ | 2473/12188 [5:24:07<19:30:07, 7.23s/it] 20%|██ | 2474/12188 [5:24:14<19:36:16, 7.27s/it] {'loss': 0.4082, 'grad_norm': 0.6417973166510093, 'learning_rate': 9.236496668517731e-06, 'epoch': 0.2} + 20%|██ | 2474/12188 [5:24:14<19:36:16, 7.27s/it] 20%|██ | 2475/12188 [5:24:21<19:23:31, 7.19s/it] {'loss': 0.3863, 'grad_norm': 0.6276167169465922, 'learning_rate': 9.235790822339052e-06, 'epoch': 0.2} + 20%|██ | 2475/12188 [5:24:21<19:23:31, 7.19s/it] 20%|██ | 2476/12188 [5:24:28<19:07:35, 7.09s/it] {'loss': 0.3645, 'grad_norm': 0.6697461871997861, 'learning_rate': 9.23508467703563e-06, 'epoch': 0.2} + 20%|██ | 2476/12188 [5:24:28<19:07:35, 7.09s/it] 20%|██ | 2477/12188 [5:24:35<18:49:48, 6.98s/it] {'loss': 0.3573, 'grad_norm': 0.6311349176838916, 'learning_rate': 9.234378232657334e-06, 'epoch': 0.2} + 20%|██ | 2477/12188 [5:24:35<18:49:48, 6.98s/it] 20%|██ | 2478/12188 [5:24:43<19:41:30, 7.30s/it] {'loss': 0.4076, 'grad_norm': 0.642336113201591, 'learning_rate': 9.233671489254048e-06, 'epoch': 0.2} + 20%|██ | 2478/12188 [5:24:43<19:41:30, 7.30s/it] 20%|██ | 2479/12188 [5:24:50<19:26:03, 7.21s/it] {'loss': 0.3905, 'grad_norm': 0.654791174020213, 'learning_rate': 9.232964446875685e-06, 'epoch': 0.2} + 20%|██ | 2479/12188 [5:24:50<19:26:03, 7.21s/it] 20%|██ | 2480/12188 [5:24:57<19:06:04, 7.08s/it] {'loss': 0.3989, 'grad_norm': 0.6770208824107404, 'learning_rate': 9.232257105572173e-06, 'epoch': 0.2} + 20%|██ | 2480/12188 [5:24:57<19:06:04, 7.08s/it] 20%|██ | 2481/12188 [5:25:04<19:01:35, 7.06s/it] {'loss': 0.3804, 'grad_norm': 0.6129336277312075, 'learning_rate': 9.231549465393466e-06, 'epoch': 0.2} + 20%|██ | 2481/12188 [5:25:04<19:01:35, 7.06s/it] 20%|██ | 2482/12188 [5:25:14<21:28:03, 7.96s/it] {'loss': 0.3753, 'grad_norm': 0.6025250742520959, 'learning_rate': 9.230841526389532e-06, 'epoch': 0.2} + 20%|██ | 2482/12188 [5:25:14<21:28:03, 7.96s/it] 20%|██ | 2483/12188 [5:25:21<20:39:29, 7.66s/it] {'loss': 0.4086, 'grad_norm': 0.6456490879843146, 'learning_rate': 9.230133288610366e-06, 'epoch': 0.2} + 20%|██ | 2483/12188 [5:25:21<20:39:29, 7.66s/it] 20%|██ | 2484/12188 [5:25:28<19:56:00, 7.39s/it] {'loss': 0.365, 'grad_norm': 0.6841928238489919, 'learning_rate': 9.229424752105986e-06, 'epoch': 0.2} + 20%|██ | 2484/12188 [5:25:28<19:56:00, 7.39s/it] 20%|██ | 2485/12188 [5:25:35<19:50:24, 7.36s/it] {'loss': 0.3812, 'grad_norm': 0.6626579538188243, 'learning_rate': 9.228715916926423e-06, 'epoch': 0.2} + 20%|██ | 2485/12188 [5:25:35<19:50:24, 7.36s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f0cd6a6a5c0> +[Try #0] Failed to fetch sample 4605209 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f0cd6a6a5c0> +Problematic sample: {'image': '20240823_021250_before_screenshot_sub0.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Daily Deals'"}, {'from': 'gpt', 'value': '\nclick(x=0.349, y=0.1195)\n'}]} + 20%|██ | 2486/12188 [5:25:43<20:22:41, 7.56s/it] {'loss': 0.4164, 'grad_norm': 0.6651270462316579, 'learning_rate': 9.228006783121737e-06, 'epoch': 0.2} + 20%|██ | 2486/12188 [5:25:43<20:22:41, 7.56s/it] 20%|██ | 2487/12188 [5:25:50<19:58:56, 7.42s/it] {'loss': 0.3731, 'grad_norm': 0.6520317542262685, 'learning_rate': 9.227297350742004e-06, 'epoch': 0.2} + 20%|██ | 2487/12188 [5:25:50<19:58:56, 7.42s/it] 20%|██ | 2488/12188 [5:25:57<19:50:55, 7.37s/it] {'loss': 0.398, 'grad_norm': 0.6420033718758106, 'learning_rate': 9.226587619837323e-06, 'epoch': 0.2} + 20%|██ | 2488/12188 [5:25:57<19:50:55, 7.37s/it] 20%|██ | 2489/12188 [5:26:05<20:19:01, 7.54s/it] {'loss': 0.4116, 'grad_norm': 0.6478761712165978, 'learning_rate': 9.225877590457817e-06, 'epoch': 0.2} + 20%|██ | 2489/12188 [5:26:05<20:19:01, 7.54s/it] 20%|██ | 2490/12188 [5:26:12<19:47:15, 7.35s/it] {'loss': 0.4156, 'grad_norm': 0.6877741147755323, 'learning_rate': 9.225167262653623e-06, 'epoch': 0.2} + 20%|██ | 2490/12188 [5:26:12<19:47:15, 7.35s/it] 20%|██ | 2491/12188 [5:26:19<19:44:49, 7.33s/it] {'loss': 0.3969, 'grad_norm': 0.6486003228025788, 'learning_rate': 9.224456636474906e-06, 'epoch': 0.2} + 20%|██ | 2491/12188 [5:26:19<19:44:49, 7.33s/it] 20%|██ | 2492/12188 [5:26:26<19:17:49, 7.16s/it] {'loss': 0.3686, 'grad_norm': 0.6723038590546425, 'learning_rate': 9.223745711971849e-06, 'epoch': 0.2} + 20%|██ | 2492/12188 [5:26:26<19:17:49, 7.16s/it] 20%|██ | 2493/12188 [5:26:34<19:31:30, 7.25s/it] {'loss': 0.348, 'grad_norm': 0.6094318863015463, 'learning_rate': 9.223034489194655e-06, 'epoch': 0.2} + 20%|██ | 2493/12188 [5:26:34<19:31:30, 7.25s/it] 20%|██ | 2494/12188 [5:26:40<19:01:38, 7.07s/it] {'loss': 0.3856, 'grad_norm': 0.6318595968907784, 'learning_rate': 9.222322968193551e-06, 'epoch': 0.2} + 20%|██ | 2494/12188 [5:26:40<19:01:38, 7.07s/it] 20%|██ | 2495/12188 [5:26:48<19:23:28, 7.20s/it] {'loss': 0.4001, 'grad_norm': 0.674235166876527, 'learning_rate': 9.221611149018781e-06, 'epoch': 0.2} + 20%|██ | 2495/12188 [5:26:48<19:23:28, 7.20s/it] 20%|██ | 2496/12188 [5:26:55<19:07:00, 7.10s/it] {'loss': 0.3767, 'grad_norm': 0.650487171079544, 'learning_rate': 9.220899031720615e-06, 'epoch': 0.2} + 20%|██ | 2496/12188 [5:26:55<19:07:00, 7.10s/it] 20%|██ | 2497/12188 [5:27:03<20:20:53, 7.56s/it] {'loss': 0.3631, 'grad_norm': 0.6301554493117935, 'learning_rate': 9.220186616349342e-06, 'epoch': 0.2} + 20%|██ | 2497/12188 [5:27:03<20:20:53, 7.56s/it] 20%|██ | 2498/12188 [5:27:11<20:30:57, 7.62s/it] {'loss': 0.3981, 'grad_norm': 0.6805967023242344, 'learning_rate': 9.21947390295527e-06, 'epoch': 0.2} + 20%|██ | 2498/12188 [5:27:11<20:30:57, 7.62s/it] 21%|██ | 2499/12188 [5:27:19<20:49:27, 7.74s/it] {'loss': 0.3815, 'grad_norm': 0.6273462263520281, 'learning_rate': 9.21876089158873e-06, 'epoch': 0.21} + 21%|██ | 2499/12188 [5:27:19<20:49:27, 7.74s/it] 21%|██ | 2500/12188 [5:27:26<20:16:47, 7.54s/it] {'loss': 0.3715, 'grad_norm': 0.6386554608069779, 'learning_rate': 9.218047582300071e-06, 'epoch': 0.21} + 21%|██ | 2500/12188 [5:27:26<20:16:47, 7.54s/it] 21%|██ | 2501/12188 [5:27:33<19:44:18, 7.34s/it] {'loss': 0.3657, 'grad_norm': 0.6343396638094907, 'learning_rate': 9.217333975139673e-06, 'epoch': 0.21} + 21%|██ | 2501/12188 [5:27:33<19:44:18, 7.34s/it] 21%|██ | 2502/12188 [5:27:40<19:28:06, 7.24s/it] {'loss': 0.3536, 'grad_norm': 0.5935044333434655, 'learning_rate': 9.216620070157922e-06, 'epoch': 0.21} + 21%|��█ | 2502/12188 [5:27:40<19:28:06, 7.24s/it] 21%|██ | 2503/12188 [5:27:47<19:06:36, 7.10s/it] {'loss': 0.3975, 'grad_norm': 0.7905891102033439, 'learning_rate': 9.215905867405238e-06, 'epoch': 0.21} + 21%|██ | 2503/12188 [5:27:47<19:06:36, 7.10s/it] 21%|██ | 2504/12188 [5:27:54<18:54:52, 7.03s/it] {'loss': 0.406, 'grad_norm': 0.6765764073846644, 'learning_rate': 9.215191366932055e-06, 'epoch': 0.21} + 21%|██ | 2504/12188 [5:27:54<18:54:52, 7.03s/it] 21%|██ | 2505/12188 [5:28:00<18:42:14, 6.95s/it] {'loss': 0.355, 'grad_norm': 0.6288024935616046, 'learning_rate': 9.214476568788828e-06, 'epoch': 0.21} + 21%|██ | 2505/12188 [5:28:00<18:42:14, 6.95s/it] 21%|██ | 2506/12188 [5:28:07<18:48:43, 6.99s/it] {'loss': 0.3579, 'grad_norm': 0.62627135514007, 'learning_rate': 9.213761473026039e-06, 'epoch': 0.21} + 21%|██ | 2506/12188 [5:28:07<18:48:43, 6.99s/it] 21%|██ | 2507/12188 [5:28:15<18:57:48, 7.05s/it] {'loss': 0.4228, 'grad_norm': 0.662085036278497, 'learning_rate': 9.213046079694183e-06, 'epoch': 0.21} + 21%|██ | 2507/12188 [5:28:15<18:57:48, 7.05s/it] 21%|██ | 2508/12188 [5:28:21<18:49:36, 7.00s/it] {'loss': 0.4412, 'grad_norm': 0.6473849995493279, 'learning_rate': 9.212330388843782e-06, 'epoch': 0.21} + 21%|██ | 2508/12188 [5:28:21<18:49:36, 7.00s/it] 21%|██ | 2509/12188 [5:28:29<19:38:15, 7.30s/it] {'loss': 0.4032, 'grad_norm': 0.6610397084958671, 'learning_rate': 9.211614400525377e-06, 'epoch': 0.21} + 21%|██ | 2509/12188 [5:28:30<19:38:15, 7.30s/it] 21%|██ | 2510/12188 [5:28:37<19:25:03, 7.22s/it] {'loss': 0.3949, 'grad_norm': 0.679636987949905, 'learning_rate': 9.21089811478953e-06, 'epoch': 0.21} + 21%|██ | 2510/12188 [5:28:37<19:25:03, 7.22s/it] 21%|██ | 2511/12188 [5:28:43<18:58:24, 7.06s/it] {'loss': 0.3605, 'grad_norm': 0.7091923837415262, 'learning_rate': 9.210181531686823e-06, 'epoch': 0.21} + 21%|██ | 2511/12188 [5:28:43<18:58:24, 7.06s/it] 21%|██ | 2512/12188 [5:28:50<18:57:32, 7.05s/it] {'loss': 0.4311, 'grad_norm': 0.6925181332355043, 'learning_rate': 9.209464651267862e-06, 'epoch': 0.21} + 21%|██ | 2512/12188 [5:28:50<18:57:32, 7.05s/it] 21%|██ | 2513/12188 [5:28:58<19:37:11, 7.30s/it] {'loss': 0.3675, 'grad_norm': 0.6346061250075556, 'learning_rate': 9.208747473583267e-06, 'epoch': 0.21} + 21%|██ | 2513/12188 [5:28:58<19:37:11, 7.30s/it] 21%|██ | 2514/12188 [5:29:05<19:19:14, 7.19s/it] {'loss': 0.3859, 'grad_norm': 0.6408648447434393, 'learning_rate': 9.208029998683688e-06, 'epoch': 0.21} + 21%|██ | 2514/12188 [5:29:05<19:19:14, 7.19s/it] 21%|██ | 2515/12188 [5:29:12<18:46:03, 6.98s/it] {'loss': 0.377, 'grad_norm': 0.6021887007768129, 'learning_rate': 9.207312226619793e-06, 'epoch': 0.21} + 21%|██ | 2515/12188 [5:29:12<18:46:03, 6.98s/it] 21%|██ | 2516/12188 [5:29:20<19:48:47, 7.37s/it] {'loss': 0.403, 'grad_norm': 0.5969534385443728, 'learning_rate': 9.206594157442267e-06, 'epoch': 0.21} + 21%|██ | 2516/12188 [5:29:20<19:48:47, 7.37s/it] 21%|██ | 2517/12188 [5:29:27<19:41:38, 7.33s/it] {'loss': 0.3432, 'grad_norm': 0.6075791250564491, 'learning_rate': 9.20587579120182e-06, 'epoch': 0.21} + 21%|██ | 2517/12188 [5:29:27<19:41:38, 7.33s/it] 21%|██ | 2518/12188 [5:29:34<19:23:57, 7.22s/it] {'loss': 0.3683, 'grad_norm': 0.6845160426906952, 'learning_rate': 9.205157127949182e-06, 'epoch': 0.21} + 21%|██ | 2518/12188 [5:29:34<19:23:57, 7.22s/it] 21%|██ | 2519/12188 [5:29:43<20:36:42, 7.67s/it] {'loss': 0.3804, 'grad_norm': 0.6959926461691346, 'learning_rate': 9.204438167735104e-06, 'epoch': 0.21} + 21%|██ | 2519/12188 [5:29:43<20:36:42, 7.67s/it] 21%|██ | 2520/12188 [5:29:49<19:49:19, 7.38s/it] {'loss': 0.4126, 'grad_norm': 0.6190919122064135, 'learning_rate': 9.203718910610358e-06, 'epoch': 0.21} + 21%|██ | 2520/12188 [5:29:49<19:49:19, 7.38s/it] 21%|██ | 2521/12188 [5:29:56<19:26:51, 7.24s/it] {'loss': 0.3795, 'grad_norm': 0.7201391503267754, 'learning_rate': 9.202999356625735e-06, 'epoch': 0.21} + 21%|██ | 2521/12188 [5:29:56<19:26:51, 7.24s/it] 21%|██ | 2522/12188 [5:30:03<18:51:37, 7.02s/it] {'loss': 0.3816, 'grad_norm': 0.6647600323815233, 'learning_rate': 9.202279505832053e-06, 'epoch': 0.21} + 21%|██ | 2522/12188 [5:30:03<18:51:37, 7.02s/it] 21%|██ | 2523/12188 [5:30:10<18:39:28, 6.95s/it] {'loss': 0.3703, 'grad_norm': 0.668155898495432, 'learning_rate': 9.20155935828014e-06, 'epoch': 0.21} + 21%|██ | 2523/12188 [5:30:10<18:39:28, 6.95s/it] 21%|██ | 2524/12188 [5:30:16<18:24:08, 6.86s/it] {'loss': 0.3843, 'grad_norm': 0.6732578208363885, 'learning_rate': 9.200838914020857e-06, 'epoch': 0.21} + 21%|██ | 2524/12188 [5:30:16<18:24:08, 6.86s/it] 21%|██ | 2525/12188 [5:30:23<18:34:49, 6.92s/it] {'loss': 0.3802, 'grad_norm': 0.6299295989507886, 'learning_rate': 9.200118173105079e-06, 'epoch': 0.21} + 21%|██ | 2525/12188 [5:30:23<18:34:49, 6.92s/it] 21%|██ | 2526/12188 [5:30:30<18:40:17, 6.96s/it] {'loss': 0.4092, 'grad_norm': 0.6865258250479377, 'learning_rate': 9.199397135583704e-06, 'epoch': 0.21} + 21%|██ | 2526/12188 [5:30:30<18:40:17, 6.96s/it] 21%|██ | 2527/12188 [5:30:38<18:48:11, 7.01s/it] {'loss': 0.3956, 'grad_norm': 0.6831176545914609, 'learning_rate': 9.198675801507651e-06, 'epoch': 0.21} + 21%|██ | 2527/12188 [5:30:38<18:48:11, 7.01s/it] 21%|██ | 2528/12188 [5:30:44<18:43:34, 6.98s/it] {'loss': 0.4013, 'grad_norm': 0.6603954295539408, 'learning_rate': 9.197954170927857e-06, 'epoch': 0.21} + 21%|██ | 2528/12188 [5:30:44<18:43:34, 6.98s/it] 21%|██ | 2529/12188 [5:30:52<19:26:31, 7.25s/it] {'loss': 0.3513, 'grad_norm': 0.7352828335180958, 'learning_rate': 9.197232243895285e-06, 'epoch': 0.21} + 21%|██ | 2529/12188 [5:30:52<19:26:31, 7.25s/it] 21%|██ | 2530/12188 [5:31:01<20:32:01, 7.65s/it] {'loss': 0.3702, 'grad_norm': 0.6752157676155963, 'learning_rate': 9.196510020460914e-06, 'epoch': 0.21} + 21%|██ | 2530/12188 [5:31:01<20:32:01, 7.65s/it] 21%|██ | 2531/12188 [5:31:08<20:19:22, 7.58s/it] {'loss': 0.4048, 'grad_norm': 1.0903409929835717, 'learning_rate': 9.195787500675748e-06, 'epoch': 0.21} + 21%|██ | 2531/12188 [5:31:08<20:19:22, 7.58s/it] 21%|██ | 2532/12188 [5:31:16<20:30:23, 7.65s/it] {'loss': 0.3688, 'grad_norm': 0.6621229346795444, 'learning_rate': 9.19506468459081e-06, 'epoch': 0.21} + 21%|██ | 2532/12188 [5:31:16<20:30:23, 7.65s/it] 21%|██ | 2533/12188 [5:31:23<19:54:34, 7.42s/it] {'loss': 0.3971, 'grad_norm': 0.6329987018266661, 'learning_rate': 9.194341572257145e-06, 'epoch': 0.21} + 21%|██ | 2533/12188 [5:31:23<19:54:34, 7.42s/it] 21%|██ | 2534/12188 [5:31:31<20:06:19, 7.50s/it] {'loss': 0.3677, 'grad_norm': 0.6731275703819781, 'learning_rate': 9.193618163725814e-06, 'epoch': 0.21} + 21%|██ | 2534/12188 [5:31:31<20:06:19, 7.50s/it] 21%|██ | 2535/12188 [5:31:38<19:55:10, 7.43s/it] {'loss': 0.3542, 'grad_norm': 0.6400521967933274, 'learning_rate': 9.192894459047908e-06, 'epoch': 0.21} + 21%|██ | 2535/12188 [5:31:38<19:55:10, 7.43s/it] 21%|██ | 2536/12188 [5:31:45<19:57:27, 7.44s/it] {'loss': 0.3835, 'grad_norm': 0.6219788260337382, 'learning_rate': 9.192170458274533e-06, 'epoch': 0.21} + 21%|██ | 2536/12188 [5:31:45<19:57:27, 7.44s/it] 21%|██ | 2537/12188 [5:31:52<19:21:33, 7.22s/it] {'loss': 0.3277, 'grad_norm': 0.5602779745400707, 'learning_rate': 9.191446161456811e-06, 'epoch': 0.21} + 21%|██ | 2537/12188 [5:31:52<19:21:33, 7.22s/it] 21%|██ | 2538/12188 [5:32:00<19:42:53, 7.35s/it] {'loss': 0.3597, 'grad_norm': 0.6387629806017365, 'learning_rate': 9.1907215686459e-06, 'epoch': 0.21} + 21%|██ | 2538/12188 [5:32:00<19:42:53, 7.35s/it] 21%|██ | 2539/12188 [5:32:09<21:27:05, 8.00s/it] {'loss': 0.3822, 'grad_norm': 0.6194926518947934, 'learning_rate': 9.189996679892963e-06, 'epoch': 0.21} + 21%|██ | 2539/12188 [5:32:09<21:27:05, 8.00s/it] 21%|██ | 2540/12188 [5:32:17<21:00:15, 7.84s/it] {'loss': 0.355, 'grad_norm': 0.6619313825063131, 'learning_rate': 9.189271495249191e-06, 'epoch': 0.21} + 21%|██ | 2540/12188 [5:32:17<21:00:15, 7.84s/it] 21%|██ | 2541/12188 [5:32:24<20:51:30, 7.78s/it] {'loss': 0.3658, 'grad_norm': 1.2650541102066193, 'learning_rate': 9.188546014765798e-06, 'epoch': 0.21} + 21%|██ | 2541/12188 [5:32:24<20:51:30, 7.78s/it] 21%|██ | 2542/12188 [5:32:31<19:57:20, 7.45s/it] {'loss': 0.387, 'grad_norm': 0.6112369919232287, 'learning_rate': 9.187820238494014e-06, 'epoch': 0.21} + 21%|██ | 2542/12188 [5:32:31<19:57:20, 7.45s/it] 21%|██ | 2543/12188 [5:32:38<19:52:02, 7.42s/it] {'loss': 0.3782, 'grad_norm': 0.6176682179302964, 'learning_rate': 9.187094166485094e-06, 'epoch': 0.21} + 21%|██ | 2543/12188 [5:32:38<19:52:02, 7.42s/it] 21%|██ | 2544/12188 [5:32:46<19:39:23, 7.34s/it] {'loss': 0.3674, 'grad_norm': 0.6404696741333546, 'learning_rate': 9.18636779879031e-06, 'epoch': 0.21} + 21%|██ | 2544/12188 [5:32:46<19:39:23, 7.34s/it] 21%|██ | 2545/12188 [5:32:53<19:24:44, 7.25s/it] {'loss': 0.435, 'grad_norm': 0.7025096095138386, 'learning_rate': 9.185641135460957e-06, 'epoch': 0.21} + 21%|██ | 2545/12188 [5:32:53<19:24:44, 7.25s/it] 21%|██ | 2546/12188 [5:33:00<19:30:13, 7.28s/it] {'loss': 0.3854, 'grad_norm': 0.6440880757634656, 'learning_rate': 9.184914176548354e-06, 'epoch': 0.21} + 21%|██ | 2546/12188 [5:33:00<19:30:13, 7.28s/it] 21%|██ | 2547/12188 [5:33:07<19:19:23, 7.22s/it] {'loss': 0.3947, 'grad_norm': 0.680057351973999, 'learning_rate': 9.184186922103835e-06, 'epoch': 0.21} + 21%|██ | 2547/12188 [5:33:07<19:19:23, 7.22s/it] 21%|██ | 2548/12188 [5:33:14<18:56:20, 7.07s/it] {'loss': 0.411, 'grad_norm': 0.6926670499330875, 'learning_rate': 9.183459372178758e-06, 'epoch': 0.21} + 21%|██ | 2548/12188 [5:33:14<18:56:20, 7.07s/it] 21%|██ | 2549/12188 [5:33:22<20:14:03, 7.56s/it] {'loss': 0.3501, 'grad_norm': 0.7354107719483063, 'learning_rate': 9.1827315268245e-06, 'epoch': 0.21} + 21%|██ | 2549/12188 [5:33:23<20:14:03, 7.56s/it] 21%|██ | 2550/12188 [5:33:31<20:37:28, 7.70s/it] {'loss': 0.3634, 'grad_norm': 0.6166892422702375, 'learning_rate': 9.182003386092462e-06, 'epoch': 0.21} + 21%|██ | 2550/12188 [5:33:31<20:37:28, 7.70s/it] 21%|██ | 2551/12188 [5:33:39<21:17:39, 7.95s/it] {'loss': 0.376, 'grad_norm': 0.6494766954642243, 'learning_rate': 9.181274950034065e-06, 'epoch': 0.21} + 21%|██ | 2551/12188 [5:33:39<21:17:39, 7.95s/it] 21%|██ | 2552/12188 [5:33:47<20:55:51, 7.82s/it] {'loss': 0.3715, 'grad_norm': 0.6723158082749306, 'learning_rate': 9.180546218700748e-06, 'epoch': 0.21} + 21%|██ | 2552/12188 [5:33:47<20:55:51, 7.82s/it] 21%|██ | 2553/12188 [5:33:55<21:08:42, 7.90s/it] {'loss': 0.376, 'grad_norm': 0.6886700081101815, 'learning_rate': 9.179817192143974e-06, 'epoch': 0.21} + 21%|██ | 2553/12188 [5:33:55<21:08:42, 7.90s/it] 21%|██ | 2554/12188 [5:34:02<20:30:29, 7.66s/it] {'loss': 0.4191, 'grad_norm': 0.6720438526297107, 'learning_rate': 9.179087870415225e-06, 'epoch': 0.21} + 21%|██ | 2554/12188 [5:34:02<20:30:29, 7.66s/it] 21%|██ | 2555/12188 [5:34:09<20:15:27, 7.57s/it] {'loss': 0.3662, 'grad_norm': 0.6352527444170173, 'learning_rate': 9.178358253566005e-06, 'epoch': 0.21} + 21%|██ | 2555/12188 [5:34:09<20:15:27, 7.57s/it] 21%|██ | 2556/12188 [5:34:16<19:54:05, 7.44s/it] {'loss': 0.4056, 'grad_norm': 0.6769711962910481, 'learning_rate': 9.177628341647837e-06, 'epoch': 0.21} + 21%|██ | 2556/12188 [5:34:16<19:54:05, 7.44s/it] 21%|██ | 2557/12188 [5:34:24<20:17:08, 7.58s/it] {'loss': 0.3612, 'grad_norm': 0.6114019555803968, 'learning_rate': 9.176898134712269e-06, 'epoch': 0.21} + 21%|██ | 2557/12188 [5:34:24<20:17:08, 7.58s/it] 21%|██ | 2558/12188 [5:34:32<20:13:28, 7.56s/it] {'loss': 0.3866, 'grad_norm': 0.6933371607834712, 'learning_rate': 9.176167632810864e-06, 'epoch': 0.21} + 21%|██ | 2558/12188 [5:34:32<20:13:28, 7.56s/it] 21%|██ | 2559/12188 [5:34:42<22:19:33, 8.35s/it] {'loss': 0.3299, 'grad_norm': 0.7733077719020665, 'learning_rate': 9.17543683599521e-06, 'epoch': 0.21} + 21%|██ | 2559/12188 [5:34:42<22:19:33, 8.35s/it] 21%|██ | 2560/12188 [5:34:50<22:04:32, 8.25s/it] {'loss': 0.3755, 'grad_norm': 0.7758775083614357, 'learning_rate': 9.174705744316917e-06, 'epoch': 0.21} + 21%|██ | 2560/12188 [5:34:50<22:04:32, 8.25s/it] 21%|██ | 2561/12188 [5:34:57<20:48:39, 7.78s/it] {'loss': 0.3961, 'grad_norm': 0.872009775262656, 'learning_rate': 9.173974357827611e-06, 'epoch': 0.21} + 21%|██ | 2561/12188 [5:34:57<20:48:39, 7.78s/it] 21%|██ | 2562/12188 [5:35:04<20:21:09, 7.61s/it] {'loss': 0.4486, 'grad_norm': 0.733150187218619, 'learning_rate': 9.173242676578941e-06, 'epoch': 0.21} + 21%|██ | 2562/12188 [5:35:04<20:21:09, 7.61s/it] 21%|██ | 2563/12188 [5:35:11<20:07:25, 7.53s/it] {'loss': 0.3502, 'grad_norm': 0.6947389945253467, 'learning_rate': 9.172510700622579e-06, 'epoch': 0.21} + 21%|██ | 2563/12188 [5:35:11<20:07:25, 7.53s/it] 21%|██ | 2564/12188 [5:35:19<20:09:30, 7.54s/it] {'loss': 0.3826, 'grad_norm': 0.7525518369283369, 'learning_rate': 9.171778430010213e-06, 'epoch': 0.21} + 21%|██ | 2564/12188 [5:35:19<20:09:30, 7.54s/it] 21%|██ | 2565/12188 [5:35:25<19:17:21, 7.22s/it] {'loss': 0.3621, 'grad_norm': 0.7253784658528457, 'learning_rate': 9.17104586479356e-06, 'epoch': 0.21} + 21%|██ | 2565/12188 [5:35:25<19:17:21, 7.22s/it] 21%|██ | 2566/12188 [5:35:32<19:13:33, 7.19s/it] {'loss': 0.4276, 'grad_norm': 0.8221396166899103, 'learning_rate': 9.170313005024347e-06, 'epoch': 0.21} + 21%|██ | 2566/12188 [5:35:32<19:13:33, 7.19s/it] 21%|██ | 2567/12188 [5:35:41<20:18:51, 7.60s/it] {'loss': 0.4181, 'grad_norm': 0.6995004447663062, 'learning_rate': 9.16957985075433e-06, 'epoch': 0.21} + 21%|██ | 2567/12188 [5:35:41<20:18:51, 7.60s/it] 21%|██ | 2568/12188 [5:35:49<20:46:27, 7.77s/it] {'loss': 0.3648, 'grad_norm': 0.6161706890809652, 'learning_rate': 9.168846402035283e-06, 'epoch': 0.21} + 21%|██ | 2568/12188 [5:35:49<20:46:27, 7.77s/it] 21%|██ | 2569/12188 [5:35:56<20:19:14, 7.61s/it] {'loss': 0.4007, 'grad_norm': 0.6669988110754128, 'learning_rate': 9.168112658918999e-06, 'epoch': 0.21} + 21%|██ | 2569/12188 [5:35:56<20:19:14, 7.61s/it] 21%|██ | 2570/12188 [5:36:03<19:50:29, 7.43s/it] {'loss': 0.3466, 'grad_norm': 0.6396015474469983, 'learning_rate': 9.167378621457299e-06, 'epoch': 0.21} + 21%|██ | 2570/12188 [5:36:03<19:50:29, 7.43s/it] 21%|██ | 2571/12188 [5:36:11<20:06:52, 7.53s/it] {'loss': 0.3673, 'grad_norm': 0.6025791702638675, 'learning_rate': 9.166644289702014e-06, 'epoch': 0.21} + 21%|██ | 2571/12188 [5:36:11<20:06:52, 7.53s/it] 21%|██ | 2572/12188 [5:36:18<19:25:01, 7.27s/it] {'loss': 0.3587, 'grad_norm': 0.6380567832519853, 'learning_rate': 9.165909663705003e-06, 'epoch': 0.21} + 21%|██ | 2572/12188 [5:36:18<19:25:01, 7.27s/it] 21%|██ | 2573/12188 [5:36:25<19:12:10, 7.19s/it] {'loss': 0.3862, 'grad_norm': 0.6344471060072117, 'learning_rate': 9.165174743518147e-06, 'epoch': 0.21} + 21%|██ | 2573/12188 [5:36:25<19:12:10, 7.19s/it] 21%|██ | 2574/12188 [5:36:33<19:43:59, 7.39s/it] {'loss': 0.4191, 'grad_norm': 0.9000524314741505, 'learning_rate': 9.16443952919334e-06, 'epoch': 0.21} + 21%|██ | 2574/12188 [5:36:33<19:43:59, 7.39s/it] 21%|██ | 2575/12188 [5:36:40<19:32:54, 7.32s/it] {'loss': 0.3857, 'grad_norm': 0.680214900401495, 'learning_rate': 9.163704020782507e-06, 'epoch': 0.21} + 21%|██ | 2575/12188 [5:36:40<19:32:54, 7.32s/it] 21%|██ | 2576/12188 [5:36:47<19:25:12, 7.27s/it] {'loss': 0.3643, 'grad_norm': 0.6629947457461781, 'learning_rate': 9.162968218337583e-06, 'epoch': 0.21} + 21%|██ | 2576/12188 [5:36:47<19:25:12, 7.27s/it] 21%|██ | 2577/12188 [5:36:54<19:05:40, 7.15s/it] {'loss': 0.3956, 'grad_norm': 0.6978901261857845, 'learning_rate': 9.162232121910533e-06, 'epoch': 0.21} + 21%|██ | 2577/12188 [5:36:54<19:05:40, 7.15s/it] 21%|██ | 2578/12188 [5:37:01<18:53:03, 7.07s/it] {'loss': 0.3834, 'grad_norm': 0.8236785799541977, 'learning_rate': 9.161495731553339e-06, 'epoch': 0.21} + 21%|██ | 2578/12188 [5:37:01<18:53:03, 7.07s/it] 21%|██ | 2579/12188 [5:37:08<19:28:06, 7.29s/it] {'loss': 0.3672, 'grad_norm': 0.8487063045216624, 'learning_rate': 9.160759047318e-06, 'epoch': 0.21} + 21%|██ | 2579/12188 [5:37:08<19:28:06, 7.29s/it] 21%|██ | 2580/12188 [5:37:16<19:32:38, 7.32s/it] {'loss': 0.4268, 'grad_norm': 0.7979938079659996, 'learning_rate': 9.160022069256542e-06, 'epoch': 0.21} + 21%|██ | 2580/12188 [5:37:16<19:32:38, 7.32s/it] 21%|██ | 2581/12188 [5:37:23<19:13:39, 7.21s/it] {'loss': 0.3602, 'grad_norm': 0.6242965821688784, 'learning_rate': 9.159284797421008e-06, 'epoch': 0.21} + 21%|██ | 2581/12188 [5:37:23<19:13:39, 7.21s/it] 21%|██ | 2582/12188 [5:37:30<18:54:52, 7.09s/it] {'loss': 0.3797, 'grad_norm': 0.7426535469099641, 'learning_rate': 9.158547231863467e-06, 'epoch': 0.21} + 21%|██ | 2582/12188 [5:37:30<18:54:52, 7.09s/it] 21%|██ | 2583/12188 [5:37:37<18:55:21, 7.09s/it] {'loss': 0.3661, 'grad_norm': 0.5976764249646294, 'learning_rate': 9.157809372636e-06, 'epoch': 0.21} + 21%|██ | 2583/12188 [5:37:37<18:55:21, 7.09s/it] 21%|██ | 2584/12188 [5:37:44<19:03:41, 7.15s/it] {'loss': 0.3893, 'grad_norm': 0.6650128016650478, 'learning_rate': 9.157071219790716e-06, 'epoch': 0.21} + 21%|██ | 2584/12188 [5:37:44<19:03:41, 7.15s/it] 21%|██ | 2585/12188 [5:37:51<18:53:46, 7.08s/it] {'loss': 0.4041, 'grad_norm': 0.6348002417281701, 'learning_rate': 9.156332773379741e-06, 'epoch': 0.21} + 21%|██ | 2585/12188 [5:37:51<18:53:46, 7.08s/it] 21%|██ | 2586/12188 [5:37:58<18:54:27, 7.09s/it] {'loss': 0.3645, 'grad_norm': 0.837936166012152, 'learning_rate': 9.155594033455223e-06, 'epoch': 0.21} + 21%|██ | 2586/12188 [5:37:58<18:54:27, 7.09s/it] 21%|██ | 2587/12188 [5:38:05<18:34:38, 6.97s/it] {'loss': 0.3684, 'grad_norm': 0.9041537207802659, 'learning_rate': 9.15485500006933e-06, 'epoch': 0.21} + 21%|██ | 2587/12188 [5:38:05<18:34:38, 6.97s/it] 21%|██ | 2588/12188 [5:38:12<18:34:50, 6.97s/it] {'loss': 0.36, 'grad_norm': 0.9448503132247393, 'learning_rate': 9.154115673274254e-06, 'epoch': 0.21} + 21%|██ | 2588/12188 [5:38:12<18:34:50, 6.97s/it] 21%|██ | 2589/12188 [5:38:19<18:57:39, 7.11s/it] {'loss': 0.3647, 'grad_norm': 0.9388235586690427, 'learning_rate': 9.153376053122202e-06, 'epoch': 0.21} + 21%|██ | 2589/12188 [5:38:19<18:57:39, 7.11s/it] 21%|██▏ | 2590/12188 [5:38:26<18:51:42, 7.07s/it] {'loss': 0.3952, 'grad_norm': 0.7908265667135228, 'learning_rate': 9.152636139665406e-06, 'epoch': 0.21} + 21%|██▏ | 2590/12188 [5:38:26<18:51:42, 7.07s/it] 21%|██▏ | 2591/12188 [5:38:33<18:32:15, 6.95s/it] {'loss': 0.3594, 'grad_norm': 0.6552883589284766, 'learning_rate': 9.151895932956118e-06, 'epoch': 0.21} + 21%|██▏ | 2591/12188 [5:38:33<18:32:15, 6.95s/it] 21%|██▏ | 2592/12188 [5:38:40<18:26:11, 6.92s/it] {'loss': 0.3815, 'grad_norm': 1.4012111163643108, 'learning_rate': 9.151155433046609e-06, 'epoch': 0.21} + 21%|██▏ | 2592/12188 [5:38:40<18:26:11, 6.92s/it] 21%|██▏ | 2593/12188 [5:38:47<18:58:38, 7.12s/it] {'loss': 0.4062, 'grad_norm': 0.6775788641429835, 'learning_rate': 9.150414639989173e-06, 'epoch': 0.21} + 21%|██▏ | 2593/12188 [5:38:47<18:58:38, 7.12s/it] 21%|██▏ | 2594/12188 [5:38:55<19:55:50, 7.48s/it] {'loss': 0.3591, 'grad_norm': 0.6120355009000017, 'learning_rate': 9.149673553836124e-06, 'epoch': 0.21} + 21%|██▏ | 2594/12188 [5:38:55<19:55:50, 7.48s/it] 21%|██▏ | 2595/12188 [5:39:03<19:54:20, 7.47s/it] {'loss': 0.3783, 'grad_norm': 0.6974272858927714, 'learning_rate': 9.148932174639795e-06, 'epoch': 0.21} + 21%|██▏ | 2595/12188 [5:39:03<19:54:20, 7.47s/it] 21%|██▏ | 2596/12188 [5:39:11<20:11:34, 7.58s/it] {'loss': 0.3897, 'grad_norm': 0.6795583856768193, 'learning_rate': 9.14819050245254e-06, 'epoch': 0.21} + 21%|██▏ | 2596/12188 [5:39:11<20:11:34, 7.58s/it] 21%|██▏ | 2597/12188 [5:39:18<19:57:35, 7.49s/it] {'loss': 0.3813, 'grad_norm': 0.6272430445407098, 'learning_rate': 9.14744853732674e-06, 'epoch': 0.21} + 21%|██▏ | 2597/12188 [5:39:18<19:57:35, 7.49s/it] 21%|██▏ | 2598/12188 [5:39:26<20:09:02, 7.56s/it] {'loss': 0.4413, 'grad_norm': 0.6800431062910558, 'learning_rate': 9.146706279314786e-06, 'epoch': 0.21} + 21%|██▏ | 2598/12188 [5:39:26<20:09:02, 7.56s/it] 21%|██▏ | 2599/12188 [5:39:33<19:36:26, 7.36s/it] {'loss': 0.4259, 'grad_norm': 0.6591762708312267, 'learning_rate': 9.145963728469094e-06, 'epoch': 0.21} + 21%|██▏ | 2599/12188 [5:39:33<19:36:26, 7.36s/it] 21%|██▏ | 2600/12188 [5:39:39<19:07:50, 7.18s/it] {'loss': 0.4142, 'grad_norm': 0.7199618632597287, 'learning_rate': 9.145220884842108e-06, 'epoch': 0.21} + 21%|██▏ | 2600/12188 [5:39:39<19:07:50, 7.18s/it] 21%|██▏ | 2601/12188 [5:39:47<19:19:41, 7.26s/it] {'loss': 0.3779, 'grad_norm': 0.642801519987645, 'learning_rate': 9.144477748486282e-06, 'epoch': 0.21} + 21%|██▏ | 2601/12188 [5:39:47<19:19:41, 7.26s/it] 21%|██▏ | 2602/12188 [5:39:55<20:09:03, 7.57s/it] {'loss': 0.3833, 'grad_norm': 0.7541864731717199, 'learning_rate': 9.143734319454097e-06, 'epoch': 0.21} + 21%|██▏ | 2602/12188 [5:39:55<20:09:03, 7.57s/it] 21%|██▏ | 2603/12188 [5:40:02<19:22:53, 7.28s/it] {'loss': 0.4494, 'grad_norm': 0.6772462637293893, 'learning_rate': 9.142990597798049e-06, 'epoch': 0.21} + 21%|██▏ | 2603/12188 [5:40:02<19:22:53, 7.28s/it] 21%|██▏ | 2604/12188 [5:40:09<19:30:40, 7.33s/it] {'loss': 0.3303, 'grad_norm': 0.7399981041059068, 'learning_rate': 9.142246583570662e-06, 'epoch': 0.21} + 21%|██▏ | 2604/12188 [5:40:09<19:30:40, 7.33s/it] 21%|██▏ | 2605/12188 [5:40:16<19:27:42, 7.31s/it] {'loss': 0.4207, 'grad_norm': 0.6548829404872172, 'learning_rate': 9.141502276824478e-06, 'epoch': 0.21} + 21%|██▏ | 2605/12188 [5:40:16<19:27:42, 7.31s/it] 21%|██▏ | 2606/12188 [5:40:23<18:58:02, 7.13s/it] {'loss': 0.3586, 'grad_norm': 0.8806390532265409, 'learning_rate': 9.140757677612056e-06, 'epoch': 0.21} + 21%|██▏ | 2606/12188 [5:40:23<18:58:02, 7.13s/it] 21%|██▏ | 2607/12188 [5:40:32<20:06:49, 7.56s/it] {'loss': 0.3622, 'grad_norm': 1.4090753518738564, 'learning_rate': 9.140012785985978e-06, 'epoch': 0.21} + 21%|██▏ | 2607/12188 [5:40:32<20:06:49, 7.56s/it] 21%|██▏ | 2608/12188 [5:40:39<20:02:05, 7.53s/it] {'loss': 0.4111, 'grad_norm': 0.9061500651481044, 'learning_rate': 9.139267601998852e-06, 'epoch': 0.21} + 21%|██▏ | 2608/12188 [5:40:39<20:02:05, 7.53s/it] 21%|██▏ | 2609/12188 [5:40:46<19:26:14, 7.31s/it] {'loss': 0.4333, 'grad_norm': 0.6736693473272303, 'learning_rate': 9.138522125703295e-06, 'epoch': 0.21} + 21%|██▏ | 2609/12188 [5:40:46<19:26:14, 7.31s/it] 21%|██▏ | 2610/12188 [5:40:54<19:36:50, 7.37s/it] {'loss': 0.3415, 'grad_norm': 0.6506427335828158, 'learning_rate': 9.137776357151956e-06, 'epoch': 0.21} + 21%|██▏ | 2610/12188 [5:40:54<19:36:50, 7.37s/it] 21%|██▏ | 2611/12188 [5:41:01<19:21:10, 7.27s/it] {'loss': 0.3635, 'grad_norm': 0.652079053853386, 'learning_rate': 9.137030296397499e-06, 'epoch': 0.21} + 21%|██▏ | 2611/12188 [5:41:01<19:21:10, 7.27s/it] 21%|██▏ | 2612/12188 [5:41:09<19:57:42, 7.50s/it] {'loss': 0.4273, 'grad_norm': 0.8667769584999436, 'learning_rate': 9.136283943492608e-06, 'epoch': 0.21} + 21%|██▏ | 2612/12188 [5:41:09<19:57:42, 7.50s/it] 21%|██▏ | 2613/12188 [5:41:16<19:34:55, 7.36s/it] {'loss': 0.4297, 'grad_norm': 1.332103470810094, 'learning_rate': 9.135537298489991e-06, 'epoch': 0.21} + 21%|██▏ | 2613/12188 [5:41:16<19:34:55, 7.36s/it] 21%|██▏ | 2614/12188 [5:41:22<19:01:00, 7.15s/it] {'loss': 0.3675, 'grad_norm': 0.6843070182332299, 'learning_rate': 9.134790361442374e-06, 'epoch': 0.21} + 21%|██▏ | 2614/12188 [5:41:22<19:01:00, 7.15s/it] 21%|██▏ | 2615/12188 [5:41:29<18:33:51, 6.98s/it] {'loss': 0.4276, 'grad_norm': 0.8169727244478298, 'learning_rate': 9.134043132402505e-06, 'epoch': 0.21} + 21%|██▏ | 2615/12188 [5:41:29<18:33:51, 6.98s/it] 21%|██▏ | 2616/12188 [5:41:36<18:50:46, 7.09s/it] {'loss': 0.3974, 'grad_norm': 0.6558182686860287, 'learning_rate': 9.133295611423151e-06, 'epoch': 0.21} + 21%|██▏ | 2616/12188 [5:41:36<18:50:46, 7.09s/it] 21%|██▏ | 2617/12188 [5:41:43<18:57:06, 7.13s/it] {'loss': 0.3799, 'grad_norm': 0.7150293464944485, 'learning_rate': 9.132547798557102e-06, 'epoch': 0.21} + 21%|██▏ | 2617/12188 [5:41:43<18:57:06, 7.13s/it] 21%|██▏ | 2618/12188 [5:41:51<19:17:37, 7.26s/it] {'loss': 0.3392, 'grad_norm': 0.635076352796439, 'learning_rate': 9.131799693857166e-06, 'epoch': 0.21} + 21%|██▏ | 2618/12188 [5:41:51<19:17:37, 7.26s/it] 21%|██▏ | 2619/12188 [5:41:58<18:58:34, 7.14s/it] {'loss': 0.3774, 'grad_norm': 0.708377456032219, 'learning_rate': 9.131051297376177e-06, 'epoch': 0.21} + 21%|██▏ | 2619/12188 [5:41:58<18:58:34, 7.14s/it] 21%|██▏ | 2620/12188 [5:42:05<18:41:29, 7.03s/it] {'loss': 0.4082, 'grad_norm': 2.2925515389135036, 'learning_rate': 9.130302609166978e-06, 'epoch': 0.21} + 21%|██▏ | 2620/12188 [5:42:05<18:41:29, 7.03s/it] 22%|██▏ | 2621/12188 [5:42:12<19:10:36, 7.22s/it] {'loss': 0.3554, 'grad_norm': 0.6557596775279755, 'learning_rate': 9.129553629282448e-06, 'epoch': 0.22} + 22%|██▏ | 2621/12188 [5:42:12<19:10:36, 7.22s/it] 22%|██▏ | 2622/12188 [5:42:19<19:03:13, 7.17s/it] {'loss': 0.3667, 'grad_norm': 0.8698250247340976, 'learning_rate': 9.128804357775474e-06, 'epoch': 0.22} + 22%|██▏ | 2622/12188 [5:42:19<19:03:13, 7.17s/it] 22%|██▏ | 2623/12188 [5:42:27<19:08:38, 7.21s/it] {'loss': 0.3783, 'grad_norm': 0.7021177093500526, 'learning_rate': 9.12805479469897e-06, 'epoch': 0.22} + 22%|██▏ | 2623/12188 [5:42:27<19:08:38, 7.21s/it] 22%|██▏ | 2624/12188 [5:42:33<18:48:20, 7.08s/it] {'loss': 0.3519, 'grad_norm': 0.6871955603836548, 'learning_rate': 9.127304940105869e-06, 'epoch': 0.22} + 22%|██▏ | 2624/12188 [5:42:33<18:48:20, 7.08s/it] 22%|██▏ | 2625/12188 [5:42:40<18:35:21, 7.00s/it] {'loss': 0.3883, 'grad_norm': 0.8616898177224022, 'learning_rate': 9.126554794049123e-06, 'epoch': 0.22} + 22%|██▏ | 2625/12188 [5:42:40<18:35:21, 7.00s/it] 22%|██▏ | 2626/12188 [5:42:47<18:40:48, 7.03s/it] {'loss': 0.3411, 'grad_norm': 1.148924209720728, 'learning_rate': 9.125804356581708e-06, 'epoch': 0.22} + 22%|██▏ | 2626/12188 [5:42:47<18:40:48, 7.03s/it] 22%|██▏ | 2627/12188 [5:42:54<18:28:36, 6.96s/it] {'loss': 0.3902, 'grad_norm': 0.66376179724882, 'learning_rate': 9.125053627756619e-06, 'epoch': 0.22} + 22%|██▏ | 2627/12188 [5:42:54<18:28:36, 6.96s/it] 22%|██▏ | 2628/12188 [5:43:01<18:45:44, 7.07s/it] {'loss': 0.3734, 'grad_norm': 0.8208795214390936, 'learning_rate': 9.124302607626868e-06, 'epoch': 0.22} + 22%|██▏ | 2628/12188 [5:43:01<18:45:44, 7.07s/it] 22%|██▏ | 2629/12188 [5:43:08<18:32:38, 6.98s/it] {'loss': 0.3514, 'grad_norm': 0.7665815928861057, 'learning_rate': 9.123551296245494e-06, 'epoch': 0.22} + 22%|██▏ | 2629/12188 [5:43:08<18:32:38, 6.98s/it] 22%|██▏ | 2630/12188 [5:43:15<18:13:06, 6.86s/it] {'loss': 0.3771, 'grad_norm': 0.7372555374851242, 'learning_rate': 9.122799693665554e-06, 'epoch': 0.22} + 22%|██▏ | 2630/12188 [5:43:15<18:13:06, 6.86s/it] 22%|██▏ | 2631/12188 [5:43:23<19:15:27, 7.25s/it] {'loss': 0.3798, 'grad_norm': 0.7559201047176727, 'learning_rate': 9.122047799940121e-06, 'epoch': 0.22} + 22%|██▏ | 2631/12188 [5:43:23<19:15:27, 7.25s/it] 22%|██▏ | 2632/12188 [5:43:30<19:09:46, 7.22s/it] {'loss': 0.3952, 'grad_norm': 0.6249792617513545, 'learning_rate': 9.121295615122297e-06, 'epoch': 0.22} + 22%|██▏ | 2632/12188 [5:43:30<19:09:46, 7.22s/it] 22%|██▏ | 2633/12188 [5:43:39<20:40:17, 7.79s/it] {'loss': 0.3692, 'grad_norm': 0.7896245110270724, 'learning_rate': 9.120543139265196e-06, 'epoch': 0.22} + 22%|██▏ | 2633/12188 [5:43:39<20:40:17, 7.79s/it] 22%|██▏ | 2634/12188 [5:43:46<20:08:00, 7.59s/it] {'loss': 0.3731, 'grad_norm': 0.6755468976710258, 'learning_rate': 9.11979037242196e-06, 'epoch': 0.22} + 22%|██▏ | 2634/12188 [5:43:46<20:08:00, 7.59s/it] 22%|██▏ | 2635/12188 [5:43:53<19:29:57, 7.35s/it] {'loss': 0.425, 'grad_norm': 0.7859251495163223, 'learning_rate': 9.119037314645745e-06, 'epoch': 0.22} + 22%|██▏ | 2635/12188 [5:43:53<19:29:57, 7.35s/it] 22%|██▏ | 2636/12188 [5:44:00<19:16:15, 7.26s/it] {'loss': 0.3335, 'grad_norm': 0.6945618870320187, 'learning_rate': 9.118283965989735e-06, 'epoch': 0.22} + 22%|██▏ | 2636/12188 [5:44:00<19:16:15, 7.26s/it] 22%|██▏ | 2637/12188 [5:44:07<19:02:16, 7.18s/it] {'loss': 0.3932, 'grad_norm': 0.6707223388070855, 'learning_rate': 9.117530326507128e-06, 'epoch': 0.22} + 22%|██▏ | 2637/12188 [5:44:07<19:02:16, 7.18s/it] 22%|██▏ | 2638/12188 [5:44:15<19:18:26, 7.28s/it] {'loss': 0.3865, 'grad_norm': 0.7516661768433814, 'learning_rate': 9.116776396251142e-06, 'epoch': 0.22} + 22%|██▏ | 2638/12188 [5:44:15<19:18:26, 7.28s/it] 22%|██▏ | 2639/12188 [5:44:22<19:33:47, 7.38s/it] {'loss': 0.3771, 'grad_norm': 0.6168859964025974, 'learning_rate': 9.116022175275024e-06, 'epoch': 0.22} + 22%|██▏ | 2639/12188 [5:44:22<19:33:47, 7.38s/it] 22%|██▏ | 2640/12188 [5:44:30<19:25:50, 7.33s/it] {'loss': 0.3855, 'grad_norm': 0.7417190074416967, 'learning_rate': 9.11526766363203e-06, 'epoch': 0.22} + 22%|██▏ | 2640/12188 [5:44:30<19:25:50, 7.33s/it] 22%|██▏ | 2641/12188 [5:44:37<19:24:52, 7.32s/it] {'loss': 0.3561, 'grad_norm': 0.6922904410950621, 'learning_rate': 9.114512861375446e-06, 'epoch': 0.22} + 22%|██▏ | 2641/12188 [5:44:37<19:24:52, 7.32s/it] 22%|██▏ | 2642/12188 [5:44:44<19:30:06, 7.35s/it] {'loss': 0.3837, 'grad_norm': 0.6007934887304395, 'learning_rate': 9.113757768558575e-06, 'epoch': 0.22} + 22%|██▏ | 2642/12188 [5:44:44<19:30:06, 7.35s/it] 22%|██▏ | 2643/12188 [5:44:51<19:18:02, 7.28s/it] {'loss': 0.429, 'grad_norm': 0.6703042890440778, 'learning_rate': 9.113002385234741e-06, 'epoch': 0.22} + 22%|██▏ | 2643/12188 [5:44:51<19:18:02, 7.28s/it] 22%|██▏ | 2644/12188 [5:44:59<19:30:27, 7.36s/it] {'loss': 0.3762, 'grad_norm': 0.6436295562014803, 'learning_rate': 9.112246711457284e-06, 'epoch': 0.22} + 22%|██▏ | 2644/12188 [5:44:59<19:30:27, 7.36s/it] 22%|██▏ | 2645/12188 [5:45:05<18:47:06, 7.09s/it] {'loss': 0.3673, 'grad_norm': 0.6731962763196602, 'learning_rate': 9.111490747279573e-06, 'epoch': 0.22} + 22%|██▏ | 2645/12188 [5:45:05<18:47:06, 7.09s/it] 22%|██▏ | 2646/12188 [5:45:12<18:43:18, 7.06s/it] {'loss': 0.3567, 'grad_norm': 0.6581652784353491, 'learning_rate': 9.11073449275499e-06, 'epoch': 0.22} + 22%|██▏ | 2646/12188 [5:45:12<18:43:18, 7.06s/it] 22%|██▏ | 2647/12188 [5:45:19<18:31:48, 6.99s/it] {'loss': 0.4305, 'grad_norm': 0.6521725079754717, 'learning_rate': 9.109977947936943e-06, 'epoch': 0.22} + 22%|██▏ | 2647/12188 [5:45:19<18:31:48, 6.99s/it] 22%|██▏ | 2648/12188 [5:45:26<18:18:01, 6.91s/it] {'loss': 0.4119, 'grad_norm': 0.6362530975603055, 'learning_rate': 9.109221112878855e-06, 'epoch': 0.22} + 22%|██▏ | 2648/12188 [5:45:26<18:18:01, 6.91s/it] 22%|██▏ | 2649/12188 [5:45:33<18:15:58, 6.89s/it] {'loss': 0.3932, 'grad_norm': 0.6604054075935757, 'learning_rate': 9.108463987634174e-06, 'epoch': 0.22} + 22%|██▏ | 2649/12188 [5:45:33<18:15:58, 6.89s/it] 22%|██▏ | 2650/12188 [5:45:41<19:03:59, 7.20s/it] {'loss': 0.3693, 'grad_norm': 0.661250339433179, 'learning_rate': 9.107706572256368e-06, 'epoch': 0.22} + 22%|██▏ | 2650/12188 [5:45:41<19:03:59, 7.20s/it] 22%|██▏ | 2651/12188 [5:45:48<18:49:34, 7.11s/it] {'loss': 0.3923, 'grad_norm': 0.6886547209841107, 'learning_rate': 9.10694886679892e-06, 'epoch': 0.22} + 22%|██▏ | 2651/12188 [5:45:48<18:49:34, 7.11s/it] 22%|██▏ | 2652/12188 [5:45:55<19:19:45, 7.30s/it] {'loss': 0.3752, 'grad_norm': 0.628713327526805, 'learning_rate': 9.106190871315346e-06, 'epoch': 0.22} + 22%|██▏ | 2652/12188 [5:45:55<19:19:45, 7.30s/it] 22%|██▏ | 2653/12188 [5:46:03<19:24:19, 7.33s/it] {'loss': 0.3616, 'grad_norm': 0.6850177586994058, 'learning_rate': 9.105432585859166e-06, 'epoch': 0.22} + 22%|██▏ | 2653/12188 [5:46:03<19:24:19, 7.33s/it] 22%|██▏ | 2654/12188 [5:46:10<19:15:54, 7.27s/it] {'loss': 0.3732, 'grad_norm': 0.6515376344267589, 'learning_rate': 9.104674010483935e-06, 'epoch': 0.22} + 22%|██▏ | 2654/12188 [5:46:10<19:15:54, 7.27s/it] 22%|██▏ | 2655/12188 [5:46:18<19:41:35, 7.44s/it] {'loss': 0.3699, 'grad_norm': 0.6437056902115268, 'learning_rate': 9.10391514524322e-06, 'epoch': 0.22} + 22%|██▏ | 2655/12188 [5:46:18<19:41:35, 7.44s/it] 22%|██▏ | 2656/12188 [5:46:24<19:04:57, 7.21s/it] {'loss': 0.3854, 'grad_norm': 0.6286454480122333, 'learning_rate': 9.10315599019061e-06, 'epoch': 0.22} + 22%|██▏ | 2656/12188 [5:46:24<19:04:57, 7.21s/it] 22%|██▏ | 2657/12188 [5:46:31<19:01:28, 7.19s/it] {'loss': 0.3854, 'grad_norm': 0.6269492408316368, 'learning_rate': 9.102396545379717e-06, 'epoch': 0.22} + 22%|██▏ | 2657/12188 [5:46:31<19:01:28, 7.19s/it] 22%|██▏ | 2658/12188 [5:46:39<18:58:42, 7.17s/it] {'loss': 0.3743, 'grad_norm': 0.7169312575402552, 'learning_rate': 9.101636810864169e-06, 'epoch': 0.22} + 22%|██▏ | 2658/12188 [5:46:39<18:58:42, 7.17s/it] 22%|██▏ | 2659/12188 [5:46:46<18:46:15, 7.09s/it] {'loss': 0.4122, 'grad_norm': 0.6514459619909193, 'learning_rate': 9.10087678669762e-06, 'epoch': 0.22} + 22%|██▏ | 2659/12188 [5:46:46<18:46:15, 7.09s/it] 22%|██▏ | 2660/12188 [5:46:53<18:58:25, 7.17s/it] {'loss': 0.4425, 'grad_norm': 0.6978395549137809, 'learning_rate': 9.100116472933743e-06, 'epoch': 0.22} + 22%|██▏ | 2660/12188 [5:46:53<18:58:25, 7.17s/it] 22%|██▏ | 2661/12188 [5:47:00<18:37:45, 7.04s/it] {'loss': 0.3585, 'grad_norm': 0.5843856624211788, 'learning_rate': 9.099355869626228e-06, 'epoch': 0.22} + 22%|██▏ | 2661/12188 [5:47:00<18:37:45, 7.04s/it] 22%|██▏ | 2662/12188 [5:47:07<18:30:43, 7.00s/it] {'loss': 0.3512, 'grad_norm': 0.6822448143616036, 'learning_rate': 9.098594976828784e-06, 'epoch': 0.22} + 22%|██▏ | 2662/12188 [5:47:07<18:30:43, 7.00s/it] 22%|██▏ | 2663/12188 [5:47:13<18:18:01, 6.92s/it] {'loss': 0.3477, 'grad_norm': 0.6099835789565957, 'learning_rate': 9.09783379459515e-06, 'epoch': 0.22} + 22%|██▏ | 2663/12188 [5:47:13<18:18:01, 6.92s/it] 22%|██▏ | 2664/12188 [5:47:21<18:37:18, 7.04s/it] {'loss': 0.4139, 'grad_norm': 0.6966032252966249, 'learning_rate': 9.097072322979077e-06, 'epoch': 0.22} + 22%|██▏ | 2664/12188 [5:47:21<18:37:18, 7.04s/it] 22%|██▏ | 2665/12188 [5:47:28<19:04:08, 7.21s/it] {'loss': 0.3694, 'grad_norm': 0.650800889925391, 'learning_rate': 9.09631056203434e-06, 'epoch': 0.22} + 22%|██▏ | 2665/12188 [5:47:28<19:04:08, 7.21s/it] 22%|██▏ | 2666/12188 [5:47:35<19:07:44, 7.23s/it] {'loss': 0.4044, 'grad_norm': 0.7547165291255985, 'learning_rate': 9.095548511814731e-06, 'epoch': 0.22} + 22%|██▏ | 2666/12188 [5:47:35<19:07:44, 7.23s/it] 22%|██▏ | 2667/12188 [5:47:43<19:46:05, 7.47s/it] {'loss': 0.3549, 'grad_norm': 0.6952654734821276, 'learning_rate': 9.094786172374066e-06, 'epoch': 0.22} + 22%|██▏ | 2667/12188 [5:47:43<19:46:05, 7.47s/it] 22%|██▏ | 2668/12188 [5:47:50<19:09:33, 7.25s/it] {'loss': 0.3856, 'grad_norm': 0.7156367162993398, 'learning_rate': 9.094023543766181e-06, 'epoch': 0.22} + 22%|██▏ | 2668/12188 [5:47:50<19:09:33, 7.25s/it] 22%|██▏ | 2669/12188 [5:47:57<18:48:24, 7.11s/it] {'loss': 0.3989, 'grad_norm': 1.2960325982966123, 'learning_rate': 9.09326062604493e-06, 'epoch': 0.22} + 22%|██▏ | 2669/12188 [5:47:57<18:48:24, 7.11s/it] 22%|██▏ | 2670/12188 [5:48:04<19:01:58, 7.20s/it] {'loss': 0.377, 'grad_norm': 0.6882163007574524, 'learning_rate': 9.09249741926419e-06, 'epoch': 0.22} + 22%|██▏ | 2670/12188 [5:48:04<19:01:58, 7.20s/it] 22%|██▏ | 2671/12188 [5:48:11<18:42:27, 7.08s/it] {'loss': 0.4067, 'grad_norm': 0.6728774465760998, 'learning_rate': 9.091733923477859e-06, 'epoch': 0.22} + 22%|██▏ | 2671/12188 [5:48:11<18:42:27, 7.08s/it] 22%|██▏ | 2672/12188 [5:48:19<18:57:47, 7.17s/it] {'loss': 0.3981, 'grad_norm': 0.9819559185838568, 'learning_rate': 9.09097013873985e-06, 'epoch': 0.22} + 22%|██▏ | 2672/12188 [5:48:19<18:57:47, 7.17s/it] 22%|██▏ | 2673/12188 [5:48:26<19:16:54, 7.30s/it] {'loss': 0.3487, 'grad_norm': 0.6877635741797897, 'learning_rate': 9.090206065104104e-06, 'epoch': 0.22} + 22%|██▏ | 2673/12188 [5:48:26<19:16:54, 7.30s/it] 22%|██▏ | 2674/12188 [5:48:34<19:19:59, 7.32s/it] {'loss': 0.3669, 'grad_norm': 0.6416883078539304, 'learning_rate': 9.089441702624577e-06, 'epoch': 0.22} + 22%|██▏ | 2674/12188 [5:48:34<19:19:59, 7.32s/it] 22%|██▏ | 2675/12188 [5:48:41<19:23:13, 7.34s/it] {'loss': 0.3395, 'grad_norm': 0.6218478402361466, 'learning_rate': 9.088677051355245e-06, 'epoch': 0.22} + 22%|██▏ | 2675/12188 [5:48:41<19:23:13, 7.34s/it] 22%|██▏ | 2676/12188 [5:48:49<19:43:38, 7.47s/it] {'loss': 0.4198, 'grad_norm': 0.694439705721062, 'learning_rate': 9.087912111350112e-06, 'epoch': 0.22} + 22%|██▏ | 2676/12188 [5:48:49<19:43:38, 7.47s/it] 22%|██▏ | 2677/12188 [5:48:56<19:38:30, 7.43s/it] {'loss': 0.3804, 'grad_norm': 0.6097141767785935, 'learning_rate': 9.08714688266319e-06, 'epoch': 0.22} + 22%|██▏ | 2677/12188 [5:48:56<19:38:30, 7.43s/it] 22%|██▏ | 2678/12188 [5:49:03<19:36:54, 7.43s/it] {'loss': 0.3828, 'grad_norm': 0.614045430071774, 'learning_rate': 9.086381365348525e-06, 'epoch': 0.22} + 22%|██▏ | 2678/12188 [5:49:03<19:36:54, 7.43s/it] 22%|██▏ | 2679/12188 [5:49:10<19:12:57, 7.27s/it] {'loss': 0.4111, 'grad_norm': 0.6774049162390624, 'learning_rate': 9.085615559460172e-06, 'epoch': 0.22} + 22%|██▏ | 2679/12188 [5:49:10<19:12:57, 7.27s/it] 22%|██▏ | 2680/12188 [5:49:17<18:37:35, 7.05s/it] {'loss': 0.3865, 'grad_norm': 0.6448850209351373, 'learning_rate': 9.08484946505221e-06, 'epoch': 0.22} + 22%|██▏ | 2680/12188 [5:49:17<18:37:35, 7.05s/it] 22%|██▏ | 2681/12188 [5:49:25<19:18:10, 7.31s/it] {'loss': 0.3769, 'grad_norm': 0.6264325804312416, 'learning_rate': 9.084083082178745e-06, 'epoch': 0.22} + 22%|██▏ | 2681/12188 [5:49:25<19:18:10, 7.31s/it] 22%|██▏ | 2682/12188 [5:49:32<19:18:33, 7.31s/it] {'loss': 0.3766, 'grad_norm': 0.6133743105482133, 'learning_rate': 9.083316410893892e-06, 'epoch': 0.22} + 22%|██▏ | 2682/12188 [5:49:32<19:18:33, 7.31s/it] 22%|██▏ | 2683/12188 [5:49:39<19:12:19, 7.27s/it] {'loss': 0.3629, 'grad_norm': 0.651752604561037, 'learning_rate': 9.082549451251796e-06, 'epoch': 0.22} + 22%|██▏ | 2683/12188 [5:49:39<19:12:19, 7.27s/it] 22%|██▏ | 2684/12188 [5:49:48<20:29:43, 7.76s/it] {'loss': 0.368, 'grad_norm': 0.6227683259350826, 'learning_rate': 9.081782203306616e-06, 'epoch': 0.22} + 22%|██▏ | 2684/12188 [5:49:48<20:29:43, 7.76s/it] 22%|██▏ | 2685/12188 [5:49:55<19:56:44, 7.56s/it] {'loss': 0.4102, 'grad_norm': 0.7116011986603388, 'learning_rate': 9.081014667112532e-06, 'epoch': 0.22} + 22%|██▏ | 2685/12188 [5:49:55<19:56:44, 7.56s/it] 22%|██▏ | 2686/12188 [5:50:03<19:45:44, 7.49s/it] {'loss': 0.3742, 'grad_norm': 0.6681303963890041, 'learning_rate': 9.080246842723753e-06, 'epoch': 0.22} + 22%|██▏ | 2686/12188 [5:50:03<19:45:44, 7.49s/it] 22%|██▏ | 2687/12188 [5:50:09<19:12:32, 7.28s/it] {'loss': 0.3987, 'grad_norm': 0.7333917392920957, 'learning_rate': 9.079478730194496e-06, 'epoch': 0.22} + 22%|██▏ | 2687/12188 [5:50:09<19:12:32, 7.28s/it] 22%|██▏ | 2688/12188 [5:50:16<18:46:03, 7.11s/it] {'loss': 0.3475, 'grad_norm': 0.6272940318010276, 'learning_rate': 9.078710329579006e-06, 'epoch': 0.22} + 22%|██▏ | 2688/12188 [5:50:16<18:46:03, 7.11s/it] 22%|██▏ | 2689/12188 [5:50:23<18:15:43, 6.92s/it] {'loss': 0.3592, 'grad_norm': 1.2575045153798088, 'learning_rate': 9.077941640931544e-06, 'epoch': 0.22} + 22%|██▏ | 2689/12188 [5:50:23<18:15:43, 6.92s/it] 22%|██▏ | 2690/12188 [5:50:30<18:14:38, 6.91s/it] {'loss': 0.3685, 'grad_norm': 0.6259641010492106, 'learning_rate': 9.077172664306396e-06, 'epoch': 0.22} + 22%|██▏ | 2690/12188 [5:50:30<18:14:38, 6.91s/it] 22%|██▏ | 2691/12188 [5:50:37<18:41:29, 7.09s/it] {'loss': 0.3198, 'grad_norm': 0.5779463112069402, 'learning_rate': 9.076403399757865e-06, 'epoch': 0.22} + 22%|██▏ | 2691/12188 [5:50:37<18:41:29, 7.09s/it] 22%|██▏ | 2692/12188 [5:50:44<18:26:55, 6.99s/it] {'loss': 0.382, 'grad_norm': 0.6446623898945679, 'learning_rate': 9.075633847340275e-06, 'epoch': 0.22} + 22%|██▏ | 2692/12188 [5:50:44<18:26:55, 6.99s/it] 22%|██▏ | 2693/12188 [5:50:50<18:03:16, 6.85s/it] {'loss': 0.4045, 'grad_norm': 0.674686611828484, 'learning_rate': 9.07486400710797e-06, 'epoch': 0.22} + 22%|██▏ | 2693/12188 [5:50:50<18:03:16, 6.85s/it] 22%|██▏ | 2694/12188 [5:50:57<18:01:47, 6.84s/it] {'loss': 0.3674, 'grad_norm': 0.655929095543329, 'learning_rate': 9.074093879115315e-06, 'epoch': 0.22} + 22%|██▏ | 2694/12188 [5:50:57<18:01:47, 6.84s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1348, in _get_item + raise ValueError( +ValueError: Number of image tokens ['data/slider/other_screenshot/original/AudioControlPanel_1739898931.4442017.png'] does not match number of images None +[Try #0] Failed to fetch sample 1863756 in VC:s3://gui-agent/jedi/images/component_v1_130k/component_v1_130k_extracted/. Exception: Number of image tokens ['data/slider/other_screenshot/original/AudioControlPanel_1739898931.4442017.png'] does not match number of images None +Problematic sample: {'image': 'data/slider/other_screenshot/original/AudioControlPanel_1739898931.4442017.png', 'conversations': [], 'image_id': 'data/slider/other_screenshot/original/AudioControlPanel_1739898931.4442017.png'} + 22%|██▏ | 2695/12188 [5:51:04<17:52:11, 6.78s/it] {'loss': 0.3914, 'grad_norm': 0.8133682189270502, 'learning_rate': 9.073323463416699e-06, 'epoch': 0.22} + 22%|██▏ | 2695/12188 [5:51:04<17:52:11, 6.78s/it] 22%|██▏ | 2696/12188 [5:51:10<17:41:35, 6.71s/it] {'loss': 0.3774, 'grad_norm': 0.6622689135724624, 'learning_rate': 9.072552760066522e-06, 'epoch': 0.22} + 22%|██▏ | 2696/12188 [5:51:10<17:41:35, 6.71s/it] 22%|██▏ | 2697/12188 [5:51:19<19:18:35, 7.32s/it] {'loss': 0.3682, 'grad_norm': 0.6801801679045957, 'learning_rate': 9.071781769119211e-06, 'epoch': 0.22} + 22%|██▏ | 2697/12188 [5:51:19<19:18:35, 7.32s/it] 22%|██▏ | 2698/12188 [5:51:26<18:57:07, 7.19s/it] {'loss': 0.3935, 'grad_norm': 0.6157309758313722, 'learning_rate': 9.071010490629216e-06, 'epoch': 0.22} + 22%|██▏ | 2698/12188 [5:51:26<18:57:07, 7.19s/it] 22%|██▏ | 2699/12188 [5:51:33<19:08:54, 7.26s/it] {'loss': 0.3681, 'grad_norm': 0.6569473336696454, 'learning_rate': 9.070238924650998e-06, 'epoch': 0.22} + 22%|██▏ | 2699/12188 [5:51:33<19:08:54, 7.26s/it] 22%|██▏ | 2700/12188 [5:51:40<19:01:25, 7.22s/it] {'loss': 0.3589, 'grad_norm': 0.6018524020942407, 'learning_rate': 9.069467071239049e-06, 'epoch': 0.22} + 22%|██▏ | 2700/12188 [5:51:40<19:01:25, 7.22s/it] 22%|██▏ | 2701/12188 [5:51:47<18:48:31, 7.14s/it] {'loss': 0.3608, 'grad_norm': 0.6880986379218741, 'learning_rate': 9.068694930447871e-06, 'epoch': 0.22} + 22%|██▏ | 2701/12188 [5:51:47<18:48:31, 7.14s/it] 22%|██▏ | 2702/12188 [5:51:55<19:23:10, 7.36s/it] {'loss': 0.3577, 'grad_norm': 0.5745336980322849, 'learning_rate': 9.067922502331997e-06, 'epoch': 0.22} + 22%|██▏ | 2702/12188 [5:51:55<19:23:10, 7.36s/it] 22%|██▏ | 2703/12188 [5:52:03<19:19:04, 7.33s/it] {'loss': 0.3414, 'grad_norm': 0.637974002127837, 'learning_rate': 9.067149786945967e-06, 'epoch': 0.22} + 22%|██▏ | 2703/12188 [5:52:03<19:19:04, 7.33s/it] 22%|██▏ | 2704/12188 [5:52:09<18:44:18, 7.11s/it] {'loss': 0.3526, 'grad_norm': 0.6709654782255665, 'learning_rate': 9.066376784344357e-06, 'epoch': 0.22} + 22%|██▏ | 2704/12188 [5:52:09<18:44:18, 7.11s/it] 22%|██▏ | 2705/12188 [5:52:16<18:26:08, 7.00s/it] {'loss': 0.3646, 'grad_norm': 0.6620388541482407, 'learning_rate': 9.065603494581752e-06, 'epoch': 0.22} + 22%|██▏ | 2705/12188 [5:52:16<18:26:08, 7.00s/it] 22%|██▏ | 2706/12188 [5:52:23<18:22:56, 6.98s/it] {'loss': 0.3768, 'grad_norm': 0.6462595507172947, 'learning_rate': 9.064829917712759e-06, 'epoch': 0.22} + 22%|██▏ | 2706/12188 [5:52:23<18:22:56, 6.98s/it] 22%|██▏ | 2707/12188 [5:52:31<19:00:55, 7.22s/it] {'loss': 0.3195, 'grad_norm': 0.6175889811584091, 'learning_rate': 9.064056053792006e-06, 'epoch': 0.22} + 22%|██▏ | 2707/12188 [5:52:31<19:00:55, 7.22s/it] 22%|██▏ | 2708/12188 [5:52:38<19:10:50, 7.28s/it] {'loss': 0.368, 'grad_norm': 0.631373771349987, 'learning_rate': 9.063281902874146e-06, 'epoch': 0.22} + 22%|██▏ | 2708/12188 [5:52:38<19:10:50, 7.28s/it] 22%|██▏ | 2709/12188 [5:52:45<18:42:49, 7.11s/it] {'loss': 0.3838, 'grad_norm': 0.6600114793452201, 'learning_rate': 9.062507465013845e-06, 'epoch': 0.22} + 22%|██▏ | 2709/12188 [5:52:45<18:42:49, 7.11s/it] 22%|██▏ | 2710/12188 [5:52:51<18:20:51, 6.97s/it] {'loss': 0.3748, 'grad_norm': 0.5923089891289999, 'learning_rate': 9.061732740265797e-06, 'epoch': 0.22} + 22%|██▏ | 2710/12188 [5:52:51<18:20:51, 6.97s/it] 22%|██▏ | 2711/12188 [5:52:59<18:27:43, 7.01s/it] {'loss': 0.4136, 'grad_norm': 0.6513483666234801, 'learning_rate': 9.060957728684705e-06, 'epoch': 0.22} + 22%|██▏ | 2711/12188 [5:52:59<18:27:43, 7.01s/it] 22%|██▏ | 2712/12188 [5:53:06<18:46:39, 7.13s/it] {'loss': 0.4011, 'grad_norm': 0.5919377915623482, 'learning_rate': 9.060182430325303e-06, 'epoch': 0.22} + 22%|██▏ | 2712/12188 [5:53:06<18:46:39, 7.13s/it] 22%|██▏ | 2713/12188 [5:53:13<18:20:19, 6.97s/it] {'loss': 0.3745, 'grad_norm': 0.7807396400069077, 'learning_rate': 9.059406845242343e-06, 'epoch': 0.22} + 22%|██▏ | 2713/12188 [5:53:13<18:20:19, 6.97s/it] 22%|██▏ | 2714/12188 [5:53:21<19:41:44, 7.48s/it] {'loss': 0.4042, 'grad_norm': 0.670477961255953, 'learning_rate': 9.058630973490591e-06, 'epoch': 0.22} + 22%|██▏ | 2714/12188 [5:53:21<19:41:44, 7.48s/it] 22%|██▏ | 2715/12188 [5:53:28<19:15:25, 7.32s/it] {'loss': 0.4088, 'grad_norm': 0.7370705555511812, 'learning_rate': 9.057854815124841e-06, 'epoch': 0.22} + 22%|██▏ | 2715/12188 [5:53:28<19:15:25, 7.32s/it] 22%|██▏ | 2716/12188 [5:53:36<19:43:19, 7.50s/it] {'loss': 0.3666, 'grad_norm': 0.6664938483987727, 'learning_rate': 9.057078370199905e-06, 'epoch': 0.22} + 22%|██▏ | 2716/12188 [5:53:36<19:43:19, 7.50s/it] 22%|██▏ | 2717/12188 [5:53:44<20:07:52, 7.65s/it] {'loss': 0.3536, 'grad_norm': 0.6113593669993602, 'learning_rate': 9.05630163877061e-06, 'epoch': 0.22} + 22%|██▏ | 2717/12188 [5:53:44<20:07:52, 7.65s/it] 22%|██▏ | 2718/12188 [5:53:51<19:40:36, 7.48s/it] {'loss': 0.4004, 'grad_norm': 0.7384710363433474, 'learning_rate': 9.055524620891814e-06, 'epoch': 0.22} + 22%|██▏ | 2718/12188 [5:53:51<19:40:36, 7.48s/it] 22%|██▏ | 2719/12188 [5:54:01<21:31:38, 8.18s/it] {'loss': 0.4312, 'grad_norm': 0.6171335961026468, 'learning_rate': 9.054747316618381e-06, 'epoch': 0.22} + 22%|██▏ | 2719/12188 [5:54:01<21:31:38, 8.18s/it] 22%|██▏ | 2720/12188 [5:54:09<21:20:56, 8.12s/it] {'loss': 0.3913, 'grad_norm': 0.8163005832165359, 'learning_rate': 9.053969726005209e-06, 'epoch': 0.22} + 22%|██▏ | 2720/12188 [5:54:09<21:20:56, 8.12s/it] 22%|██▏ | 2721/12188 [5:54:16<20:14:33, 7.70s/it] {'loss': 0.3974, 'grad_norm': 0.6367867062695093, 'learning_rate': 9.053191849107208e-06, 'epoch': 0.22} + 22%|██▏ | 2721/12188 [5:54:16<20:14:33, 7.70s/it] 22%|██▏ | 2722/12188 [5:54:24<20:50:42, 7.93s/it] {'loss': 0.3919, 'grad_norm': 0.6194505909454521, 'learning_rate': 9.052413685979311e-06, 'epoch': 0.22} + 22%|██▏ | 2722/12188 [5:54:24<20:50:42, 7.93s/it] 22%|██▏ | 2723/12188 [5:54:31<20:15:10, 7.70s/it] {'loss': 0.409, 'grad_norm': 0.6594909840340127, 'learning_rate': 9.05163523667647e-06, 'epoch': 0.22} + 22%|██▏ | 2723/12188 [5:54:31<20:15:10, 7.70s/it] 22%|██▏ | 2724/12188 [5:54:39<20:01:47, 7.62s/it] {'loss': 0.3652, 'grad_norm': 0.5836912829189559, 'learning_rate': 9.050856501253657e-06, 'epoch': 0.22} + 22%|██▏ | 2724/12188 [5:54:39<20:01:47, 7.62s/it] 22%|██▏ | 2725/12188 [5:54:46<19:35:59, 7.46s/it] {'loss': 0.4276, 'grad_norm': 0.6611595599297265, 'learning_rate': 9.050077479765867e-06, 'epoch': 0.22} + 22%|██▏ | 2725/12188 [5:54:46<19:35:59, 7.46s/it] 22%|██▏ | 2726/12188 [5:54:53<19:33:20, 7.44s/it] {'loss': 0.3891, 'grad_norm': 0.674963760868661, 'learning_rate': 9.049298172268114e-06, 'epoch': 0.22} + 22%|██▏ | 2726/12188 [5:54:53<19:33:20, 7.44s/it] 22%|██▏ | 2727/12188 [5:55:01<19:41:14, 7.49s/it] {'loss': 0.4089, 'grad_norm': 0.6179739663466204, 'learning_rate': 9.048518578815427e-06, 'epoch': 0.22} + 22%|██▏ | 2727/12188 [5:55:01<19:41:14, 7.49s/it] 22%|██▏ | 2728/12188 [5:55:07<19:01:43, 7.24s/it] {'loss': 0.3508, 'grad_norm': 0.6425962326109272, 'learning_rate': 9.047738699462863e-06, 'epoch': 0.22} + 22%|██▏ | 2728/12188 [5:55:07<19:01:43, 7.24s/it] 22%|██▏ | 2729/12188 [5:55:15<19:03:12, 7.25s/it] {'loss': 0.3915, 'grad_norm': 0.6310368700182063, 'learning_rate': 9.046958534265495e-06, 'epoch': 0.22} + 22%|██▏ | 2729/12188 [5:55:15<19:03:12, 7.25s/it] 22%|██▏ | 2730/12188 [5:55:22<18:58:37, 7.22s/it] {'loss': 0.3455, 'grad_norm': 0.633905099077027, 'learning_rate': 9.046178083278417e-06, 'epoch': 0.22} + 22%|██▏ | 2730/12188 [5:55:22<18:58:37, 7.22s/it] 22%|██▏ | 2731/12188 [5:55:28<18:28:35, 7.03s/it] {'loss': 0.3773, 'grad_norm': 0.6175438725589734, 'learning_rate': 9.045397346556744e-06, 'epoch': 0.22} + 22%|██▏ | 2731/12188 [5:55:28<18:28:35, 7.03s/it] 22%|██▏ | 2732/12188 [5:55:35<18:03:47, 6.88s/it] {'loss': 0.3724, 'grad_norm': 0.6382434164535213, 'learning_rate': 9.044616324155611e-06, 'epoch': 0.22} + 22%|██▏ | 2732/12188 [5:55:35<18:03:47, 6.88s/it] 22%|██▏ | 2733/12188 [5:55:42<18:31:06, 7.05s/it] {'loss': 0.3727, 'grad_norm': 0.6966640756446734, 'learning_rate': 9.04383501613017e-06, 'epoch': 0.22} + 22%|██▏ | 2733/12188 [5:55:42<18:31:06, 7.05s/it] 22%|██▏ | 2734/12188 [5:55:50<18:46:18, 7.15s/it] {'loss': 0.3849, 'grad_norm': 0.6033039677245693, 'learning_rate': 9.043053422535597e-06, 'epoch': 0.22} + 22%|██▏ | 2734/12188 [5:55:50<18:46:18, 7.15s/it] 22%|██▏ | 2735/12188 [5:55:57<18:39:20, 7.10s/it] {'loss': 0.3997, 'grad_norm': 0.6555502016461975, 'learning_rate': 9.042271543427088e-06, 'epoch': 0.22} + 22%|██▏ | 2735/12188 [5:55:57<18:39:20, 7.10s/it] 22%|██▏ | 2736/12188 [5:56:04<18:54:01, 7.20s/it] {'loss': 0.3656, 'grad_norm': 0.6239332558612243, 'learning_rate': 9.041489378859856e-06, 'epoch': 0.22} + 22%|██▏ | 2736/12188 [5:56:04<18:54:01, 7.20s/it] 22%|██▏ | 2737/12188 [5:56:11<18:44:16, 7.14s/it] {'loss': 0.4045, 'grad_norm': 0.6357515889087331, 'learning_rate': 9.040706928889138e-06, 'epoch': 0.22} + 22%|██▏ | 2737/12188 [5:56:11<18:44:16, 7.14s/it] 22%|██▏ | 2738/12188 [5:56:18<18:07:32, 6.91s/it] {'loss': 0.3727, 'grad_norm': 0.5831629079831314, 'learning_rate': 9.039924193570188e-06, 'epoch': 0.22} + 22%|██▏ | 2738/12188 [5:56:18<18:07:32, 6.91s/it] 22%|██▏ | 2739/12188 [5:56:24<18:06:46, 6.90s/it] {'loss': 0.3786, 'grad_norm': 0.7500176258821311, 'learning_rate': 9.039141172958282e-06, 'epoch': 0.22} + 22%|██▏ | 2739/12188 [5:56:25<18:06:46, 6.90s/it] 22%|██▏ | 2740/12188 [5:56:32<18:32:27, 7.06s/it] {'loss': 0.4163, 'grad_norm': 0.899032325871651, 'learning_rate': 9.038357867108716e-06, 'epoch': 0.22} + 22%|██▏ | 2740/12188 [5:56:32<18:32:27, 7.06s/it] 22%|██▏ | 2741/12188 [5:56:40<19:28:19, 7.42s/it] {'loss': 0.3603, 'grad_norm': 0.6352166783017423, 'learning_rate': 9.037574276076806e-06, 'epoch': 0.22} + 22%|██▏ | 2741/12188 [5:56:40<19:28:19, 7.42s/it] 22%|██▏ | 2742/12188 [5:56:47<19:07:28, 7.29s/it] {'loss': 0.4335, 'grad_norm': 0.7693239250401079, 'learning_rate': 9.036790399917886e-06, 'epoch': 0.22} + 22%|██▏ | 2742/12188 [5:56:47<19:07:28, 7.29s/it] 23%|██▎ | 2743/12188 [5:56:55<19:14:57, 7.34s/it] {'loss': 0.3917, 'grad_norm': 0.6321867568959859, 'learning_rate': 9.036006238687316e-06, 'epoch': 0.23} + 23%|██▎ | 2743/12188 [5:56:55<19:14:57, 7.34s/it] 23%|██▎ | 2744/12188 [5:57:03<19:50:43, 7.56s/it] {'loss': 0.3541, 'grad_norm': 0.6040845374030762, 'learning_rate': 9.035221792440468e-06, 'epoch': 0.23} + 23%|██▎ | 2744/12188 [5:57:03<19:50:43, 7.56s/it] 23%|██▎ | 2745/12188 [5:57:10<19:55:26, 7.60s/it] {'loss': 0.3492, 'grad_norm': 0.6118227147740725, 'learning_rate': 9.03443706123274e-06, 'epoch': 0.23} + 23%|██▎ | 2745/12188 [5:57:10<19:55:26, 7.60s/it] 23%|██▎ | 2746/12188 [5:57:18<19:38:46, 7.49s/it] {'loss': 0.3966, 'grad_norm': 0.6347230243745255, 'learning_rate': 9.03365204511955e-06, 'epoch': 0.23} + 23%|██▎ | 2746/12188 [5:57:18<19:38:46, 7.49s/it] 23%|██▎ | 2747/12188 [5:57:25<19:10:15, 7.31s/it] {'loss': 0.3907, 'grad_norm': 0.798327795822871, 'learning_rate': 9.032866744156332e-06, 'epoch': 0.23} + 23%|██▎ | 2747/12188 [5:57:25<19:10:15, 7.31s/it] 23%|██▎ | 2748/12188 [5:57:32<18:59:35, 7.24s/it] {'loss': 0.3541, 'grad_norm': 0.6620000572059247, 'learning_rate': 9.032081158398542e-06, 'epoch': 0.23} + 23%|██▎ | 2748/12188 [5:57:32<18:59:35, 7.24s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f7290cf3920> +[Try #0] Failed to fetch sample 4407704 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f7290cf3920> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Community portal'"}, {'from': 'gpt', 'value': '\nclick(x=0.858, y=0.2255)\n'}]} + 23%|██▎ | 2749/12188 [5:57:41<21:00:20, 8.01s/it] {'loss': 0.4061, 'grad_norm': 0.6650270675360587, 'learning_rate': 9.031295287901663e-06, 'epoch': 0.23} + 23%|██▎ | 2749/12188 [5:57:41<21:00:20, 8.01s/it] 23%|██▎ | 2750/12188 [5:57:50<21:05:35, 8.05s/it] {'loss': 0.3795, 'grad_norm': 0.5998919718947743, 'learning_rate': 9.030509132721183e-06, 'epoch': 0.23} + 23%|██▎ | 2750/12188 [5:57:50<21:05:35, 8.05s/it] 23%|██▎ | 2751/12188 [5:57:57<20:26:02, 7.80s/it] {'loss': 0.337, 'grad_norm': 0.7178732843558149, 'learning_rate': 9.029722692912625e-06, 'epoch': 0.23} + 23%|██▎ | 2751/12188 [5:57:57<20:26:02, 7.80s/it] 23%|██▎ | 2752/12188 [5:58:03<19:34:27, 7.47s/it] {'loss': 0.3633, 'grad_norm': 0.6415540256019558, 'learning_rate': 9.028935968531528e-06, 'epoch': 0.23} + 23%|██▎ | 2752/12188 [5:58:03<19:34:27, 7.47s/it] 23%|██▎ | 2753/12188 [5:58:11<19:24:55, 7.41s/it] {'loss': 0.3447, 'grad_norm': 0.604806225408011, 'learning_rate': 9.028148959633442e-06, 'epoch': 0.23} + 23%|██▎ | 2753/12188 [5:58:11<19:24:55, 7.41s/it] 23%|██▎ | 2754/12188 [5:58:18<19:03:35, 7.27s/it] {'loss': 0.4174, 'grad_norm': 0.7699195815096239, 'learning_rate': 9.027361666273949e-06, 'epoch': 0.23} + 23%|██▎ | 2754/12188 [5:58:18<19:03:35, 7.27s/it] 23%|██▎ | 2755/12188 [5:58:24<18:27:14, 7.04s/it] {'loss': 0.389, 'grad_norm': 0.6496747233912341, 'learning_rate': 9.026574088508646e-06, 'epoch': 0.23} + 23%|██▎ | 2755/12188 [5:58:24<18:27:14, 7.04s/it] 23%|██▎ | 2756/12188 [5:58:31<18:39:09, 7.12s/it] {'loss': 0.3565, 'grad_norm': 0.7474543134083539, 'learning_rate': 9.02578622639315e-06, 'epoch': 0.23} + 23%|██▎ | 2756/12188 [5:58:31<18:39:09, 7.12s/it] 23%|██▎ | 2757/12188 [5:58:38<18:33:34, 7.08s/it] {'loss': 0.3409, 'grad_norm': 0.6095781669110364, 'learning_rate': 9.024998079983099e-06, 'epoch': 0.23} + 23%|██▎ | 2757/12188 [5:58:38<18:33:34, 7.08s/it] 23%|██▎ | 2758/12188 [5:58:46<18:55:44, 7.23s/it] {'loss': 0.385, 'grad_norm': 0.728958368822159, 'learning_rate': 9.02420964933415e-06, 'epoch': 0.23} + 23%|██▎ | 2758/12188 [5:58:46<18:55:44, 7.23s/it] 23%|██▎ | 2759/12188 [5:58:53<18:52:56, 7.21s/it] {'loss': 0.4326, 'grad_norm': 0.6731824457966341, 'learning_rate': 9.023420934501981e-06, 'epoch': 0.23} + 23%|██▎ | 2759/12188 [5:58:53<18:52:56, 7.21s/it] 23%|██▎ | 2760/12188 [5:59:00<18:36:01, 7.10s/it] {'loss': 0.3581, 'grad_norm': 0.6878003562322927, 'learning_rate': 9.02263193554229e-06, 'epoch': 0.23} + 23%|██▎ | 2760/12188 [5:59:00<18:36:01, 7.10s/it] 23%|██▎ | 2761/12188 [5:59:07<18:10:15, 6.94s/it] {'loss': 0.4097, 'grad_norm': 0.8819676549349127, 'learning_rate': 9.021842652510793e-06, 'epoch': 0.23} + 23%|██▎ | 2761/12188 [5:59:07<18:10:15, 6.94s/it] 23%|██▎ | 2762/12188 [5:59:13<18:04:17, 6.90s/it] {'loss': 0.3616, 'grad_norm': 0.6645278369742283, 'learning_rate': 9.021053085463231e-06, 'epoch': 0.23} + 23%|██▎ | 2762/12188 [5:59:13<18:04:17, 6.90s/it] 23%|██▎ | 2763/12188 [5:59:20<17:50:57, 6.82s/it] {'loss': 0.3513, 'grad_norm': 0.5963076260764391, 'learning_rate': 9.02026323445536e-06, 'epoch': 0.23} + 23%|██▎ | 2763/12188 [5:59:20<17:50:57, 6.82s/it] 23%|██▎ | 2764/12188 [5:59:29<19:37:12, 7.49s/it] {'loss': 0.3929, 'grad_norm': 0.723715950065725, 'learning_rate': 9.01947309954296e-06, 'epoch': 0.23} + 23%|██▎ | 2764/12188 [5:59:29<19:37:12, 7.49s/it] 23%|██▎ | 2765/12188 [5:59:36<19:15:28, 7.36s/it] {'loss': 0.3639, 'grad_norm': 0.6911059081842901, 'learning_rate': 9.018682680781825e-06, 'epoch': 0.23} + 23%|██▎ | 2765/12188 [5:59:36<19:15:28, 7.36s/it] 23%|██▎ | 2766/12188 [5:59:43<19:11:31, 7.33s/it] {'loss': 0.3757, 'grad_norm': 0.6270442530743773, 'learning_rate': 9.017891978227776e-06, 'epoch': 0.23} + 23%|██▎ | 2766/12188 [5:59:43<19:11:31, 7.33s/it] 23%|██▎ | 2767/12188 [5:59:51<19:03:22, 7.28s/it] {'loss': 0.4123, 'grad_norm': 0.7141614357024026, 'learning_rate': 9.017100991936651e-06, 'epoch': 0.23} + 23%|██▎ | 2767/12188 [5:59:51<19:03:22, 7.28s/it] 23%|██▎ | 2768/12188 [5:59:58<18:58:41, 7.25s/it] {'loss': 0.3881, 'grad_norm': 0.6321003809007437, 'learning_rate': 9.01630972196431e-06, 'epoch': 0.23} + 23%|██▎ | 2768/12188 [5:59:58<18:58:41, 7.25s/it] 23%|██▎ | 2769/12188 [6:00:05<18:52:19, 7.21s/it] {'loss': 0.417, 'grad_norm': 0.7103376453755822, 'learning_rate': 9.015518168366626e-06, 'epoch': 0.23} + 23%|██▎ | 2769/12188 [6:00:05<18:52:19, 7.21s/it] 23%|██▎ | 2770/12188 [6:00:12<18:26:59, 7.05s/it] {'loss': 0.3991, 'grad_norm': 0.73348481405324, 'learning_rate': 9.014726331199501e-06, 'epoch': 0.23} + 23%|██▎ | 2770/12188 [6:00:12<18:26:59, 7.05s/it] 23%|██▎ | 2771/12188 [6:00:18<18:16:57, 6.99s/it] {'loss': 0.3605, 'grad_norm': 0.6520513209000508, 'learning_rate': 9.013934210518853e-06, 'epoch': 0.23} + 23%|██▎ | 2771/12188 [6:00:18<18:16:57, 6.99s/it] 23%|██▎ | 2772/12188 [6:00:25<18:16:03, 6.98s/it] {'loss': 0.3736, 'grad_norm': 0.6090894415052664, 'learning_rate': 9.013141806380621e-06, 'epoch': 0.23} + 23%|██▎ | 2772/12188 [6:00:25<18:16:03, 6.98s/it] 23%|██▎ | 2773/12188 [6:00:33<18:38:10, 7.13s/it] {'loss': 0.3704, 'grad_norm': 0.6508748349767601, 'learning_rate': 9.012349118840761e-06, 'epoch': 0.23} + 23%|██▎ | 2773/12188 [6:00:33<18:38:10, 7.13s/it] 23%|██▎ | 2774/12188 [6:00:40<18:26:15, 7.05s/it] {'loss': 0.3858, 'grad_norm': 1.0448578923778737, 'learning_rate': 9.011556147955255e-06, 'epoch': 0.23} + 23%|██▎ | 2774/12188 [6:00:40<18:26:15, 7.05s/it] 23%|██▎ | 2775/12188 [6:00:47<18:23:49, 7.04s/it] {'loss': 0.3773, 'grad_norm': 0.630351923945559, 'learning_rate': 9.010762893780095e-06, 'epoch': 0.23} + 23%|██▎ | 2775/12188 [6:00:47<18:23:49, 7.04s/it] 23%|██▎ | 2776/12188 [6:00:54<18:13:26, 6.97s/it] {'loss': 0.3763, 'grad_norm': 0.6260563594943098, 'learning_rate': 9.009969356371307e-06, 'epoch': 0.23} + 23%|██▎ | 2776/12188 [6:00:54<18:13:26, 6.97s/it] 23%|██▎ | 2777/12188 [6:01:02<19:26:03, 7.43s/it] {'loss': 0.3764, 'grad_norm': 0.7453042150792981, 'learning_rate': 9.009175535784923e-06, 'epoch': 0.23} + 23%|██▎ | 2777/12188 [6:01:02<19:26:03, 7.43s/it] 23%|██▎ | 2778/12188 [6:01:09<19:05:10, 7.30s/it] {'loss': 0.4028, 'grad_norm': 0.6417839437446449, 'learning_rate': 9.008381432077005e-06, 'epoch': 0.23} + 23%|██▎ | 2778/12188 [6:01:09<19:05:10, 7.30s/it] 23%|██▎ | 2779/12188 [6:01:16<18:41:44, 7.15s/it] {'loss': 0.3717, 'grad_norm': 0.7028196683040475, 'learning_rate': 9.007587045303628e-06, 'epoch': 0.23} + 23%|██▎ | 2779/12188 [6:01:16<18:41:44, 7.15s/it] 23%|██▎ | 2780/12188 [6:01:23<18:54:38, 7.24s/it] {'loss': 0.4177, 'grad_norm': 0.6891279795857909, 'learning_rate': 9.006792375520895e-06, 'epoch': 0.23} + 23%|██▎ | 2780/12188 [6:01:23<18:54:38, 7.24s/it] 23%|██▎ | 2781/12188 [6:01:30<18:30:03, 7.08s/it] {'loss': 0.3787, 'grad_norm': 0.62558348181125, 'learning_rate': 9.005997422784922e-06, 'epoch': 0.23} + 23%|██▎ | 2781/12188 [6:01:30<18:30:03, 7.08s/it] 23%|██▎ | 2782/12188 [6:01:37<18:25:48, 7.05s/it] {'loss': 0.3491, 'grad_norm': 0.6433918327057836, 'learning_rate': 9.005202187151845e-06, 'epoch': 0.23} + 23%|██▎ | 2782/12188 [6:01:37<18:25:48, 7.05s/it] 23%|██▎ | 2783/12188 [6:01:44<18:03:40, 6.91s/it] {'loss': 0.3795, 'grad_norm': 0.6546822753837355, 'learning_rate': 9.004406668677825e-06, 'epoch': 0.23} + 23%|██▎ | 2783/12188 [6:01:44<18:03:40, 6.91s/it] 23%|██▎ | 2784/12188 [6:01:50<17:52:54, 6.85s/it] {'loss': 0.4068, 'grad_norm': 0.6236442273667719, 'learning_rate': 9.003610867419042e-06, 'epoch': 0.23} + 23%|██▎ | 2784/12188 [6:01:50<17:52:54, 6.85s/it] 23%|██▎ | 2785/12188 [6:01:57<17:56:06, 6.87s/it] {'loss': 0.3475, 'grad_norm': 0.5872768155829498, 'learning_rate': 9.00281478343169e-06, 'epoch': 0.23} + 23%|██▎ | 2785/12188 [6:01:57<17:56:06, 6.87s/it] 23%|██▎ | 2786/12188 [6:02:05<18:54:16, 7.24s/it] {'loss': 0.3715, 'grad_norm': 0.7259445017063803, 'learning_rate': 9.00201841677199e-06, 'epoch': 0.23} + 23%|██▎ | 2786/12188 [6:02:05<18:54:16, 7.24s/it] 23%|██▎ | 2787/12188 [6:02:13<18:54:58, 7.24s/it] {'loss': 0.3767, 'grad_norm': 0.6291178896770515, 'learning_rate': 9.001221767496178e-06, 'epoch': 0.23} + 23%|██▎ | 2787/12188 [6:02:13<18:54:58, 7.24s/it] 23%|██▎ | 2788/12188 [6:02:21<19:53:19, 7.62s/it] {'loss': 0.3723, 'grad_norm': 0.6377587814826681, 'learning_rate': 9.000424835660514e-06, 'epoch': 0.23} + 23%|██▎ | 2788/12188 [6:02:21<19:53:19, 7.62s/it] 23%|██▎ | 2789/12188 [6:02:28<19:07:02, 7.32s/it] {'loss': 0.4119, 'grad_norm': 0.6443038619724775, 'learning_rate': 8.999627621321274e-06, 'epoch': 0.23} + 23%|██▎ | 2789/12188 [6:02:28<19:07:02, 7.32s/it] 23%|██▎ | 2790/12188 [6:02:34<18:29:02, 7.08s/it] {'loss': 0.337, 'grad_norm': 0.6489885307546357, 'learning_rate': 8.998830124534758e-06, 'epoch': 0.23} + 23%|██▎ | 2790/12188 [6:02:34<18:29:02, 7.08s/it] 23%|██▎ | 2791/12188 [6:02:42<18:40:43, 7.16s/it] {'loss': 0.3927, 'grad_norm': 1.0471705587245788, 'learning_rate': 8.998032345357284e-06, 'epoch': 0.23} + 23%|██▎ | 2791/12188 [6:02:42<18:40:43, 7.16s/it] 23%|██▎ | 2792/12188 [6:02:49<18:38:28, 7.14s/it] {'loss': 0.3309, 'grad_norm': 0.5895130236301753, 'learning_rate': 8.997234283845188e-06, 'epoch': 0.23} + 23%|██▎ | 2792/12188 [6:02:49<18:38:28, 7.14s/it] 23%|██▎ | 2793/12188 [6:02:57<19:13:37, 7.37s/it] {'loss': 0.324, 'grad_norm': 0.6090335629479631, 'learning_rate': 8.996435940054829e-06, 'epoch': 0.23} + 23%|██▎ | 2793/12188 [6:02:57<19:13:37, 7.37s/it] 23%|██▎ | 2794/12188 [6:03:04<19:07:47, 7.33s/it] {'loss': 0.3827, 'grad_norm': 0.6632852336243181, 'learning_rate': 8.995637314042585e-06, 'epoch': 0.23} + 23%|██▎ | 2794/12188 [6:03:04<19:07:47, 7.33s/it] 23%|██▎ | 2795/12188 [6:03:11<19:17:49, 7.40s/it] {'loss': 0.3982, 'grad_norm': 0.692373258105404, 'learning_rate': 8.994838405864854e-06, 'epoch': 0.23} + 23%|██▎ | 2795/12188 [6:03:11<19:17:49, 7.40s/it] 23%|██▎ | 2796/12188 [6:03:19<19:14:43, 7.38s/it] {'loss': 0.3748, 'grad_norm': 0.6578694611168083, 'learning_rate': 8.994039215578053e-06, 'epoch': 0.23} + 23%|██▎ | 2796/12188 [6:03:19<19:14:43, 7.38s/it] 23%|██▎ | 2797/12188 [6:03:25<18:48:46, 7.21s/it] {'loss': 0.3795, 'grad_norm': 0.7081116026236615, 'learning_rate': 8.993239743238618e-06, 'epoch': 0.23} + 23%|██▎ | 2797/12188 [6:03:25<18:48:46, 7.21s/it] 23%|██▎ | 2798/12188 [6:03:33<18:44:35, 7.19s/it] {'loss': 0.394, 'grad_norm': 0.67094636764655, 'learning_rate': 8.99243998890301e-06, 'epoch': 0.23} + 23%|██▎ | 2798/12188 [6:03:33<18:44:35, 7.19s/it] 23%|██▎ | 2799/12188 [6:03:39<18:22:11, 7.04s/it] {'loss': 0.3863, 'grad_norm': 0.7124296318099466, 'learning_rate': 8.991639952627702e-06, 'epoch': 0.23} + 23%|██▎ | 2799/12188 [6:03:39<18:22:11, 7.04s/it] 23%|██▎ | 2800/12188 [6:03:47<18:54:12, 7.25s/it] {'loss': 0.372, 'grad_norm': 0.5959499314579229, 'learning_rate': 8.990839634469194e-06, 'epoch': 0.23} + 23%|██▎ | 2800/12188 [6:03:47<18:54:12, 7.25s/it] 23%|██▎ | 2801/12188 [6:03:55<19:04:45, 7.32s/it] {'loss': 0.3419, 'grad_norm': 0.6508109055578482, 'learning_rate': 8.990039034484004e-06, 'epoch': 0.23} + 23%|██▎ | 2801/12188 [6:03:55<19:04:45, 7.32s/it] 23%|██▎ | 2802/12188 [6:04:02<18:58:41, 7.28s/it] {'loss': 0.3353, 'grad_norm': 0.6590394882262447, 'learning_rate': 8.989238152728667e-06, 'epoch': 0.23} + 23%|██▎ | 2802/12188 [6:04:02<18:58:41, 7.28s/it] 23%|██▎ | 2803/12188 [6:04:09<19:01:14, 7.30s/it] {'loss': 0.3787, 'grad_norm': 0.6411953722561791, 'learning_rate': 8.98843698925974e-06, 'epoch': 0.23} + 23%|██▎ | 2803/12188 [6:04:09<19:01:14, 7.30s/it] 23%|██▎ | 2804/12188 [6:04:16<18:59:18, 7.28s/it] {'loss': 0.3991, 'grad_norm': 0.6850047222999149, 'learning_rate': 8.987635544133804e-06, 'epoch': 0.23} + 23%|██▎ | 2804/12188 [6:04:16<18:59:18, 7.28s/it] 23%|██▎ | 2805/12188 [6:04:23<18:46:06, 7.20s/it] {'loss': 0.375, 'grad_norm': 0.700834438885956, 'learning_rate': 8.98683381740745e-06, 'epoch': 0.23} + 23%|██▎ | 2805/12188 [6:04:23<18:46:06, 7.20s/it] 23%|██▎ | 2806/12188 [6:04:31<19:28:19, 7.47s/it] {'loss': 0.3783, 'grad_norm': 0.7847630452590935, 'learning_rate': 8.9860318091373e-06, 'epoch': 0.23} + 23%|██▎ | 2806/12188 [6:04:31<19:28:19, 7.47s/it] 23%|██▎ | 2807/12188 [6:04:39<19:13:22, 7.38s/it] {'loss': 0.3917, 'grad_norm': 0.6738351275018104, 'learning_rate': 8.985229519379984e-06, 'epoch': 0.23} + 23%|██▎ | 2807/12188 [6:04:39<19:13:22, 7.38s/it] 23%|██▎ | 2808/12188 [6:04:46<18:55:50, 7.27s/it] {'loss': 0.3781, 'grad_norm': 0.7807312940810197, 'learning_rate': 8.984426948192164e-06, 'epoch': 0.23} + 23%|██▎ | 2808/12188 [6:04:46<18:55:50, 7.27s/it] 23%|██▎ | 2809/12188 [6:04:53<18:57:14, 7.28s/it] {'loss': 0.4168, 'grad_norm': 3.2430297971100543, 'learning_rate': 8.983624095630513e-06, 'epoch': 0.23} + 23%|██▎ | 2809/12188 [6:04:53<18:57:14, 7.28s/it] 23%|██▎ | 2810/12188 [6:04:59<18:22:15, 7.05s/it] {'loss': 0.3607, 'grad_norm': 0.6497912653789684, 'learning_rate': 8.98282096175173e-06, 'epoch': 0.23} + 23%|██▎ | 2810/12188 [6:04:59<18:22:15, 7.05s/it] 23%|██▎ | 2811/12188 [6:05:06<18:22:20, 7.05s/it] {'loss': 0.3749, 'grad_norm': 0.7326014909954742, 'learning_rate': 8.982017546612529e-06, 'epoch': 0.23} + 23%|██▎ | 2811/12188 [6:05:06<18:22:20, 7.05s/it] 23%|██▎ | 2812/12188 [6:05:14<19:01:31, 7.30s/it] {'loss': 0.3338, 'grad_norm': 0.5858238930873262, 'learning_rate': 8.981213850269645e-06, 'epoch': 0.23} + 23%|██▎ | 2812/12188 [6:05:14<19:01:31, 7.30s/it] 23%|██▎ | 2813/12188 [6:05:21<18:46:50, 7.21s/it] {'loss': 0.412, 'grad_norm': 0.6748765958187032, 'learning_rate': 8.980409872779838e-06, 'epoch': 0.23} + 23%|██▎ | 2813/12188 [6:05:21<18:46:50, 7.21s/it] 23%|██▎ | 2814/12188 [6:05:28<18:30:02, 7.11s/it] {'loss': 0.3787, 'grad_norm': 0.7213546333103097, 'learning_rate': 8.979605614199878e-06, 'epoch': 0.23} + 23%|██▎ | 2814/12188 [6:05:28<18:30:02, 7.11s/it] 23%|██▎ | 2815/12188 [6:05:35<18:28:06, 7.09s/it] {'loss': 0.3987, 'grad_norm': 0.6648633834793602, 'learning_rate': 8.978801074586566e-06, 'epoch': 0.23} + 23%|██▎ | 2815/12188 [6:05:35<18:28:06, 7.09s/it] 23%|██▎ | 2816/12188 [6:05:43<19:17:02, 7.41s/it] {'loss': 0.3745, 'grad_norm': 0.6460710982567687, 'learning_rate': 8.977996253996712e-06, 'epoch': 0.23} + 23%|██▎ | 2816/12188 [6:05:43<19:17:02, 7.41s/it] 23%|██▎ | 2817/12188 [6:05:50<19:01:45, 7.31s/it] {'loss': 0.3791, 'grad_norm': 0.8365663242920363, 'learning_rate': 8.977191152487155e-06, 'epoch': 0.23} + 23%|██▎ | 2817/12188 [6:05:51<19:01:45, 7.31s/it] 23%|██▎ | 2818/12188 [6:05:58<18:55:52, 7.27s/it] {'loss': 0.3525, 'grad_norm': 0.6010725332122804, 'learning_rate': 8.97638577011475e-06, 'epoch': 0.23} + 23%|██▎ | 2818/12188 [6:05:58<18:55:52, 7.27s/it] 23%|██▎ | 2819/12188 [6:06:06<19:37:35, 7.54s/it] {'loss': 0.4169, 'grad_norm': 0.621051755278034, 'learning_rate': 8.975580106936368e-06, 'epoch': 0.23} + 23%|██▎ | 2819/12188 [6:06:06<19:37:35, 7.54s/it] 23%|██▎ | 2820/12188 [6:06:14<20:10:53, 7.76s/it] {'loss': 0.3749, 'grad_norm': 0.7093269690785807, 'learning_rate': 8.974774163008909e-06, 'epoch': 0.23} + 23%|██▎ | 2820/12188 [6:06:14<20:10:53, 7.76s/it] 23%|██▎ | 2821/12188 [6:06:21<19:29:23, 7.49s/it] {'loss': 0.34, 'grad_norm': 0.6383281248654004, 'learning_rate': 8.973967938389281e-06, 'epoch': 0.23} + 23%|██▎ | 2821/12188 [6:06:21<19:29:23, 7.49s/it] 23%|██▎ | 2822/12188 [6:06:28<19:01:31, 7.31s/it] {'loss': 0.371, 'grad_norm': 0.6103467727392008, 'learning_rate': 8.973161433134423e-06, 'epoch': 0.23} + 23%|██▎ | 2822/12188 [6:06:28<19:01:31, 7.31s/it] 23%|██▎ | 2823/12188 [6:06:37<20:28:06, 7.87s/it] {'loss': 0.3534, 'grad_norm': 0.6588769591285334, 'learning_rate': 8.972354647301289e-06, 'epoch': 0.23} + 23%|██▎ | 2823/12188 [6:06:37<20:28:06, 7.87s/it] 23%|██▎ | 2824/12188 [6:06:44<19:30:28, 7.50s/it] {'loss': 0.4028, 'grad_norm': 0.6201438268161813, 'learning_rate': 8.971547580946851e-06, 'epoch': 0.23} + 23%|██▎ | 2824/12188 [6:06:44<19:30:28, 7.50s/it] 23%|██▎ | 2825/12188 [6:06:51<19:41:36, 7.57s/it] {'loss': 0.3699, 'grad_norm': 0.6847087866969229, 'learning_rate': 8.970740234128104e-06, 'epoch': 0.23} + 23%|██▎ | 2825/12188 [6:06:51<19:41:36, 7.57s/it] 23%|██▎ | 2826/12188 [6:06:58<18:46:45, 7.22s/it] {'loss': 0.3642, 'grad_norm': 0.6758868976319113, 'learning_rate': 8.969932606902062e-06, 'epoch': 0.23} + 23%|██▎ | 2826/12188 [6:06:58<18:46:45, 7.22s/it] 23%|██▎ | 2827/12188 [6:07:05<18:34:55, 7.15s/it] {'loss': 0.4264, 'grad_norm': 0.6431867451199983, 'learning_rate': 8.969124699325757e-06, 'epoch': 0.23} + 23%|██▎ | 2827/12188 [6:07:05<18:34:55, 7.15s/it] 23%|██▎ | 2828/12188 [6:07:13<19:25:08, 7.47s/it] {'loss': 0.3734, 'grad_norm': 0.6684884343355303, 'learning_rate': 8.968316511456241e-06, 'epoch': 0.23} + 23%|██▎ | 2828/12188 [6:07:13<19:25:08, 7.47s/it] 23%|██▎ | 2829/12188 [6:07:20<19:14:18, 7.40s/it] {'loss': 0.4023, 'grad_norm': 0.7499984103468563, 'learning_rate': 8.967508043350591e-06, 'epoch': 0.23} + 23%|██▎ | 2829/12188 [6:07:20<19:14:18, 7.40s/it] 23%|██▎ | 2830/12188 [6:07:28<19:08:50, 7.37s/it] {'loss': 0.3411, 'grad_norm': 0.670011303186805, 'learning_rate': 8.966699295065896e-06, 'epoch': 0.23} + 23%|██▎ | 2830/12188 [6:07:28<19:08:50, 7.37s/it] 23%|██▎ | 2831/12188 [6:07:34<18:47:02, 7.23s/it] {'loss': 0.3738, 'grad_norm': 0.6148100098038527, 'learning_rate': 8.96589026665927e-06, 'epoch': 0.23} + 23%|██▎ | 2831/12188 [6:07:34<18:47:02, 7.23s/it] 23%|██▎ | 2832/12188 [6:07:42<18:48:39, 7.24s/it] {'loss': 0.3834, 'grad_norm': 0.6563853724912911, 'learning_rate': 8.965080958187844e-06, 'epoch': 0.23} + 23%|██▎ | 2832/12188 [6:07:42<18:48:39, 7.24s/it] 23%|██▎ | 2833/12188 [6:07:49<19:12:16, 7.39s/it] {'loss': 0.3907, 'grad_norm': 0.6587105449591535, 'learning_rate': 8.964271369708772e-06, 'epoch': 0.23} + 23%|██▎ | 2833/12188 [6:07:49<19:12:16, 7.39s/it] 23%|██▎ | 2834/12188 [6:07:56<18:47:25, 7.23s/it] {'loss': 0.3823, 'grad_norm': 0.6166844155017971, 'learning_rate': 8.963461501279226e-06, 'epoch': 0.23} + 23%|██▎ | 2834/12188 [6:07:56<18:47:25, 7.23s/it] 23%|██▎ | 2835/12188 [6:08:04<18:51:51, 7.26s/it] {'loss': 0.3807, 'grad_norm': 0.6491379102190407, 'learning_rate': 8.962651352956395e-06, 'epoch': 0.23} + 23%|██▎ | 2835/12188 [6:08:04<18:51:51, 7.26s/it] 23%|██▎ | 2836/12188 [6:08:10<18:18:42, 7.05s/it] {'loss': 0.4077, 'grad_norm': 0.6411009912319582, 'learning_rate': 8.961840924797494e-06, 'epoch': 0.23} + 23%|██▎ | 2836/12188 [6:08:10<18:18:42, 7.05s/it] 23%|██▎ | 2837/12188 [6:08:17<18:03:58, 6.96s/it] {'loss': 0.4024, 'grad_norm': 0.6495545750076355, 'learning_rate': 8.96103021685975e-06, 'epoch': 0.23} + 23%|██▎ | 2837/12188 [6:08:17<18:03:58, 6.96s/it] 23%|██▎ | 2838/12188 [6:08:24<17:50:41, 6.87s/it] {'loss': 0.4101, 'grad_norm': 0.6506503987888818, 'learning_rate': 8.960219229200415e-06, 'epoch': 0.23} + 23%|██▎ | 2838/12188 [6:08:24<17:50:41, 6.87s/it] 23%|██▎ | 2839/12188 [6:08:30<17:47:02, 6.85s/it] {'loss': 0.3732, 'grad_norm': 0.698790370202475, 'learning_rate': 8.959407961876763e-06, 'epoch': 0.23} + 23%|██▎ | 2839/12188 [6:08:30<17:47:02, 6.85s/it] 23%|██▎ | 2840/12188 [6:08:39<18:54:51, 7.28s/it] {'loss': 0.3973, 'grad_norm': 0.6518050638924852, 'learning_rate': 8.95859641494608e-06, 'epoch': 0.23} + 23%|██▎ | 2840/12188 [6:08:39<18:54:51, 7.28s/it] 23%|██▎ | 2841/12188 [6:08:47<19:29:50, 7.51s/it] {'loss': 0.4023, 'grad_norm': 0.6810023785928286, 'learning_rate': 8.957784588465681e-06, 'epoch': 0.23} + 23%|██▎ | 2841/12188 [6:08:47<19:29:50, 7.51s/it] 23%|██▎ | 2842/12188 [6:08:54<19:16:11, 7.42s/it] {'loss': 0.3423, 'grad_norm': 0.6320474202454498, 'learning_rate': 8.956972482492892e-06, 'epoch': 0.23} + 23%|██▎ | 2842/12188 [6:08:54<19:16:11, 7.42s/it] 23%|██▎ | 2843/12188 [6:09:02<20:01:22, 7.71s/it] {'loss': 0.4018, 'grad_norm': 0.6874450829400771, 'learning_rate': 8.956160097085063e-06, 'epoch': 0.23} + 23%|██▎ | 2843/12188 [6:09:02<20:01:22, 7.71s/it] 23%|██▎ | 2844/12188 [6:09:09<19:06:28, 7.36s/it] {'loss': 0.3577, 'grad_norm': 0.6514770069923067, 'learning_rate': 8.955347432299564e-06, 'epoch': 0.23} + 23%|██▎ | 2844/12188 [6:09:09<19:06:28, 7.36s/it] 23%|██▎ | 2845/12188 [6:09:17<19:59:27, 7.70s/it] {'loss': 0.3955, 'grad_norm': 0.7935894061753583, 'learning_rate': 8.954534488193784e-06, 'epoch': 0.23} + 23%|██▎ | 2845/12188 [6:09:17<19:59:27, 7.70s/it] 23%|██▎ | 2846/12188 [6:09:27<21:39:14, 8.34s/it] {'loss': 0.3888, 'grad_norm': 0.6653489701000806, 'learning_rate': 8.95372126482513e-06, 'epoch': 0.23} + 23%|██▎ | 2846/12188 [6:09:27<21:39:14, 8.34s/it] 23%|██▎ | 2847/12188 [6:09:35<21:04:05, 8.12s/it] {'loss': 0.3807, 'grad_norm': 0.6743321486495167, 'learning_rate': 8.952907762251035e-06, 'epoch': 0.23} + 23%|██▎ | 2847/12188 [6:09:35<21:04:05, 8.12s/it] 23%|██▎ | 2848/12188 [6:09:42<20:31:56, 7.91s/it] {'loss': 0.3891, 'grad_norm': 0.6493644849579343, 'learning_rate': 8.952093980528944e-06, 'epoch': 0.23} + 23%|██▎ | 2848/12188 [6:09:42<20:31:56, 7.91s/it] 23%|██▎ | 2849/12188 [6:09:49<19:41:13, 7.59s/it] {'loss': 0.3986, 'grad_norm': 0.6834333134738156, 'learning_rate': 8.951279919716327e-06, 'epoch': 0.23} + 23%|██▎ | 2849/12188 [6:09:49<19:41:13, 7.59s/it] 23%|██▎ | 2850/12188 [6:09:56<19:24:00, 7.48s/it] {'loss': 0.3737, 'grad_norm': 0.6344322860802195, 'learning_rate': 8.95046557987067e-06, 'epoch': 0.23} + 23%|██▎ | 2850/12188 [6:09:56<19:24:00, 7.48s/it] 23%|██▎ | 2851/12188 [6:10:04<19:14:11, 7.42s/it] {'loss': 0.3864, 'grad_norm': 0.6223410778258939, 'learning_rate': 8.949650961049479e-06, 'epoch': 0.23} + 23%|██▎ | 2851/12188 [6:10:04<19:14:11, 7.42s/it] 23%|██▎ | 2852/12188 [6:10:11<19:01:55, 7.34s/it] {'loss': 0.3711, 'grad_norm': 0.6323982676845853, 'learning_rate': 8.948836063310282e-06, 'epoch': 0.23} + 23%|██▎ | 2852/12188 [6:10:11<19:01:55, 7.34s/it] 23%|██▎ | 2853/12188 [6:10:18<18:39:24, 7.19s/it] {'loss': 0.4038, 'grad_norm': 0.6470325707757156, 'learning_rate': 8.948020886710628e-06, 'epoch': 0.23} + 23%|██▎ | 2853/12188 [6:10:18<18:39:24, 7.19s/it] 23%|██▎ | 2854/12188 [6:10:25<18:43:27, 7.22s/it] {'loss': 0.359, 'grad_norm': 0.5837683183602504, 'learning_rate': 8.94720543130808e-06, 'epoch': 0.23} + 23%|██▎ | 2854/12188 [6:10:25<18:43:27, 7.22s/it] 23%|██▎ | 2855/12188 [6:10:32<18:32:42, 7.15s/it] {'loss': 0.3559, 'grad_norm': 0.6088437018672478, 'learning_rate': 8.94638969716023e-06, 'epoch': 0.23} + 23%|██▎ | 2855/12188 [6:10:32<18:32:42, 7.15s/it] 23%|██▎ | 2856/12188 [6:10:39<18:24:28, 7.10s/it] {'loss': 0.4436, 'grad_norm': 0.6512790594952518, 'learning_rate': 8.945573684324678e-06, 'epoch': 0.23} + 23%|██▎ | 2856/12188 [6:10:39<18:24:28, 7.10s/it] 23%|██▎ | 2857/12188 [6:10:46<18:22:34, 7.09s/it] {'loss': 0.3641, 'grad_norm': 0.6660856469952046, 'learning_rate': 8.944757392859051e-06, 'epoch': 0.23} + 23%|██▎ | 2857/12188 [6:10:46<18:22:34, 7.09s/it] 23%|██▎ | 2858/12188 [6:10:53<18:19:59, 7.07s/it] {'loss': 0.3773, 'grad_norm': 1.1417493521102622, 'learning_rate': 8.943940822820995e-06, 'epoch': 0.23} + 23%|██▎ | 2858/12188 [6:10:53<18:19:59, 7.07s/it] 23%|██▎ | 2859/12188 [6:11:01<19:02:17, 7.35s/it] {'loss': 0.3777, 'grad_norm': 0.72666207395507, 'learning_rate': 8.943123974268176e-06, 'epoch': 0.23} + 23%|██▎ | 2859/12188 [6:11:01<19:02:17, 7.35s/it] 23%|██▎ | 2860/12188 [6:11:08<18:32:55, 7.16s/it] {'loss': 0.4124, 'grad_norm': 0.7084710293200186, 'learning_rate': 8.942306847258274e-06, 'epoch': 0.23} + 23%|██▎ | 2860/12188 [6:11:08<18:32:55, 7.16s/it] 23%|██▎ | 2861/12188 [6:11:15<18:49:41, 7.27s/it] {'loss': 0.3679, 'grad_norm': 0.6067822316659417, 'learning_rate': 8.941489441848998e-06, 'epoch': 0.23} + 23%|██▎ | 2861/12188 [6:11:15<18:49:41, 7.27s/it] 23%|██▎ | 2862/12188 [6:11:23<19:11:18, 7.41s/it] {'loss': 0.3877, 'grad_norm': 0.6698828780806523, 'learning_rate': 8.94067175809807e-06, 'epoch': 0.23} + 23%|██▎ | 2862/12188 [6:11:23<19:11:18, 7.41s/it] 23%|██▎ | 2863/12188 [6:11:30<18:53:46, 7.30s/it] {'loss': 0.3781, 'grad_norm': 0.6556076532304588, 'learning_rate': 8.939853796063233e-06, 'epoch': 0.23} + 23%|██▎ | 2863/12188 [6:11:30<18:53:46, 7.30s/it] 23%|██▎ | 2864/12188 [6:11:37<18:37:50, 7.19s/it] {'loss': 0.4005, 'grad_norm': 0.6578205863896038, 'learning_rate': 8.939035555802252e-06, 'epoch': 0.23} + 23%|██▎ | 2864/12188 [6:11:37<18:37:50, 7.19s/it] 24%|██▎ | 2865/12188 [6:11:45<19:01:05, 7.34s/it] {'loss': 0.3574, 'grad_norm': 0.5811174706792741, 'learning_rate': 8.938217037372906e-06, 'epoch': 0.24} + 24%|██▎ | 2865/12188 [6:11:45<19:01:05, 7.34s/it] 24%|██▎ | 2866/12188 [6:11:51<18:37:26, 7.19s/it] {'loss': 0.3604, 'grad_norm': 0.637130252938261, 'learning_rate': 8.937398240833003e-06, 'epoch': 0.24} + 24%|██▎ | 2866/12188 [6:11:51<18:37:26, 7.19s/it] 24%|██▎ | 2867/12188 [6:11:59<18:43:41, 7.23s/it] {'loss': 0.3601, 'grad_norm': 0.5871356700218356, 'learning_rate': 8.936579166240361e-06, 'epoch': 0.24} + 24%|██▎ | 2867/12188 [6:11:59<18:43:41, 7.23s/it] 24%|██▎ | 2868/12188 [6:12:06<18:36:38, 7.19s/it] {'loss': 0.3564, 'grad_norm': 0.6021555393083228, 'learning_rate': 8.935759813652823e-06, 'epoch': 0.24} + 24%|██▎ | 2868/12188 [6:12:06<18:36:38, 7.19s/it] 24%|██▎ | 2869/12188 [6:12:14<19:00:42, 7.34s/it] {'loss': 0.41, 'grad_norm': 0.6446333887410599, 'learning_rate': 8.93494018312825e-06, 'epoch': 0.24} + 24%|██▎ | 2869/12188 [6:12:14<19:00:42, 7.34s/it] 24%|██▎ | 2870/12188 [6:12:22<19:33:34, 7.56s/it] {'loss': 0.3846, 'grad_norm': 0.674192447075651, 'learning_rate': 8.934120274724524e-06, 'epoch': 0.24} + 24%|██▎ | 2870/12188 [6:12:22<19:33:34, 7.56s/it] 24%|██▎ | 2871/12188 [6:12:30<20:19:57, 7.86s/it] {'loss': 0.3838, 'grad_norm': 0.6215204071546431, 'learning_rate': 8.933300088499544e-06, 'epoch': 0.24} + 24%|██▎ | 2871/12188 [6:12:30<20:19:57, 7.86s/it] 24%|██▎ | 2872/12188 [6:12:38<20:08:54, 7.79s/it] {'loss': 0.3645, 'grad_norm': 0.5919991432779403, 'learning_rate': 8.932479624511233e-06, 'epoch': 0.24} + 24%|██▎ | 2872/12188 [6:12:38<20:08:54, 7.79s/it] 24%|██▎ | 2873/12188 [6:12:45<19:36:04, 7.58s/it] {'loss': 0.3705, 'grad_norm': 0.6591273631512962, 'learning_rate': 8.931658882817526e-06, 'epoch': 0.24} + 24%|██▎ | 2873/12188 [6:12:45<19:36:04, 7.58s/it] 24%|██▎ | 2874/12188 [6:12:52<18:55:36, 7.32s/it] {'loss': 0.3678, 'grad_norm': 0.6149007667897357, 'learning_rate': 8.930837863476386e-06, 'epoch': 0.24} + 24%|██▎ | 2874/12188 [6:12:52<18:55:36, 7.32s/it] 24%|██▎ | 2875/12188 [6:12:58<18:32:22, 7.17s/it] {'loss': 0.3868, 'grad_norm': 0.6451157429586025, 'learning_rate': 8.930016566545791e-06, 'epoch': 0.24} + 24%|██▎ | 2875/12188 [6:12:58<18:32:22, 7.17s/it] 24%|██▎ | 2876/12188 [6:13:05<18:04:26, 6.99s/it] {'loss': 0.3437, 'grad_norm': 0.6347826799097732, 'learning_rate': 8.929194992083742e-06, 'epoch': 0.24} + 24%|██▎ | 2876/12188 [6:13:05<18:04:26, 6.99s/it] 24%|██▎ | 2877/12188 [6:13:16<20:50:23, 8.06s/it] {'loss': 0.3827, 'grad_norm': 0.6163991769587065, 'learning_rate': 8.928373140148256e-06, 'epoch': 0.24} + 24%|██▎ | 2877/12188 [6:13:16<20:50:23, 8.06s/it] 24%|██▎ | 2878/12188 [6:13:23<20:37:25, 7.97s/it] {'loss': 0.3644, 'grad_norm': 0.5732857456768983, 'learning_rate': 8.927551010797368e-06, 'epoch': 0.24} + 24%|██▎ | 2878/12188 [6:13:23<20:37:25, 7.97s/it] 24%|██▎ | 2879/12188 [6:13:30<19:59:19, 7.73s/it] {'loss': 0.3375, 'grad_norm': 0.7247437718389891, 'learning_rate': 8.926728604089138e-06, 'epoch': 0.24} + 24%|██▎ | 2879/12188 [6:13:30<19:59:19, 7.73s/it] 24%|██▎ | 2880/12188 [6:13:38<19:40:39, 7.61s/it] {'loss': 0.3893, 'grad_norm': 0.6174532971840703, 'learning_rate': 8.925905920081644e-06, 'epoch': 0.24} + 24%|██▎ | 2880/12188 [6:13:38<19:40:39, 7.61s/it] 24%|██▎ | 2881/12188 [6:13:45<19:01:41, 7.36s/it] {'loss': 0.3274, 'grad_norm': 0.5518504980687496, 'learning_rate': 8.92508295883298e-06, 'epoch': 0.24} + 24%|██▎ | 2881/12188 [6:13:45<19:01:41, 7.36s/it] 24%|██▎ | 2882/12188 [6:13:52<19:14:34, 7.44s/it] {'loss': 0.403, 'grad_norm': 0.6621168245382867, 'learning_rate': 8.924259720401266e-06, 'epoch': 0.24} + 24%|██▎ | 2882/12188 [6:13:52<19:14:34, 7.44s/it] 24%|██▎ | 2883/12188 [6:14:02<20:46:54, 8.04s/it] {'loss': 0.364, 'grad_norm': 0.6469375177910959, 'learning_rate': 8.923436204844634e-06, 'epoch': 0.24} + 24%|██▎ | 2883/12188 [6:14:02<20:46:54, 8.04s/it] 24%|██▎ | 2884/12188 [6:14:08<19:45:31, 7.65s/it] {'loss': 0.3943, 'grad_norm': 0.6693256929742419, 'learning_rate': 8.92261241222124e-06, 'epoch': 0.24} + 24%|██▎ | 2884/12188 [6:14:08<19:45:31, 7.65s/it] 24%|██▎ | 2885/12188 [6:14:15<19:15:58, 7.46s/it] {'loss': 0.3697, 'grad_norm': 0.6434986567676918, 'learning_rate': 8.921788342589259e-06, 'epoch': 0.24} + 24%|██▎ | 2885/12188 [6:14:15<19:15:58, 7.46s/it] 24%|██▎ | 2886/12188 [6:14:23<19:02:16, 7.37s/it] {'loss': 0.3608, 'grad_norm': 0.619912294918691, 'learning_rate': 8.920963996006886e-06, 'epoch': 0.24} + 24%|██▎ | 2886/12188 [6:14:23<19:02:16, 7.37s/it] 24%|██▎ | 2887/12188 [6:14:30<18:48:24, 7.28s/it] {'loss': 0.3582, 'grad_norm': 0.5919551511495076, 'learning_rate': 8.920139372532337e-06, 'epoch': 0.24} + 24%|██▎ | 2887/12188 [6:14:30<18:48:24, 7.28s/it] 24%|██▎ | 2888/12188 [6:14:36<18:20:55, 7.10s/it] {'loss': 0.3795, 'grad_norm': 0.6508571365693582, 'learning_rate': 8.919314472223844e-06, 'epoch': 0.24} + 24%|██▎ | 2888/12188 [6:14:36<18:20:55, 7.10s/it] 24%|██▎ | 2889/12188 [6:14:44<18:52:21, 7.31s/it] {'loss': 0.3873, 'grad_norm': 0.6314271336521535, 'learning_rate': 8.918489295139658e-06, 'epoch': 0.24} + 24%|██▎ | 2889/12188 [6:14:44<18:52:21, 7.31s/it] 24%|██▎ | 2890/12188 [6:14:51<18:26:27, 7.14s/it] {'loss': 0.4051, 'grad_norm': 0.6708615986499037, 'learning_rate': 8.917663841338053e-06, 'epoch': 0.24} + 24%|██▎ | 2890/12188 [6:14:51<18:26:27, 7.14s/it] 24%|██▎ | 2891/12188 [6:14:58<18:47:12, 7.27s/it] {'loss': 0.4113, 'grad_norm': 0.699905471308614, 'learning_rate': 8.916838110877324e-06, 'epoch': 0.24} + 24%|██▎ | 2891/12188 [6:14:58<18:47:12, 7.27s/it] 24%|██▎ | 2892/12188 [6:15:06<19:19:18, 7.48s/it] {'loss': 0.3643, 'grad_norm': 0.5887714025289545, 'learning_rate': 8.916012103815779e-06, 'epoch': 0.24} + 24%|██▎ | 2892/12188 [6:15:06<19:19:18, 7.48s/it] 24%|██▎ | 2893/12188 [6:15:14<19:17:30, 7.47s/it] {'loss': 0.3687, 'grad_norm': 0.6251878422598868, 'learning_rate': 8.915185820211752e-06, 'epoch': 0.24} + 24%|██▎ | 2893/12188 [6:15:14<19:17:30, 7.47s/it] 24%|██▎ | 2894/12188 [6:15:21<18:48:20, 7.28s/it] {'loss': 0.365, 'grad_norm': 0.6364729293854063, 'learning_rate': 8.914359260123591e-06, 'epoch': 0.24} + 24%|██▎ | 2894/12188 [6:15:21<18:48:20, 7.28s/it] 24%|██▍ | 2895/12188 [6:15:29<19:24:39, 7.52s/it] {'loss': 0.3432, 'grad_norm': 0.6662931769749016, 'learning_rate': 8.913532423609669e-06, 'epoch': 0.24} + 24%|██▍ | 2895/12188 [6:15:29<19:24:39, 7.52s/it] 24%|██▍ | 2896/12188 [6:15:37<19:41:08, 7.63s/it] {'loss': 0.3628, 'grad_norm': 0.6280052768572216, 'learning_rate': 8.912705310728374e-06, 'epoch': 0.24} + 24%|██▍ | 2896/12188 [6:15:37<19:41:08, 7.63s/it] 24%|██▍ | 2897/12188 [6:15:44<19:09:48, 7.43s/it] {'loss': 0.3462, 'grad_norm': 0.6036907500157082, 'learning_rate': 8.911877921538117e-06, 'epoch': 0.24} + 24%|██▍ | 2897/12188 [6:15:44<19:09:48, 7.43s/it] 24%|██▍ | 2898/12188 [6:15:51<19:19:23, 7.49s/it] {'loss': 0.4233, 'grad_norm': 0.7014054968638831, 'learning_rate': 8.911050256097325e-06, 'epoch': 0.24} + 24%|██▍ | 2898/12188 [6:15:51<19:19:23, 7.49s/it] 24%|██▍ | 2899/12188 [6:15:58<18:43:24, 7.26s/it] {'loss': 0.3563, 'grad_norm': 0.6778717261016785, 'learning_rate': 8.91022231446445e-06, 'epoch': 0.24} + 24%|██▍ | 2899/12188 [6:15:58<18:43:24, 7.26s/it] 24%|██▍ | 2900/12188 [6:16:05<18:15:40, 7.08s/it] {'loss': 0.3837, 'grad_norm': 0.7435442881354691, 'learning_rate': 8.909394096697954e-06, 'epoch': 0.24} + 24%|██▍ | 2900/12188 [6:16:05<18:15:40, 7.08s/it] 24%|██▍ | 2901/12188 [6:16:11<18:04:57, 7.01s/it] {'loss': 0.4098, 'grad_norm': 0.6541384230453, 'learning_rate': 8.908565602856331e-06, 'epoch': 0.24} + 24%|██▍ | 2901/12188 [6:16:11<18:04:57, 7.01s/it] 24%|██▍ | 2902/12188 [6:16:18<17:48:02, 6.90s/it] {'loss': 0.3767, 'grad_norm': 0.6082014240568439, 'learning_rate': 8.907736832998084e-06, 'epoch': 0.24} + 24%|██▍ | 2902/12188 [6:16:18<17:48:02, 6.90s/it] 24%|██▍ | 2903/12188 [6:16:25<17:50:39, 6.92s/it] {'loss': 0.3683, 'grad_norm': 0.6247864116744205, 'learning_rate': 8.906907787181738e-06, 'epoch': 0.24} + 24%|██▍ | 2903/12188 [6:16:25<17:50:39, 6.92s/it] 24%|██▍ | 2904/12188 [6:16:32<17:31:32, 6.80s/it] {'loss': 0.3957, 'grad_norm': 0.65853141758166, 'learning_rate': 8.906078465465842e-06, 'epoch': 0.24} + 24%|██▍ | 2904/12188 [6:16:32<17:31:32, 6.80s/it] 24%|██▍ | 2905/12188 [6:16:41<19:11:00, 7.44s/it] {'loss': 0.3742, 'grad_norm': 0.5928337123809184, 'learning_rate': 8.90524886790896e-06, 'epoch': 0.24} + 24%|██▍ | 2905/12188 [6:16:41<19:11:00, 7.44s/it] 24%|██▍ | 2906/12188 [6:16:48<19:09:14, 7.43s/it] {'loss': 0.3687, 'grad_norm': 0.6558475490514107, 'learning_rate': 8.90441899456968e-06, 'epoch': 0.24} + 24%|██▍ | 2906/12188 [6:16:48<19:09:14, 7.43s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7ff18018e070> +[Try #0] Failed to fetch sample 4533194 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7ff18018e070> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'talk page'"}, {'from': 'gpt', 'value': '\nclick(x=0.9325, y=0.271)\n'}]} + 24%|██▍ | 2907/12188 [6:16:55<18:32:33, 7.19s/it] {'loss': 0.3963, 'grad_norm': 0.6722866966202312, 'learning_rate': 8.903588845506602e-06, 'epoch': 0.24} + 24%|██▍ | 2907/12188 [6:16:55<18:32:33, 7.19s/it] 24%|██▍ | 2908/12188 [6:17:02<18:44:43, 7.27s/it] {'loss': 0.3627, 'grad_norm': 0.6362781646396646, 'learning_rate': 8.90275842077835e-06, 'epoch': 0.24} + 24%|██▍ | 2908/12188 [6:17:02<18:44:43, 7.27s/it] 24%|██▍ | 2909/12188 [6:17:09<18:28:54, 7.17s/it] {'loss': 0.3721, 'grad_norm': 0.624801228882643, 'learning_rate': 8.90192772044357e-06, 'epoch': 0.24} + 24%|██▍ | 2909/12188 [6:17:09<18:28:54, 7.17s/it] 24%|██▍ | 2910/12188 [6:17:16<18:11:53, 7.06s/it] {'loss': 0.3883, 'grad_norm': 0.692194649705521, 'learning_rate': 8.901096744560922e-06, 'epoch': 0.24} + 24%|██▍ | 2910/12188 [6:17:16<18:11:53, 7.06s/it] 24%|██▍ | 2911/12188 [6:17:22<17:53:20, 6.94s/it] {'loss': 0.372, 'grad_norm': 0.6189195145213632, 'learning_rate': 8.900265493189088e-06, 'epoch': 0.24} + 24%|██▍ | 2911/12188 [6:17:22<17:53:20, 6.94s/it] 24%|██▍ | 2912/12188 [6:17:30<18:38:30, 7.23s/it] {'loss': 0.3389, 'grad_norm': 0.6637043928086939, 'learning_rate': 8.899433966386774e-06, 'epoch': 0.24} + 24%|██▍ | 2912/12188 [6:17:30<18:38:30, 7.23s/it]Invalidate trace cache @ step 2: expected module 1, but got module 364 + 24%|██▍ | 2913/12188 [6:17:37<18:12:51, 7.07s/it] {'loss': 0.6889, 'grad_norm': 0.8684461174639787, 'learning_rate': 8.898602164212699e-06, 'epoch': 0.24} + 24%|██▍ | 2913/12188 [6:17:37<18:12:51, 7.07s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7ff18018e160> +[Try #0] Failed to fetch sample 4772305 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7ff18018e160> +Problematic sample: {'image': '20240823_021250_before_screenshot_sub0.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Daily Deals'"}, {'from': 'gpt', 'value': '\nclick(x=0.3495, y=0.121)\n'}]} + 24%|██▍ | 2914/12188 [6:17:44<18:08:19, 7.04s/it] {'loss': 0.376, 'grad_norm': 0.6435246012276016, 'learning_rate': 8.8977700867256e-06, 'epoch': 0.24} + 24%|██▍ | 2914/12188 [6:17:44<18:08:19, 7.04s/it] 24%|██▍ | 2915/12188 [6:17:51<18:26:58, 7.16s/it] {'loss': 0.3339, 'grad_norm': 0.6559197268086102, 'learning_rate': 8.896937733984241e-06, 'epoch': 0.24} + 24%|██▍ | 2915/12188 [6:17:51<18:26:58, 7.16s/it] 24%|██▍ | 2916/12188 [6:17:58<18:20:26, 7.12s/it] {'loss': 0.3403, 'grad_norm': 0.6232006263404782, 'learning_rate': 8.896105106047402e-06, 'epoch': 0.24} + 24%|██▍ | 2916/12188 [6:17:58<18:20:26, 7.12s/it] 24%|██▍ | 2917/12188 [6:18:05<18:04:02, 7.02s/it] {'loss': 0.3551, 'grad_norm': 0.6134458403535092, 'learning_rate': 8.895272202973878e-06, 'epoch': 0.24} + 24%|██▍ | 2917/12188 [6:18:05<18:04:02, 7.02s/it] 24%|██▍ | 2918/12188 [6:18:12<17:54:27, 6.95s/it] {'loss': 0.3553, 'grad_norm': 0.6260810664758947, 'learning_rate': 8.894439024822488e-06, 'epoch': 0.24} + 24%|██▍ | 2918/12188 [6:18:12<17:54:27, 6.95s/it] 24%|██▍ | 2919/12188 [6:18:19<17:39:27, 6.86s/it] {'loss': 0.3716, 'grad_norm': 0.6969805421703364, 'learning_rate': 8.893605571652073e-06, 'epoch': 0.24} + 24%|██▍ | 2919/12188 [6:18:19<17:39:27, 6.86s/it] 24%|██▍ | 2920/12188 [6:18:26<18:03:01, 7.01s/it] {'loss': 0.3738, 'grad_norm': 0.6146901423571102, 'learning_rate': 8.892771843521487e-06, 'epoch': 0.24} + 24%|██▍ | 2920/12188 [6:18:26<18:03:01, 7.01s/it] 24%|██▍ | 2921/12188 [6:18:33<17:57:21, 6.98s/it] {'loss': 0.3798, 'grad_norm': 0.6435564088146203, 'learning_rate': 8.89193784048961e-06, 'epoch': 0.24} + 24%|██▍ | 2921/12188 [6:18:33<17:57:21, 6.98s/it] 24%|██▍ | 2922/12188 [6:18:41<18:29:50, 7.19s/it] {'loss': 0.3708, 'grad_norm': 0.6699994576962305, 'learning_rate': 8.891103562615332e-06, 'epoch': 0.24} + 24%|██▍ | 2922/12188 [6:18:41<18:29:50, 7.19s/it] 24%|██▍ | 2923/12188 [6:18:50<20:04:19, 7.80s/it] {'loss': 0.3599, 'grad_norm': 0.651367205231876, 'learning_rate': 8.890269009957572e-06, 'epoch': 0.24} + 24%|██▍ | 2923/12188 [6:18:50<20:04:19, 7.80s/it] 24%|██▍ | 2924/12188 [6:18:56<19:05:49, 7.42s/it] {'loss': 0.3848, 'grad_norm': 0.6854914543971296, 'learning_rate': 8.889434182575268e-06, 'epoch': 0.24} + 24%|██▍ | 2924/12188 [6:18:56<19:05:49, 7.42s/it] 24%|██▍ | 2925/12188 [6:19:03<18:40:52, 7.26s/it] {'loss': 0.4014, 'grad_norm': 0.6424518915258374, 'learning_rate': 8.888599080527369e-06, 'epoch': 0.24} + 24%|██▍ | 2925/12188 [6:19:03<18:40:52, 7.26s/it] 24%|██▍ | 2926/12188 [6:19:13<20:28:17, 7.96s/it] {'loss': 0.3518, 'grad_norm': 0.6328570802454909, 'learning_rate': 8.88776370387285e-06, 'epoch': 0.24} + 24%|██▍ | 2926/12188 [6:19:13<20:28:17, 7.96s/it] 24%|██▍ | 2927/12188 [6:19:24<22:37:45, 8.80s/it] {'loss': 0.3725, 'grad_norm': 0.6178950341582466, 'learning_rate': 8.886928052670705e-06, 'epoch': 0.24} + 24%|██▍ | 2927/12188 [6:19:24<22:37:45, 8.80s/it] 24%|██▍ | 2928/12188 [6:19:31<21:26:46, 8.34s/it] {'loss': 0.3417, 'grad_norm': 0.5799861657001326, 'learning_rate': 8.886092126979947e-06, 'epoch': 0.24} + 24%|██▍ | 2928/12188 [6:19:31<21:26:46, 8.34s/it] 24%|██▍ | 2929/12188 [6:19:38<20:17:03, 7.89s/it] {'loss': 0.4144, 'grad_norm': 0.6375117269402045, 'learning_rate': 8.885255926859604e-06, 'epoch': 0.24} + 24%|██▍ | 2929/12188 [6:19:38<20:17:03, 7.89s/it] 24%|██▍ | 2930/12188 [6:19:45<19:59:13, 7.77s/it] {'loss': 0.3917, 'grad_norm': 0.6888364022092032, 'learning_rate': 8.884419452368731e-06, 'epoch': 0.24} + 24%|██▍ | 2930/12188 [6:19:45<19:59:13, 7.77s/it] 24%|██▍ | 2931/12188 [6:19:53<19:37:34, 7.63s/it] {'loss': 0.3653, 'grad_norm': 0.5967390926033186, 'learning_rate': 8.883582703566396e-06, 'epoch': 0.24} + 24%|██▍ | 2931/12188 [6:19:53<19:37:34, 7.63s/it] 24%|██▍ | 2932/12188 [6:20:00<19:27:11, 7.57s/it] {'loss': 0.366, 'grad_norm': 0.637205329497468, 'learning_rate': 8.882745680511691e-06, 'epoch': 0.24} + 24%|██▍ | 2932/12188 [6:20:00<19:27:11, 7.57s/it] 24%|██▍ | 2933/12188 [6:20:07<18:40:51, 7.27s/it] {'loss': 0.3495, 'grad_norm': 0.7285719588182957, 'learning_rate': 8.881908383263724e-06, 'epoch': 0.24} + 24%|██▍ | 2933/12188 [6:20:07<18:40:51, 7.27s/it] 24%|██▍ | 2934/12188 [6:20:14<18:36:56, 7.24s/it] {'loss': 0.3619, 'grad_norm': 0.62935659474105, 'learning_rate': 8.881070811881625e-06, 'epoch': 0.24} + 24%|██▍ | 2934/12188 [6:20:14<18:36:56, 7.24s/it] 24%|██▍ | 2935/12188 [6:20:21<18:29:39, 7.20s/it] {'loss': 0.4114, 'grad_norm': 0.59605408510328, 'learning_rate': 8.880232966424538e-06, 'epoch': 0.24} + 24%|██▍ | 2935/12188 [6:20:21<18:29:39, 7.20s/it] 24%|██▍ | 2936/12188 [6:20:28<18:11:03, 7.08s/it] {'loss': 0.3787, 'grad_norm': 0.6019998863293899, 'learning_rate': 8.879394846951634e-06, 'epoch': 0.24} + 24%|██▍ | 2936/12188 [6:20:28<18:11:03, 7.08s/it] 24%|██▍ | 2937/12188 [6:20:36<19:01:54, 7.41s/it] {'loss': 0.376, 'grad_norm': 0.665972488237809, 'learning_rate': 8.8785564535221e-06, 'epoch': 0.24} + 24%|██▍ | 2937/12188 [6:20:36<19:01:54, 7.41s/it] 24%|██▍ | 2938/12188 [6:20:43<18:56:20, 7.37s/it] {'loss': 0.3302, 'grad_norm': 0.5781642148684826, 'learning_rate': 8.877717786195139e-06, 'epoch': 0.24} + 24%|██▍ | 2938/12188 [6:20:43<18:56:20, 7.37s/it] 24%|██▍ | 2939/12188 [6:20:50<18:42:19, 7.28s/it] {'loss': 0.3911, 'grad_norm': 3.7848056234878324, 'learning_rate': 8.87687884502998e-06, 'epoch': 0.24} + 24%|██▍ | 2939/12188 [6:20:50<18:42:19, 7.28s/it] 24%|██▍ | 2940/12188 [6:20:58<19:30:05, 7.59s/it] {'loss': 0.3639, 'grad_norm': 0.6025707738219767, 'learning_rate': 8.876039630085864e-06, 'epoch': 0.24} + 24%|██▍ | 2940/12188 [6:20:58<19:30:05, 7.59s/it] 24%|██▍ | 2941/12188 [6:21:07<20:23:48, 7.94s/it] {'loss': 0.3565, 'grad_norm': 0.6217113902553154, 'learning_rate': 8.875200141422055e-06, 'epoch': 0.24} + 24%|██▍ | 2941/12188 [6:21:07<20:23:48, 7.94s/it] 24%|██▍ | 2942/12188 [6:21:14<19:38:50, 7.65s/it] {'loss': 0.3816, 'grad_norm': 0.6725469128729444, 'learning_rate': 8.874360379097842e-06, 'epoch': 0.24} + 24%|██▍ | 2942/12188 [6:21:14<19:38:50, 7.65s/it] 24%|██▍ | 2943/12188 [6:21:21<19:10:48, 7.47s/it] {'loss': 0.378, 'grad_norm': 0.6701578198273084, 'learning_rate': 8.87352034317252e-06, 'epoch': 0.24} + 24%|██▍ | 2943/12188 [6:21:21<19:10:48, 7.47s/it] 24%|██▍ | 2944/12188 [6:21:28<18:51:28, 7.34s/it] {'loss': 0.4025, 'grad_norm': 0.6525777360426738, 'learning_rate': 8.872680033705417e-06, 'epoch': 0.24} + 24%|██▍ | 2944/12188 [6:21:28<18:51:28, 7.34s/it] 24%|██▍ | 2945/12188 [6:21:35<18:23:40, 7.16s/it] {'loss': 0.3682, 'grad_norm': 0.8122632291807959, 'learning_rate': 8.871839450755871e-06, 'epoch': 0.24} + 24%|██▍ | 2945/12188 [6:21:35<18:23:40, 7.16s/it] 24%|██▍ | 2946/12188 [6:21:43<18:50:00, 7.34s/it] {'loss': 0.3429, 'grad_norm': 0.7128929761661555, 'learning_rate': 8.870998594383243e-06, 'epoch': 0.24} + 24%|██▍ | 2946/12188 [6:21:43<18:50:00, 7.34s/it] 24%|██▍ | 2947/12188 [6:21:49<18:23:11, 7.16s/it] {'loss': 0.3743, 'grad_norm': 0.6766362915481693, 'learning_rate': 8.870157464646914e-06, 'epoch': 0.24} + 24%|██▍ | 2947/12188 [6:21:50<18:23:11, 7.16s/it] 24%|██▍ | 2948/12188 [6:21:56<18:00:26, 7.02s/it] {'loss': 0.3982, 'grad_norm': 0.658855529971069, 'learning_rate': 8.869316061606284e-06, 'epoch': 0.24} + 24%|██▍ | 2948/12188 [6:21:56<18:00:26, 7.02s/it] 24%|██▍ | 2949/12188 [6:22:03<17:46:59, 6.93s/it] {'loss': 0.3816, 'grad_norm': 1.8583630306099317, 'learning_rate': 8.868474385320768e-06, 'epoch': 0.24} + 24%|██▍ | 2949/12188 [6:22:03<17:46:59, 6.93s/it] 24%|██▍ | 2950/12188 [6:22:10<17:52:56, 6.97s/it] {'loss': 0.3779, 'grad_norm': 0.6813865636272286, 'learning_rate': 8.867632435849807e-06, 'epoch': 0.24} + 24%|██▍ | 2950/12188 [6:22:10<17:52:56, 6.97s/it] 24%|██▍ | 2951/12188 [6:22:17<17:49:06, 6.94s/it] {'loss': 0.3634, 'grad_norm': 0.7115334542252896, 'learning_rate': 8.866790213252857e-06, 'epoch': 0.24} + 24%|██▍ | 2951/12188 [6:22:17<17:49:06, 6.94s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f0cd6a68e50> +[Try #0] Failed to fetch sample 4854979 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f0cd6a68e50> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Close split screen'"}, {'from': 'gpt', 'value': '\nclick(x=0.83, y=0.0705)\n'}]} + 24%|██▍ | 2952/12188 [6:22:25<18:34:04, 7.24s/it] {'loss': 0.4151, 'grad_norm': 1.0315392331445858, 'learning_rate': 8.865947717589395e-06, 'epoch': 0.24} + 24%|██▍ | 2952/12188 [6:22:25<18:34:04, 7.24s/it] 24%|██▍ | 2953/12188 [6:22:32<18:52:05, 7.36s/it] {'loss': 0.3617, 'grad_norm': 1.3275459719613265, 'learning_rate': 8.865104948918919e-06, 'epoch': 0.24} + 24%|██▍ | 2953/12188 [6:22:32<18:52:05, 7.36s/it] 24%|██▍ | 2954/12188 [6:22:39<18:38:39, 7.27s/it] {'loss': 0.3903, 'grad_norm': 0.819239676945428, 'learning_rate': 8.864261907300938e-06, 'epoch': 0.24} + 24%|██▍ | 2954/12188 [6:22:39<18:38:39, 7.27s/it] 24%|██▍ | 2955/12188 [6:22:46<18:22:07, 7.16s/it] {'loss': 0.3733, 'grad_norm': 0.5979350611811772, 'learning_rate': 8.86341859279499e-06, 'epoch': 0.24} + 24%|██▍ | 2955/12188 [6:22:46<18:22:07, 7.16s/it] 24%|██▍ | 2956/12188 [6:22:53<18:00:08, 7.02s/it] {'loss': 0.3803, 'grad_norm': 1.0125180520667478, 'learning_rate': 8.862575005460627e-06, 'epoch': 0.24} + 24%|██▍ | 2956/12188 [6:22:53<18:00:08, 7.02s/it] 24%|██▍ | 2957/12188 [6:23:00<18:02:53, 7.04s/it] {'loss': 0.3559, 'grad_norm': 0.6980702206783775, 'learning_rate': 8.861731145357425e-06, 'epoch': 0.24} + 24%|██▍ | 2957/12188 [6:23:00<18:02:53, 7.04s/it] 24%|██▍ | 2958/12188 [6:23:07<17:58:46, 7.01s/it] {'loss': 0.3581, 'grad_norm': 0.6846648321188114, 'learning_rate': 8.860887012544973e-06, 'epoch': 0.24} + 24%|██▍ | 2958/12188 [6:23:07<17:58:46, 7.01s/it] 24%|██▍ | 2959/12188 [6:23:14<17:57:49, 7.01s/it] {'loss': 0.407, 'grad_norm': 3.8836180252393167, 'learning_rate': 8.860042607082884e-06, 'epoch': 0.24} + 24%|██▍ | 2959/12188 [6:23:14<17:57:49, 7.01s/it] 24%|██▍ | 2960/12188 [6:23:21<17:50:02, 6.96s/it] {'loss': 0.353, 'grad_norm': 0.7166740670075609, 'learning_rate': 8.859197929030787e-06, 'epoch': 0.24} + 24%|██▍ | 2960/12188 [6:23:21<17:50:02, 6.96s/it] 24%|██▍ | 2961/12188 [6:23:28<17:34:40, 6.86s/it] {'loss': 0.3527, 'grad_norm': 68.18688749380681, 'learning_rate': 8.858352978448332e-06, 'epoch': 0.24} + 24%|██▍ | 2961/12188 [6:23:28<17:34:40, 6.86s/it] 24%|██▍ | 2962/12188 [6:23:36<18:29:59, 7.22s/it] {'loss': 0.3684, 'grad_norm': 0.6286895626728577, 'learning_rate': 8.857507755395189e-06, 'epoch': 0.24} + 24%|██▍ | 2962/12188 [6:23:36<18:29:59, 7.22s/it] 24%|██▍ | 2963/12188 [6:23:43<18:41:10, 7.29s/it] {'loss': 0.344, 'grad_norm': 0.7742085822591436, 'learning_rate': 8.856662259931044e-06, 'epoch': 0.24} + 24%|██▍ | 2963/12188 [6:23:43<18:41:10, 7.29s/it] 24%|██▍ | 2964/12188 [6:23:50<18:19:03, 7.15s/it] {'loss': 0.3798, 'grad_norm': 0.6397985939186634, 'learning_rate': 8.85581649211561e-06, 'epoch': 0.24} + 24%|██▍ | 2964/12188 [6:23:50<18:19:03, 7.15s/it] 24%|██▍ | 2965/12188 [6:23:58<18:55:13, 7.39s/it] {'loss': 0.4344, 'grad_norm': 0.7391188921762428, 'learning_rate': 8.854970452008607e-06, 'epoch': 0.24} + 24%|██▍ | 2965/12188 [6:23:58<18:55:13, 7.39s/it] 24%|██▍ | 2966/12188 [6:24:05<18:53:53, 7.38s/it] {'loss': 0.3522, 'grad_norm': 0.8616388630966433, 'learning_rate': 8.854124139669786e-06, 'epoch': 0.24} + 24%|██▍ | 2966/12188 [6:24:05<18:53:53, 7.38s/it] 24%|██▍ | 2967/12188 [6:24:12<18:27:38, 7.21s/it] {'loss': 0.3494, 'grad_norm': 0.8023915485466961, 'learning_rate': 8.85327755515891e-06, 'epoch': 0.24} + 24%|██▍ | 2967/12188 [6:24:12<18:27:38, 7.21s/it] 24%|██▍ | 2968/12188 [6:24:19<18:00:45, 7.03s/it] {'loss': 0.3308, 'grad_norm': 0.6581434876308332, 'learning_rate': 8.852430698535763e-06, 'epoch': 0.24} + 24%|██▍ | 2968/12188 [6:24:19<18:00:45, 7.03s/it] 24%|██▍ | 2969/12188 [6:24:27<19:17:38, 7.53s/it] {'loss': 0.3884, 'grad_norm': 0.8750521533295929, 'learning_rate': 8.851583569860152e-06, 'epoch': 0.24} + 24%|██▍ | 2969/12188 [6:24:27<19:17:38, 7.53s/it] 24%|██▍ | 2970/12188 [6:24:35<19:09:52, 7.48s/it] {'loss': 0.348, 'grad_norm': 0.7191204995461383, 'learning_rate': 8.850736169191894e-06, 'epoch': 0.24} + 24%|██▍ | 2970/12188 [6:24:35<19:09:52, 7.48s/it] 24%|██▍ | 2971/12188 [6:24:42<19:19:03, 7.55s/it] {'loss': 0.3829, 'grad_norm': 1.0052298242852982, 'learning_rate': 8.849888496590836e-06, 'epoch': 0.24} + 24%|██▍ | 2971/12188 [6:24:42<19:19:03, 7.55s/it] 24%|██▍ | 2972/12188 [6:24:50<19:11:15, 7.50s/it] {'loss': 0.4099, 'grad_norm': 0.8419282681299323, 'learning_rate': 8.849040552116838e-06, 'epoch': 0.24} + 24%|██▍ | 2972/12188 [6:24:50<19:11:15, 7.50s/it] 24%|██▍ | 2973/12188 [6:24:57<18:40:07, 7.29s/it] {'loss': 0.3996, 'grad_norm': 0.8925208940562116, 'learning_rate': 8.848192335829779e-06, 'epoch': 0.24} + 24%|██▍ | 2973/12188 [6:24:57<18:40:07, 7.29s/it] 24%|██▍ | 2974/12188 [6:25:04<18:31:51, 7.24s/it] {'loss': 0.3568, 'grad_norm': 1.0241052704304432, 'learning_rate': 8.847343847789561e-06, 'epoch': 0.24} + 24%|██▍ | 2974/12188 [6:25:04<18:31:51, 7.24s/it] 24%|██▍ | 2975/12188 [6:25:11<18:12:35, 7.12s/it] {'loss': 0.355, 'grad_norm': 1.137870100440833, 'learning_rate': 8.846495088056102e-06, 'epoch': 0.24} + 24%|██▍ | 2975/12188 [6:25:11<18:12:35, 7.12s/it] 24%|██▍ | 2976/12188 [6:25:20<19:53:56, 7.78s/it] {'loss': 0.3371, 'grad_norm': 1.6860574513736974, 'learning_rate': 8.845646056689338e-06, 'epoch': 0.24} + 24%|██▍ | 2976/12188 [6:25:20<19:53:56, 7.78s/it] 24%|██▍ | 2977/12188 [6:25:26<18:56:22, 7.40s/it] {'loss': 0.3681, 'grad_norm': 1.5749898552406878, 'learning_rate': 8.84479675374923e-06, 'epoch': 0.24} + 24%|██▍ | 2977/12188 [6:25:26<18:56:22, 7.40s/it] 24%|██▍ | 2978/12188 [6:25:33<18:39:37, 7.29s/it] {'loss': 0.4115, 'grad_norm': 0.6993195843086124, 'learning_rate': 8.84394717929575e-06, 'epoch': 0.24} + 24%|██▍ | 2978/12188 [6:25:33<18:39:37, 7.29s/it] 24%|██▍ | 2979/12188 [6:25:41<18:45:52, 7.34s/it] {'loss': 0.3997, 'grad_norm': 1.33168649437014, 'learning_rate': 8.843097333388899e-06, 'epoch': 0.24} + 24%|██▍ | 2979/12188 [6:25:41<18:45:52, 7.34s/it] 24%|██▍ | 2980/12188 [6:25:48<18:18:36, 7.16s/it] {'loss': 0.3524, 'grad_norm': 0.6674422972469435, 'learning_rate': 8.842247216088686e-06, 'epoch': 0.24} + 24%|██▍ | 2980/12188 [6:25:48<18:18:36, 7.16s/it] 24%|██▍ | 2981/12188 [6:25:55<18:16:14, 7.14s/it] {'loss': 0.3737, 'grad_norm': 1.011506674669749, 'learning_rate': 8.841396827455148e-06, 'epoch': 0.24} + 24%|██▍ | 2981/12188 [6:25:55<18:16:14, 7.14s/it] 24%|██▍ | 2982/12188 [6:26:02<18:45:09, 7.33s/it] {'loss': 0.3936, 'grad_norm': 0.9208283085501211, 'learning_rate': 8.84054616754834e-06, 'epoch': 0.24} + 24%|██▍ | 2982/12188 [6:26:02<18:45:09, 7.33s/it] 24%|██▍ | 2983/12188 [6:26:09<18:17:46, 7.16s/it] {'loss': 0.3905, 'grad_norm': 1.7350299210082005, 'learning_rate': 8.83969523642833e-06, 'epoch': 0.24} + 24%|██▍ | 2983/12188 [6:26:09<18:17:46, 7.16s/it] 24%|██▍ | 2984/12188 [6:26:18<19:09:35, 7.49s/it] {'loss': 0.3621, 'grad_norm': 0.6723485109809039, 'learning_rate': 8.838844034155212e-06, 'epoch': 0.24} + 24%|██▍ | 2984/12188 [6:26:18<19:09:35, 7.49s/it] 24%|██▍ | 2985/12188 [6:26:25<19:27:19, 7.61s/it] {'loss': 0.4276, 'grad_norm': 1.3872921989885618, 'learning_rate': 8.837992560789097e-06, 'epoch': 0.24} + 24%|██▍ | 2985/12188 [6:26:25<19:27:19, 7.61s/it] 24%|██▍ | 2986/12188 [6:26:33<19:10:29, 7.50s/it] {'loss': 0.3551, 'grad_norm': 0.7373517874793873, 'learning_rate': 8.83714081639011e-06, 'epoch': 0.24} + 24%|██▍ | 2986/12188 [6:26:33<19:10:29, 7.50s/it] 25%|██▍ | 2987/12188 [6:26:40<18:52:32, 7.39s/it] {'loss': 0.3752, 'grad_norm': 1.0603806876606234, 'learning_rate': 8.836288801018407e-06, 'epoch': 0.25} + 25%|██▍ | 2987/12188 [6:26:40<18:52:32, 7.39s/it] 25%|██▍ | 2988/12188 [6:26:47<18:29:59, 7.24s/it] {'loss': 0.3816, 'grad_norm': 0.9135588159242656, 'learning_rate': 8.83543651473415e-06, 'epoch': 0.25} + 25%|██▍ | 2988/12188 [6:26:47<18:29:59, 7.24s/it] 25%|██▍ | 2989/12188 [6:26:53<17:54:04, 7.01s/it] {'loss': 0.3511, 'grad_norm': 1.0892331224806229, 'learning_rate': 8.83458395759753e-06, 'epoch': 0.25} + 25%|██▍ | 2989/12188 [6:26:53<17:54:04, 7.01s/it] 25%|██▍ | 2990/12188 [6:27:00<17:49:16, 6.98s/it] {'loss': 0.4146, 'grad_norm': 1.0953726587840913, 'learning_rate': 8.83373112966875e-06, 'epoch': 0.25} + 25%|██▍ | 2990/12188 [6:27:00<17:49:16, 6.98s/it] 25%|██▍ | 2991/12188 [6:27:08<18:19:19, 7.17s/it] {'loss': 0.4426, 'grad_norm': 1.6219048080624463, 'learning_rate': 8.83287803100804e-06, 'epoch': 0.25} + 25%|██▍ | 2991/12188 [6:27:08<18:19:19, 7.17s/it] 25%|██▍ | 2992/12188 [6:27:15<18:16:31, 7.15s/it] {'loss': 0.382, 'grad_norm': 0.7944427017362852, 'learning_rate': 8.832024661675638e-06, 'epoch': 0.25} + 25%|██▍ | 2992/12188 [6:27:15<18:16:31, 7.15s/it] 25%|██▍ | 2993/12188 [6:27:22<18:42:00, 7.32s/it] {'loss': 0.3677, 'grad_norm': 1.2735612607266662, 'learning_rate': 8.831171021731813e-06, 'epoch': 0.25} + 25%|██▍ | 2993/12188 [6:27:22<18:42:00, 7.32s/it] 25%|██▍ | 2994/12188 [6:27:29<18:20:59, 7.19s/it] {'loss': 0.4076, 'grad_norm': 0.7496066643507194, 'learning_rate': 8.830317111236845e-06, 'epoch': 0.25} + 25%|██▍ | 2994/12188 [6:27:29<18:20:59, 7.19s/it] 25%|██▍ | 2995/12188 [6:27:36<18:02:36, 7.07s/it] {'loss': 0.3595, 'grad_norm': 1.0939212315275408, 'learning_rate': 8.829462930251036e-06, 'epoch': 0.25} + 25%|██▍ | 2995/12188 [6:27:36<18:02:36, 7.07s/it] 25%|██▍ | 2996/12188 [6:27:43<18:03:04, 7.07s/it] {'loss': 0.373, 'grad_norm': 0.7084763321717339, 'learning_rate': 8.828608478834709e-06, 'epoch': 0.25} + 25%|██▍ | 2996/12188 [6:27:43<18:03:04, 7.07s/it] 25%|██▍ | 2997/12188 [6:27:50<17:57:36, 7.03s/it] {'loss': 0.3579, 'grad_norm': 1.791695889600847, 'learning_rate': 8.8277537570482e-06, 'epoch': 0.25} + 25%|██▍ | 2997/12188 [6:27:50<17:57:36, 7.03s/it] 25%|██▍ | 2998/12188 [6:27:59<19:02:37, 7.46s/it] {'loss': 0.3456, 'grad_norm': 1.6731616090838826, 'learning_rate': 8.826898764951873e-06, 'epoch': 0.25} + 25%|██▍ | 2998/12188 [6:27:59<19:02:37, 7.46s/it] 25%|██▍ | 2999/12188 [6:28:05<18:22:28, 7.20s/it] {'loss': 0.3767, 'grad_norm': 1.3987558829463436, 'learning_rate': 8.826043502606102e-06, 'epoch': 0.25} + 25%|██▍ | 2999/12188 [6:28:05<18:22:28, 7.20s/it] 25%|██▍ | 3000/12188 [6:28:12<17:48:09, 6.98s/it] {'loss': 0.4165, 'grad_norm': 0.7047340385992438, 'learning_rate': 8.825187970071285e-06, 'epoch': 0.25} + 25%|██▍ | 3000/12188 [6:28:12<17:48:09, 6.98s/it]/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/utils/checkpoint.py:87: UserWarning: None of the inputs have requires_grad=True. Gradients will be None + warnings.warn( + 25%|██▍ | 3001/12188 [6:28:34<29:44:10, 11.65s/it] {'loss': 0.3756, 'grad_norm': 0.6892800029957916, 'learning_rate': 8.824332167407841e-06, 'epoch': 0.25} + 25%|██▍ | 3001/12188 [6:28:34<29:44:10, 11.65s/it] 25%|██▍ | 3002/12188 [6:28:41<26:17:19, 10.30s/it] {'loss': 0.3971, 'grad_norm': 0.8804511525739181, 'learning_rate': 8.823476094676204e-06, 'epoch': 0.25} + 25%|██▍ | 3002/12188 [6:28:41<26:17:19, 10.30s/it] 25%|██▍ | 3003/12188 [6:28:48<23:41:24, 9.29s/it] {'loss': 0.3518, 'grad_norm': 0.6948173451149168, 'learning_rate': 8.822619751936827e-06, 'epoch': 0.25} + 25%|██▍ | 3003/12188 [6:28:48<23:41:24, 9.29s/it] 25%|██▍ | 3004/12188 [6:28:56<22:15:20, 8.72s/it] {'loss': 0.4015, 'grad_norm': 1.4107031175128668, 'learning_rate': 8.821763139250185e-06, 'epoch': 0.25} + 25%|██▍ | 3004/12188 [6:28:56<22:15:20, 8.72s/it] 25%|██▍ | 3005/12188 [6:29:03<21:24:54, 8.40s/it] {'loss': 0.3935, 'grad_norm': 1.5328218913982867, 'learning_rate': 8.820906256676769e-06, 'epoch': 0.25} + 25%|██▍ | 3005/12188 [6:29:03<21:24:54, 8.40s/it] 25%|██▍ | 3006/12188 [6:29:10<20:06:57, 7.89s/it] {'loss': 0.3642, 'grad_norm': 3.1632452557076234, 'learning_rate': 8.820049104277094e-06, 'epoch': 0.25} + 25%|██▍ | 3006/12188 [6:29:10<20:06:57, 7.89s/it] 25%|██▍ | 3007/12188 [6:29:17<19:17:27, 7.56s/it] {'loss': 0.367, 'grad_norm': 0.9080465456772747, 'learning_rate': 8.819191682111688e-06, 'epoch': 0.25} + 25%|██▍ | 3007/12188 [6:29:17<19:17:27, 7.56s/it] 25%|██▍ | 3008/12188 [6:29:24<18:52:51, 7.40s/it] {'loss': 0.3701, 'grad_norm': 0.7985931729804, 'learning_rate': 8.8183339902411e-06, 'epoch': 0.25} + 25%|██▍ | 3008/12188 [6:29:24<18:52:51, 7.40s/it] 25%|██▍ | 3009/12188 [6:29:31<18:21:40, 7.20s/it] {'loss': 0.3739, 'grad_norm': 0.7448904294284471, 'learning_rate': 8.8174760287259e-06, 'epoch': 0.25} + 25%|██▍ | 3009/12188 [6:29:31<18:21:40, 7.20s/it] 25%|██▍ | 3010/12188 [6:29:38<18:23:13, 7.21s/it] {'loss': 0.3319, 'grad_norm': 0.913598843814343, 'learning_rate': 8.816617797626679e-06, 'epoch': 0.25} + 25%|██▍ | 3010/12188 [6:29:38<18:23:13, 7.21s/it] 25%|██▍ | 3011/12188 [6:29:45<18:29:23, 7.25s/it] {'loss': 0.3957, 'grad_norm': 1.1745708919610058, 'learning_rate': 8.815759297004038e-06, 'epoch': 0.25} + 25%|██▍ | 3011/12188 [6:29:45<18:29:23, 7.25s/it] 25%|██▍ | 3012/12188 [6:29:53<18:51:38, 7.40s/it] {'loss': 0.3859, 'grad_norm': 0.9024060409119015, 'learning_rate': 8.814900526918608e-06, 'epoch': 0.25} + 25%|██▍ | 3012/12188 [6:29:53<18:51:38, 7.40s/it] 25%|██▍ | 3013/12188 [6:30:01<19:17:10, 7.57s/it] {'loss': 0.3912, 'grad_norm': 1.417596370361803, 'learning_rate': 8.814041487431031e-06, 'epoch': 0.25} + 25%|██▍ | 3013/12188 [6:30:01<19:17:10, 7.57s/it] 25%|██▍ | 3014/12188 [6:30:08<18:55:28, 7.43s/it] {'loss': 0.4082, 'grad_norm': 0.639688275575993, 'learning_rate': 8.81318217860197e-06, 'epoch': 0.25} + 25%|██▍ | 3014/12188 [6:30:08<18:55:28, 7.43s/it] 25%|██▍ | 3015/12188 [6:30:16<19:19:33, 7.58s/it] {'loss': 0.4034, 'grad_norm': 0.7488936303860247, 'learning_rate': 8.812322600492114e-06, 'epoch': 0.25} + 25%|██▍ | 3015/12188 [6:30:16<19:19:33, 7.58s/it] 25%|██▍ | 3016/12188 [6:30:26<20:54:49, 8.21s/it] {'loss': 0.3761, 'grad_norm': 0.6703740975490076, 'learning_rate': 8.811462753162157e-06, 'epoch': 0.25} + 25%|██▍ | 3016/12188 [6:30:26<20:54:49, 8.21s/it] 25%|██▍ | 3017/12188 [6:30:32<19:54:43, 7.82s/it] {'loss': 0.3513, 'grad_norm': 0.8422298300499784, 'learning_rate': 8.810602636672827e-06, 'epoch': 0.25} + 25%|██▍ | 3017/12188 [6:30:33<19:54:43, 7.82s/it] 25%|██▍ | 3018/12188 [6:30:40<19:48:24, 7.78s/it] {'loss': 0.3922, 'grad_norm': 0.6918097269493877, 'learning_rate': 8.80974225108486e-06, 'epoch': 0.25} + 25%|██▍ | 3018/12188 [6:30:40<19:48:24, 7.78s/it] 25%|██▍ | 3019/12188 [6:30:47<19:23:04, 7.61s/it] {'loss': 0.3694, 'grad_norm': 0.676674827611269, 'learning_rate': 8.808881596459015e-06, 'epoch': 0.25} + 25%|██▍ | 3019/12188 [6:30:47<19:23:04, 7.61s/it] 25%|██▍ | 3020/12188 [6:30:55<19:17:41, 7.58s/it] {'loss': 0.3683, 'grad_norm': 0.7210611531257359, 'learning_rate': 8.808020672856072e-06, 'epoch': 0.25} + 25%|██▍ | 3020/12188 [6:30:55<19:17:41, 7.58s/it] 25%|██▍ | 3021/12188 [6:31:02<19:11:11, 7.53s/it] {'loss': 0.305, 'grad_norm': 0.6295419739809693, 'learning_rate': 8.807159480336827e-06, 'epoch': 0.25} + 25%|██▍ | 3021/12188 [6:31:02<19:11:11, 7.53s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7fe6d000f330> +[Try #0] Failed to fetch sample 4442498 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7fe6d000f330> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Logs'"}, {'from': 'gpt', 'value': '\nclick(x=0.865, y=0.388)\n'}]} + 25%|██▍ | 3022/12188 [6:31:09<18:31:55, 7.28s/it] {'loss': 0.3492, 'grad_norm': 0.6162706266320641, 'learning_rate': 8.806298018962097e-06, 'epoch': 0.25} + 25%|██▍ | 3022/12188 [6:31:09<18:31:55, 7.28s/it] 25%|██▍ | 3023/12188 [6:31:17<19:18:18, 7.58s/it] {'loss': 0.4032, 'grad_norm': 0.6312782944741793, 'learning_rate': 8.805436288792714e-06, 'epoch': 0.25} + 25%|██▍ | 3023/12188 [6:31:17<19:18:18, 7.58s/it] 25%|██▍ | 3024/12188 [6:31:25<19:11:39, 7.54s/it] {'loss': 0.3882, 'grad_norm': 0.6296488597631836, 'learning_rate': 8.804574289889536e-06, 'epoch': 0.25} + 25%|██▍ | 3024/12188 [6:31:25<19:11:39, 7.54s/it] 25%|██▍ | 3025/12188 [6:31:31<18:30:53, 7.27s/it] {'loss': 0.3929, 'grad_norm': 0.7166549387625955, 'learning_rate': 8.803712022313433e-06, 'epoch': 0.25} + 25%|██▍ | 3025/12188 [6:31:31<18:30:53, 7.27s/it] 25%|██▍ | 3026/12188 [6:31:38<18:19:35, 7.20s/it] {'loss': 0.4198, 'grad_norm': 0.7556890375643721, 'learning_rate': 8.8028494861253e-06, 'epoch': 0.25} + 25%|██▍ | 3026/12188 [6:31:38<18:19:35, 7.20s/it] 25%|██▍ | 3027/12188 [6:31:45<17:42:24, 6.96s/it] {'loss': 0.3403, 'grad_norm': 0.8018425809417878, 'learning_rate': 8.801986681386044e-06, 'epoch': 0.25} + 25%|██▍ | 3027/12188 [6:31:45<17:42:24, 6.96s/it] 25%|██▍ | 3028/12188 [6:31:52<17:38:13, 6.93s/it] {'loss': 0.4029, 'grad_norm': 0.6280914350240412, 'learning_rate': 8.801123608156597e-06, 'epoch': 0.25} + 25%|██▍ | 3028/12188 [6:31:52<17:38:13, 6.93s/it] 25%|██▍ | 3029/12188 [6:31:59<17:40:40, 6.95s/it] {'loss': 0.4109, 'grad_norm': 0.9391231158467825, 'learning_rate': 8.800260266497909e-06, 'epoch': 0.25} + 25%|██▍ | 3029/12188 [6:31:59<17:40:40, 6.95s/it] 25%|██▍ | 3030/12188 [6:32:06<17:48:06, 7.00s/it] {'loss': 0.3843, 'grad_norm': 0.7106335902286239, 'learning_rate': 8.799396656470946e-06, 'epoch': 0.25} + 25%|██▍ | 3030/12188 [6:32:06<17:48:06, 7.00s/it] 25%|██▍ | 3031/12188 [6:32:13<17:37:40, 6.93s/it] {'loss': 0.3999, 'grad_norm': 0.9968615290258042, 'learning_rate': 8.798532778136697e-06, 'epoch': 0.25} + 25%|██▍ | 3031/12188 [6:32:13<17:37:40, 6.93s/it] 25%|██▍ | 3032/12188 [6:32:19<17:34:00, 6.91s/it] {'loss': 0.3768, 'grad_norm': 0.7407498450785749, 'learning_rate': 8.797668631556163e-06, 'epoch': 0.25} + 25%|██▍ | 3032/12188 [6:32:19<17:34:00, 6.91s/it] 25%|██▍ | 3033/12188 [6:32:26<17:15:08, 6.78s/it] {'loss': 0.3668, 'grad_norm': 0.6316564959525762, 'learning_rate': 8.796804216790373e-06, 'epoch': 0.25} + 25%|██▍ | 3033/12188 [6:32:26<17:15:08, 6.78s/it] 25%|██▍ | 3034/12188 [6:32:33<17:35:04, 6.92s/it] {'loss': 0.3728, 'grad_norm': 0.892639849407937, 'learning_rate': 8.79593953390037e-06, 'epoch': 0.25} + 25%|██▍ | 3034/12188 [6:32:33<17:35:04, 6.92s/it] 25%|██▍ | 3035/12188 [6:32:41<17:57:22, 7.06s/it] {'loss': 0.3583, 'grad_norm': 0.6797198220073875, 'learning_rate': 8.795074582947214e-06, 'epoch': 0.25} + 25%|██▍ | 3035/12188 [6:32:41<17:57:22, 7.06s/it] 25%|██▍ | 3036/12188 [6:32:47<17:36:33, 6.93s/it] {'loss': 0.3953, 'grad_norm': 0.7928915643944122, 'learning_rate': 8.79420936399199e-06, 'epoch': 0.25} + 25%|██▍ | 3036/12188 [6:32:47<17:36:33, 6.93s/it] 25%|██▍ | 3037/12188 [6:32:54<17:27:58, 6.87s/it] {'loss': 0.3469, 'grad_norm': 1.0304504985137364, 'learning_rate': 8.793343877095795e-06, 'epoch': 0.25} + 25%|██▍ | 3037/12188 [6:32:54<17:27:58, 6.87s/it] 25%|██▍ | 3038/12188 [6:33:02<18:19:39, 7.21s/it] {'loss': 0.3715, 'grad_norm': 0.9216749731040303, 'learning_rate': 8.792478122319753e-06, 'epoch': 0.25} + 25%|██▍ | 3038/12188 [6:33:02<18:19:39, 7.21s/it] 25%|██▍ | 3039/12188 [6:33:13<21:00:10, 8.26s/it] {'loss': 0.3803, 'grad_norm': 0.7851333497720723, 'learning_rate': 8.791612099724996e-06, 'epoch': 0.25} + 25%|██▍ | 3039/12188 [6:33:13<21:00:10, 8.26s/it] 25%|██▍ | 3040/12188 [6:33:19<19:53:07, 7.83s/it] {'loss': 0.3381, 'grad_norm': 1.2000388047521169, 'learning_rate': 8.790745809372685e-06, 'epoch': 0.25} + 25%|██▍ | 3040/12188 [6:33:19<19:53:07, 7.83s/it] 25%|██▍ | 3041/12188 [6:33:26<19:13:16, 7.56s/it] {'loss': 0.3312, 'grad_norm': 1.1040804094062369, 'learning_rate': 8.789879251323994e-06, 'epoch': 0.25} + 25%|██▍ | 3041/12188 [6:33:26<19:13:16, 7.56s/it] 25%|██▍ | 3042/12188 [6:33:33<18:43:08, 7.37s/it] {'loss': 0.349, 'grad_norm': 0.6510548362952556, 'learning_rate': 8.78901242564012e-06, 'epoch': 0.25} + 25%|██▍ | 3042/12188 [6:33:33<18:43:08, 7.37s/it] 25%|██▍ | 3043/12188 [6:33:40<18:13:45, 7.18s/it] {'loss': 0.4081, 'grad_norm': 0.748662987209478, 'learning_rate': 8.788145332382277e-06, 'epoch': 0.25} + 25%|██▍ | 3043/12188 [6:33:40<18:13:45, 7.18s/it] 25%|██▍ | 3044/12188 [6:33:48<18:39:28, 7.35s/it] {'loss': 0.3902, 'grad_norm': 0.9527244100926405, 'learning_rate': 8.787277971611697e-06, 'epoch': 0.25} + 25%|██▍ | 3044/12188 [6:33:48<18:39:28, 7.35s/it] 25%|██▍ | 3045/12188 [6:33:54<18:10:46, 7.16s/it] {'loss': 0.3714, 'grad_norm': 1.146875550439966, 'learning_rate': 8.78641034338963e-06, 'epoch': 0.25} + 25%|██▍ | 3045/12188 [6:33:54<18:10:46, 7.16s/it] 25%|██▍ | 3046/12188 [6:34:01<17:57:18, 7.07s/it] {'loss': 0.3854, 'grad_norm': 0.675962389119729, 'learning_rate': 8.78554244777735e-06, 'epoch': 0.25} + 25%|██▍ | 3046/12188 [6:34:01<17:57:18, 7.07s/it] 25%|██▌ | 3047/12188 [6:34:08<17:58:08, 7.08s/it] {'loss': 0.3719, 'grad_norm': 0.702898146711128, 'learning_rate': 8.784674284836141e-06, 'epoch': 0.25} + 25%|██▌ | 3047/12188 [6:34:08<17:58:08, 7.08s/it] 25%|██▌ | 3048/12188 [6:34:17<18:50:13, 7.42s/it] {'loss': 0.33, 'grad_norm': 0.5893852862835779, 'learning_rate': 8.783805854627317e-06, 'epoch': 0.25} + 25%|██▌ | 3048/12188 [6:34:17<18:50:13, 7.42s/it] 25%|██▌ | 3049/12188 [6:34:27<20:53:29, 8.23s/it] {'loss': 0.3741, 'grad_norm': 0.9111823938331519, 'learning_rate': 8.782937157212201e-06, 'epoch': 0.25} + 25%|██▌ | 3049/12188 [6:34:27<20:53:29, 8.23s/it] 25%|██▌ | 3050/12188 [6:34:34<20:08:27, 7.93s/it] {'loss': 0.397, 'grad_norm': 0.828993825505094, 'learning_rate': 8.782068192652143e-06, 'epoch': 0.25} + 25%|██▌ | 3050/12188 [6:34:34<20:08:27, 7.93s/it] 25%|██▌ | 3051/12188 [6:34:41<19:39:37, 7.75s/it] {'loss': 0.386, 'grad_norm': 0.9204120507503143, 'learning_rate': 8.781198961008504e-06, 'epoch': 0.25} + 25%|██▌ | 3051/12188 [6:34:41<19:39:37, 7.75s/it] 25%|██▌ | 3052/12188 [6:34:48<18:41:32, 7.37s/it] {'loss': 0.3753, 'grad_norm': 0.6389945631281025, 'learning_rate': 8.780329462342669e-06, 'epoch': 0.25} + 25%|██▌ | 3052/12188 [6:34:48<18:41:32, 7.37s/it] 25%|██▌ | 3053/12188 [6:34:56<19:26:06, 7.66s/it] {'loss': 0.359, 'grad_norm': 1.1067516642738267, 'learning_rate': 8.779459696716042e-06, 'epoch': 0.25} + 25%|██▌ | 3053/12188 [6:34:56<19:26:06, 7.66s/it] 25%|██▌ | 3054/12188 [6:35:03<18:53:15, 7.44s/it] {'loss': 0.3847, 'grad_norm': 0.9991267696738696, 'learning_rate': 8.778589664190043e-06, 'epoch': 0.25} + 25%|██▌ | 3054/12188 [6:35:03<18:53:15, 7.44s/it] 25%|██▌ | 3055/12188 [6:35:10<18:27:24, 7.28s/it] {'loss': 0.3386, 'grad_norm': 0.642300597176655, 'learning_rate': 8.777719364826112e-06, 'epoch': 0.25} + 25%|██▌ | 3055/12188 [6:35:10<18:27:24, 7.28s/it] 25%|██▌ | 3056/12188 [6:35:17<18:38:06, 7.35s/it] {'loss': 0.3581, 'grad_norm': 0.7457795379850656, 'learning_rate': 8.77684879868571e-06, 'epoch': 0.25} + 25%|██▌ | 3056/12188 [6:35:18<18:38:06, 7.35s/it] 25%|██▌ | 3057/12188 [6:35:25<18:49:56, 7.42s/it] {'loss': 0.3859, 'grad_norm': 0.7290220715012876, 'learning_rate': 8.775977965830312e-06, 'epoch': 0.25} + 25%|██▌ | 3057/12188 [6:35:25<18:49:56, 7.42s/it] 25%|██▌ | 3058/12188 [6:35:32<18:26:55, 7.27s/it] {'loss': 0.3536, 'grad_norm': 0.7150292323788718, 'learning_rate': 8.775106866321419e-06, 'epoch': 0.25} + 25%|██▌ | 3058/12188 [6:35:32<18:26:55, 7.27s/it] 25%|██▌ | 3059/12188 [6:35:39<18:14:10, 7.19s/it] {'loss': 0.3955, 'grad_norm': 0.6151029625758356, 'learning_rate': 8.774235500220542e-06, 'epoch': 0.25} + 25%|██▌ | 3059/12188 [6:35:39<18:14:10, 7.19s/it] 25%|██▌ | 3060/12188 [6:35:47<18:45:40, 7.40s/it] {'loss': 0.3846, 'grad_norm': 0.8500418380247492, 'learning_rate': 8.773363867589219e-06, 'epoch': 0.25} + 25%|██▌ | 3060/12188 [6:35:47<18:45:40, 7.40s/it] 25%|██▌ | 3061/12188 [6:35:54<18:30:10, 7.30s/it] {'loss': 0.3553, 'grad_norm': 0.7340326277096766, 'learning_rate': 8.772491968489003e-06, 'epoch': 0.25} + 25%|██▌ | 3061/12188 [6:35:54<18:30:10, 7.30s/it] 25%|██▌ | 3062/12188 [6:36:01<18:17:37, 7.22s/it] {'loss': 0.378, 'grad_norm': 0.9137741147682489, 'learning_rate': 8.771619802981463e-06, 'epoch': 0.25} + 25%|██▌ | 3062/12188 [6:36:01<18:17:37, 7.22s/it] 25%|██▌ | 3063/12188 [6:36:09<18:47:00, 7.41s/it] {'loss': 0.3893, 'grad_norm': 0.6801281100191163, 'learning_rate': 8.770747371128195e-06, 'epoch': 0.25} + 25%|██▌ | 3063/12188 [6:36:09<18:47:00, 7.41s/it] 25%|██▌ | 3064/12188 [6:36:16<18:24:16, 7.26s/it] {'loss': 0.3761, 'grad_norm': 0.6934673904657431, 'learning_rate': 8.769874672990802e-06, 'epoch': 0.25} + 25%|██▌ | 3064/12188 [6:36:16<18:24:16, 7.26s/it] 25%|██▌ | 3065/12188 [6:36:23<18:05:14, 7.14s/it] {'loss': 0.4173, 'grad_norm': 0.704497505988441, 'learning_rate': 8.76900170863092e-06, 'epoch': 0.25} + 25%|██▌ | 3065/12188 [6:36:23<18:05:14, 7.14s/it] 25%|██▌ | 3066/12188 [6:36:31<18:52:09, 7.45s/it] {'loss': 0.3981, 'grad_norm': 0.649337551494044, 'learning_rate': 8.76812847811019e-06, 'epoch': 0.25} + 25%|██▌ | 3066/12188 [6:36:31<18:52:09, 7.45s/it] 25%|██▌ | 3067/12188 [6:36:37<18:13:56, 7.20s/it] {'loss': 0.3991, 'grad_norm': 0.7441478229283116, 'learning_rate': 8.767254981490282e-06, 'epoch': 0.25} + 25%|██▌ | 3067/12188 [6:36:37<18:13:56, 7.20s/it] 25%|██▌ | 3068/12188 [6:36:44<17:51:46, 7.05s/it] {'loss': 0.3713, 'grad_norm': 0.7252684379492031, 'learning_rate': 8.76638121883288e-06, 'epoch': 0.25} + 25%|██▌ | 3068/12188 [6:36:44<17:51:46, 7.05s/it] 25%|██▌ | 3069/12188 [6:36:51<18:01:13, 7.11s/it] {'loss': 0.3546, 'grad_norm': 0.6737842172951855, 'learning_rate': 8.765507190199687e-06, 'epoch': 0.25} + 25%|██▌ | 3069/12188 [6:36:51<18:01:13, 7.11s/it] 25%|██▌ | 3070/12188 [6:36:58<17:39:34, 6.97s/it] {'loss': 0.3407, 'grad_norm': 0.6950362241773563, 'learning_rate': 8.764632895652425e-06, 'epoch': 0.25} + 25%|██▌ | 3070/12188 [6:36:58<17:39:34, 6.97s/it] 25%|██▌ | 3071/12188 [6:37:05<17:49:40, 7.04s/it] {'loss': 0.3639, 'grad_norm': 0.8296314664343977, 'learning_rate': 8.763758335252838e-06, 'epoch': 0.25} + 25%|██▌ | 3071/12188 [6:37:05<17:49:40, 7.04s/it] 25%|██▌ | 3072/12188 [6:37:12<17:42:52, 7.00s/it] {'loss': 0.3629, 'grad_norm': 0.7643804841735021, 'learning_rate': 8.762883509062684e-06, 'epoch': 0.25} + 25%|██▌ | 3072/12188 [6:37:12<17:42:52, 7.00s/it] 25%|██▌ | 3073/12188 [6:37:19<17:37:34, 6.96s/it] {'loss': 0.4237, 'grad_norm': 0.8889505624952746, 'learning_rate': 8.762008417143742e-06, 'epoch': 0.25} + 25%|██▌ | 3073/12188 [6:37:19<17:37:34, 6.96s/it] 25%|██▌ | 3074/12188 [6:37:26<17:49:52, 7.04s/it] {'loss': 0.3777, 'grad_norm': 0.6238035289740788, 'learning_rate': 8.761133059557809e-06, 'epoch': 0.25} + 25%|██▌ | 3074/12188 [6:37:26<17:49:52, 7.04s/it] 25%|██▌ | 3075/12188 [6:37:34<18:03:51, 7.14s/it] {'loss': 0.3724, 'grad_norm': 0.76396875095185, 'learning_rate': 8.760257436366703e-06, 'epoch': 0.25} + 25%|██▌ | 3075/12188 [6:37:34<18:03:51, 7.14s/it] 25%|██▌ | 3076/12188 [6:37:41<18:06:56, 7.16s/it] {'loss': 0.3901, 'grad_norm': 0.6519233319380319, 'learning_rate': 8.759381547632258e-06, 'epoch': 0.25} + 25%|██▌ | 3076/12188 [6:37:41<18:06:56, 7.16s/it] 25%|██▌ | 3077/12188 [6:37:48<17:56:20, 7.09s/it] {'loss': 0.3526, 'grad_norm': 0.682121211004433, 'learning_rate': 8.758505393416327e-06, 'epoch': 0.25} + 25%|██▌ | 3077/12188 [6:37:48<17:56:20, 7.09s/it] 25%|██▌ | 3078/12188 [6:37:57<19:46:33, 7.81s/it] {'loss': 0.3923, 'grad_norm': 0.7626716712707696, 'learning_rate': 8.757628973780785e-06, 'epoch': 0.25} + 25%|██▌ | 3078/12188 [6:37:57<19:46:33, 7.81s/it] 25%|██▌ | 3079/12188 [6:38:04<19:02:19, 7.52s/it] {'loss': 0.4167, 'grad_norm': 0.7330884134346776, 'learning_rate': 8.75675228878752e-06, 'epoch': 0.25} + 25%|██▌ | 3079/12188 [6:38:04<19:02:19, 7.52s/it] 25%|██▌ | 3080/12188 [6:38:11<18:29:05, 7.31s/it] {'loss': 0.3531, 'grad_norm': 0.6871625114297178, 'learning_rate': 8.755875338498445e-06, 'epoch': 0.25} + 25%|██▌ | 3080/12188 [6:38:11<18:29:05, 7.31s/it] 25%|██▌ | 3081/12188 [6:38:18<18:21:22, 7.26s/it] {'loss': 0.3419, 'grad_norm': 0.653881730237364, 'learning_rate': 8.754998122975489e-06, 'epoch': 0.25} + 25%|██▌ | 3081/12188 [6:38:18<18:21:22, 7.26s/it] 25%|██▌ | 3082/12188 [6:38:25<17:55:54, 7.09s/it] {'loss': 0.4616, 'grad_norm': 0.7733595193509784, 'learning_rate': 8.754120642280597e-06, 'epoch': 0.25} + 25%|██▌ | 3082/12188 [6:38:25<17:55:54, 7.09s/it] 25%|██▌ | 3083/12188 [6:38:32<17:51:41, 7.06s/it] {'loss': 0.3927, 'grad_norm': 0.7332296891829597, 'learning_rate': 8.753242896475737e-06, 'epoch': 0.25} + 25%|██▌ | 3083/12188 [6:38:32<17:51:41, 7.06s/it] 25%|██▌ | 3084/12188 [6:38:39<18:19:06, 7.24s/it] {'loss': 0.3788, 'grad_norm': 1.1917109692431973, 'learning_rate': 8.752364885622895e-06, 'epoch': 0.25} + 25%|██▌ | 3084/12188 [6:38:39<18:19:06, 7.24s/it] 25%|██▌ | 3085/12188 [6:38:46<18:04:52, 7.15s/it] {'loss': 0.3564, 'grad_norm': 0.6938713731334266, 'learning_rate': 8.751486609784073e-06, 'epoch': 0.25} + 25%|██▌ | 3085/12188 [6:38:46<18:04:52, 7.15s/it] 25%|██▌ | 3086/12188 [6:38:53<17:53:24, 7.08s/it] {'loss': 0.3866, 'grad_norm': 0.6756648714475658, 'learning_rate': 8.750608069021292e-06, 'epoch': 0.25} + 25%|██▌ | 3086/12188 [6:38:53<17:53:24, 7.08s/it] 25%|██▌ | 3087/12188 [6:39:00<17:40:37, 6.99s/it] {'loss': 0.3389, 'grad_norm': 0.654213081297272, 'learning_rate': 8.749729263396597e-06, 'epoch': 0.25} + 25%|██▌ | 3087/12188 [6:39:00<17:40:37, 6.99s/it] 25%|██▌ | 3088/12188 [6:39:08<18:28:15, 7.31s/it] {'loss': 0.3685, 'grad_norm': 0.8856446479266715, 'learning_rate': 8.748850192972046e-06, 'epoch': 0.25} + 25%|██▌ | 3088/12188 [6:39:08<18:28:15, 7.31s/it] 25%|██▌ | 3089/12188 [6:39:15<18:14:31, 7.22s/it] {'loss': 0.3648, 'grad_norm': 1.0602983304927052, 'learning_rate': 8.747970857809715e-06, 'epoch': 0.25} + 25%|██▌ | 3089/12188 [6:39:15<18:14:31, 7.22s/it] 25%|██▌ | 3090/12188 [6:39:22<18:00:33, 7.13s/it] {'loss': 0.3416, 'grad_norm': 0.7159911380712197, 'learning_rate': 8.747091257971706e-06, 'epoch': 0.25} + 25%|██▌ | 3090/12188 [6:39:22<18:00:33, 7.13s/it] 25%|██▌ | 3091/12188 [6:39:30<18:39:40, 7.38s/it] {'loss': 0.3737, 'grad_norm': 0.9648275976423778, 'learning_rate': 8.746211393520131e-06, 'epoch': 0.25} + 25%|██▌ | 3091/12188 [6:39:30<18:39:40, 7.38s/it] 25%|██▌ | 3092/12188 [6:39:37<18:17:33, 7.24s/it] {'loss': 0.3688, 'grad_norm': 0.6486625779394859, 'learning_rate': 8.745331264517126e-06, 'epoch': 0.25} + 25%|██▌ | 3092/12188 [6:39:37<18:17:33, 7.24s/it] 25%|██▌ | 3093/12188 [6:39:45<18:38:26, 7.38s/it] {'loss': 0.4088, 'grad_norm': 0.644147888213152, 'learning_rate': 8.744450871024846e-06, 'epoch': 0.25} + 25%|██▌ | 3093/12188 [6:39:45<18:38:26, 7.38s/it] 25%|██▌ | 3094/12188 [6:39:53<19:27:50, 7.71s/it] {'loss': 0.3909, 'grad_norm': 0.8956896047725762, 'learning_rate': 8.74357021310546e-06, 'epoch': 0.25} + 25%|██▌ | 3094/12188 [6:39:53<19:27:50, 7.71s/it] 25%|██▌ | 3095/12188 [6:40:00<18:40:16, 7.39s/it] {'loss': 0.3455, 'grad_norm': 1.026362358628755, 'learning_rate': 8.74268929082116e-06, 'epoch': 0.25} + 25%|██▌ | 3095/12188 [6:40:00<18:40:16, 7.39s/it] 25%|██▌ | 3096/12188 [6:40:09<19:59:22, 7.91s/it] {'loss': 0.4243, 'grad_norm': 0.6290164191936498, 'learning_rate': 8.741808104234157e-06, 'epoch': 0.25} + 25%|██▌ | 3096/12188 [6:40:09<19:59:22, 7.91s/it] 25%|██▌ | 3097/12188 [6:40:16<19:14:17, 7.62s/it] {'loss': 0.3837, 'grad_norm': 0.988097534016537, 'learning_rate': 8.740926653406676e-06, 'epoch': 0.25} + 25%|██▌ | 3097/12188 [6:40:16<19:14:17, 7.62s/it] 25%|██▌ | 3098/12188 [6:40:23<18:43:15, 7.41s/it] {'loss': 0.3659, 'grad_norm': 0.6539478522582054, 'learning_rate': 8.740044938400965e-06, 'epoch': 0.25} + 25%|██▌ | 3098/12188 [6:40:23<18:43:15, 7.41s/it] 25%|██▌ | 3099/12188 [6:40:30<18:43:57, 7.42s/it] {'loss': 0.3777, 'grad_norm': 0.6980028278389073, 'learning_rate': 8.73916295927929e-06, 'epoch': 0.25} + 25%|██▌ | 3099/12188 [6:40:30<18:43:57, 7.42s/it] 25%|██▌ | 3100/12188 [6:40:37<18:14:17, 7.22s/it] {'loss': 0.3894, 'grad_norm': 0.7182057234326134, 'learning_rate': 8.738280716103933e-06, 'epoch': 0.25} + 25%|██▌ | 3100/12188 [6:40:37<18:14:17, 7.22s/it] 25%|██▌ | 3101/12188 [6:40:44<17:56:43, 7.11s/it] {'loss': 0.3854, 'grad_norm': 0.7106198030103167, 'learning_rate': 8.7373982089372e-06, 'epoch': 0.25} + 25%|██▌ | 3101/12188 [6:40:44<17:56:43, 7.11s/it] 25%|██▌ | 3102/12188 [6:40:52<19:10:46, 7.60s/it] {'loss': 0.3821, 'grad_norm': 0.6866009263146211, 'learning_rate': 8.736515437841408e-06, 'epoch': 0.25} + 25%|██▌ | 3102/12188 [6:40:52<19:10:46, 7.60s/it] 25%|██▌ | 3103/12188 [6:41:00<19:15:48, 7.63s/it] {'loss': 0.4356, 'grad_norm': 0.6293881911369105, 'learning_rate': 8.7356324028789e-06, 'epoch': 0.25} + 25%|██▌ | 3103/12188 [6:41:00<19:15:48, 7.63s/it] 25%|██▌ | 3104/12188 [6:41:07<18:43:10, 7.42s/it] {'loss': 0.382, 'grad_norm': 0.6421007613734946, 'learning_rate': 8.734749104112032e-06, 'epoch': 0.25} + 25%|██▌ | 3104/12188 [6:41:07<18:43:10, 7.42s/it] 25%|██▌ | 3105/12188 [6:41:14<18:23:15, 7.29s/it] {'loss': 0.4328, 'grad_norm': 0.7882450782968005, 'learning_rate': 8.733865541603185e-06, 'epoch': 0.25} + 25%|██▌ | 3105/12188 [6:41:14<18:23:15, 7.29s/it] 25%|██▌ | 3106/12188 [6:41:22<19:10:06, 7.60s/it] {'loss': 0.4106, 'grad_norm': 0.6729069518455423, 'learning_rate': 8.73298171541475e-06, 'epoch': 0.25} + 25%|██▌ | 3106/12188 [6:41:22<19:10:06, 7.60s/it] 25%|██▌ | 3107/12188 [6:41:30<19:08:15, 7.59s/it] {'loss': 0.3882, 'grad_norm': 0.6719987722859339, 'learning_rate': 8.732097625609145e-06, 'epoch': 0.25} + 25%|██▌ | 3107/12188 [6:41:30<19:08:15, 7.59s/it] 26%|██▌ | 3108/12188 [6:41:37<18:43:42, 7.43s/it] {'loss': 0.387, 'grad_norm': 0.78185861093442, 'learning_rate': 8.731213272248803e-06, 'epoch': 0.25} + 26%|██▌ | 3108/12188 [6:41:37<18:43:42, 7.43s/it] 26%|██▌ | 3109/12188 [6:41:45<18:51:33, 7.48s/it] {'loss': 0.3944, 'grad_norm': 0.7203179324118741, 'learning_rate': 8.730328655396171e-06, 'epoch': 0.26} + 26%|██▌ | 3109/12188 [6:41:45<18:51:33, 7.48s/it] 26%|██▌ | 3110/12188 [6:41:52<18:33:52, 7.36s/it] {'loss': 0.3573, 'grad_norm': 0.6432028957022919, 'learning_rate': 8.729443775113727e-06, 'epoch': 0.26} + 26%|██▌ | 3110/12188 [6:41:52<18:33:52, 7.36s/it] 26%|██▌ | 3111/12188 [6:41:59<18:48:39, 7.46s/it] {'loss': 0.3882, 'grad_norm': 0.9474889377858284, 'learning_rate': 8.728558631463951e-06, 'epoch': 0.26} + 26%|██▌ | 3111/12188 [6:41:59<18:48:39, 7.46s/it] 26%|██▌ | 3112/12188 [6:42:06<18:16:20, 7.25s/it] {'loss': 0.3503, 'grad_norm': 0.8867881523997386, 'learning_rate': 8.727673224509358e-06, 'epoch': 0.26} + 26%|██▌ | 3112/12188 [6:42:06<18:16:20, 7.25s/it] 26%|██▌ | 3113/12188 [6:42:13<17:53:51, 7.10s/it] {'loss': 0.332, 'grad_norm': 0.747558353303817, 'learning_rate': 8.72678755431247e-06, 'epoch': 0.26} + 26%|██▌ | 3113/12188 [6:42:13<17:53:51, 7.10s/it] 26%|██▌ | 3114/12188 [6:42:21<18:20:05, 7.27s/it] {'loss': 0.4018, 'grad_norm': 0.7508825298846448, 'learning_rate': 8.725901620935832e-06, 'epoch': 0.26} + 26%|██▌ | 3114/12188 [6:42:21<18:20:05, 7.27s/it] 26%|██▌ | 3115/12188 [6:42:27<17:50:07, 7.08s/it] {'loss': 0.4386, 'grad_norm': 0.7161126731747893, 'learning_rate': 8.725015424442008e-06, 'epoch': 0.26} + 26%|██▌ | 3115/12188 [6:42:27<17:50:07, 7.08s/it] 26%|██▌ | 3116/12188 [6:42:35<18:37:22, 7.39s/it] {'loss': 0.3746, 'grad_norm': 0.6145420502089158, 'learning_rate': 8.724128964893577e-06, 'epoch': 0.26} + 26%|██▌ | 3116/12188 [6:42:35<18:37:22, 7.39s/it] 26%|██▌ | 3117/12188 [6:42:43<18:46:14, 7.45s/it] {'loss': 0.3473, 'grad_norm': 0.6279896416480096, 'learning_rate': 8.723242242353144e-06, 'epoch': 0.26} + 26%|██▌ | 3117/12188 [6:42:43<18:46:14, 7.45s/it] 26%|██▌ | 3118/12188 [6:42:50<18:31:10, 7.35s/it] {'loss': 0.3553, 'grad_norm': 0.8217677090022822, 'learning_rate': 8.722355256883327e-06, 'epoch': 0.26} + 26%|██▌ | 3118/12188 [6:42:50<18:31:10, 7.35s/it] 26%|██▌ | 3119/12188 [6:42:57<18:20:52, 7.28s/it] {'loss': 0.3634, 'grad_norm': 0.7874414825887118, 'learning_rate': 8.721468008546761e-06, 'epoch': 0.26} + 26%|██▌ | 3119/12188 [6:42:57<18:20:52, 7.28s/it] 26%|██▌ | 3120/12188 [6:43:05<18:43:07, 7.43s/it] {'loss': 0.4106, 'grad_norm': 1.28294732132005, 'learning_rate': 8.720580497406103e-06, 'epoch': 0.26} + 26%|██▌ | 3120/12188 [6:43:05<18:43:07, 7.43s/it] 26%|██▌ | 3121/12188 [6:43:12<18:16:58, 7.26s/it] {'loss': 0.3524, 'grad_norm': 0.651098870704053, 'learning_rate': 8.719692723524028e-06, 'epoch': 0.26} + 26%|██▌ | 3121/12188 [6:43:12<18:16:58, 7.26s/it] 26%|██▌ | 3122/12188 [6:43:19<18:05:02, 7.18s/it] {'loss': 0.3786, 'grad_norm': 0.6494304494663434, 'learning_rate': 8.718804686963229e-06, 'epoch': 0.26} + 26%|██▌ | 3122/12188 [6:43:19<18:05:02, 7.18s/it] 26%|██▌ | 3123/12188 [6:43:25<17:35:12, 6.98s/it] {'loss': 0.3768, 'grad_norm': 0.6631088836328631, 'learning_rate': 8.717916387786417e-06, 'epoch': 0.26} + 26%|██▌ | 3123/12188 [6:43:25<17:35:12, 6.98s/it] 26%|██▌ | 3124/12188 [6:43:34<18:46:55, 7.46s/it] {'loss': 0.3747, 'grad_norm': 0.6573655231048589, 'learning_rate': 8.717027826056324e-06, 'epoch': 0.26} + 26%|██▌ | 3124/12188 [6:43:34<18:46:55, 7.46s/it] 26%|██▌ | 3125/12188 [6:43:41<18:51:20, 7.49s/it] {'loss': 0.3587, 'grad_norm': 0.7067813693421778, 'learning_rate': 8.716139001835698e-06, 'epoch': 0.26} + 26%|██▌ | 3125/12188 [6:43:41<18:51:20, 7.49s/it] 26%|██▌ | 3126/12188 [6:43:49<18:58:39, 7.54s/it] {'loss': 0.344, 'grad_norm': 0.623039176360185, 'learning_rate': 8.715249915187305e-06, 'epoch': 0.26} + 26%|██▌ | 3126/12188 [6:43:49<18:58:39, 7.54s/it] 26%|██▌ | 3127/12188 [6:43:57<19:05:51, 7.59s/it] {'loss': 0.3606, 'grad_norm': 0.6425020434895568, 'learning_rate': 8.714360566173932e-06, 'epoch': 0.26} + 26%|██▌ | 3127/12188 [6:43:57<19:05:51, 7.59s/it] 26%|██▌ | 3128/12188 [6:44:05<19:37:03, 7.80s/it] {'loss': 0.4179, 'grad_norm': 0.7357136308599259, 'learning_rate': 8.713470954858385e-06, 'epoch': 0.26} + 26%|██▌ | 3128/12188 [6:44:05<19:37:03, 7.80s/it] 26%|██▌ | 3129/12188 [6:44:12<18:57:44, 7.54s/it] {'loss': 0.4084, 'grad_norm': 0.8416991652523412, 'learning_rate': 8.712581081303482e-06, 'epoch': 0.26} + 26%|██▌ | 3129/12188 [6:44:12<18:57:44, 7.54s/it] 26%|██▌ | 3130/12188 [6:44:19<18:23:25, 7.31s/it] {'loss': 0.326, 'grad_norm': 0.6194575037564201, 'learning_rate': 8.71169094557207e-06, 'epoch': 0.26} + 26%|██▌ | 3130/12188 [6:44:19<18:23:25, 7.31s/it] 26%|██▌ | 3131/12188 [6:44:27<19:07:47, 7.60s/it] {'loss': 0.356, 'grad_norm': 0.6477605472957103, 'learning_rate': 8.710800547727008e-06, 'epoch': 0.26} + 26%|██▌ | 3131/12188 [6:44:27<19:07:47, 7.60s/it] 26%|██▌ | 3132/12188 [6:44:34<18:21:48, 7.30s/it] {'loss': 0.3759, 'grad_norm': 0.6478904926986937, 'learning_rate': 8.709909887831172e-06, 'epoch': 0.26} + 26%|██▌ | 3132/12188 [6:44:34<18:21:48, 7.30s/it] 26%|██▌ | 3133/12188 [6:44:40<17:56:09, 7.13s/it] {'loss': 0.3539, 'grad_norm': 0.7428250866988848, 'learning_rate': 8.70901896594746e-06, 'epoch': 0.26} + 26%|██▌ | 3133/12188 [6:44:40<17:56:09, 7.13s/it] 26%|██▌ | 3134/12188 [6:44:48<18:09:13, 7.22s/it] {'loss': 0.3959, 'grad_norm': 0.8011847400343893, 'learning_rate': 8.708127782138788e-06, 'epoch': 0.26} + 26%|██▌ | 3134/12188 [6:44:48<18:09:13, 7.22s/it] 26%|██▌ | 3135/12188 [6:44:56<18:41:44, 7.43s/it] {'loss': 0.3474, 'grad_norm': 0.7458113687547491, 'learning_rate': 8.707236336468089e-06, 'epoch': 0.26} + 26%|██▌ | 3135/12188 [6:44:56<18:41:44, 7.43s/it] 26%|██▌ | 3136/12188 [6:45:04<18:56:19, 7.53s/it] {'loss': 0.3868, 'grad_norm': 0.6841930482702712, 'learning_rate': 8.706344628998315e-06, 'epoch': 0.26} + 26%|██▌ | 3136/12188 [6:45:04<18:56:19, 7.53s/it] 26%|██▌ | 3137/12188 [6:45:10<18:28:46, 7.35s/it] {'loss': 0.4093, 'grad_norm': 0.6558018591547164, 'learning_rate': 8.705452659792439e-06, 'epoch': 0.26} + 26%|██▌ | 3137/12188 [6:45:10<18:28:46, 7.35s/it] 26%|██▌ | 3138/12188 [6:45:18<18:22:40, 7.31s/it] {'loss': 0.3612, 'grad_norm': 0.7643772301741847, 'learning_rate': 8.704560428913451e-06, 'epoch': 0.26} + 26%|██▌ | 3138/12188 [6:45:18<18:22:40, 7.31s/it] 26%|██▌ | 3139/12188 [6:45:24<17:50:39, 7.10s/it] {'loss': 0.3534, 'grad_norm': 0.7424897260193716, 'learning_rate': 8.703667936424357e-06, 'epoch': 0.26} + 26%|██▌ | 3139/12188 [6:45:24<17:50:39, 7.10s/it] 26%|██▌ | 3140/12188 [6:45:31<17:45:32, 7.07s/it] {'loss': 0.3565, 'grad_norm': 0.7388482348297393, 'learning_rate': 8.702775182388182e-06, 'epoch': 0.26} + 26%|██▌ | 3140/12188 [6:45:31<17:45:32, 7.07s/it] 26%|██▌ | 3141/12188 [6:45:40<19:02:26, 7.58s/it] {'loss': 0.3425, 'grad_norm': 0.6605223023147049, 'learning_rate': 8.701882166867974e-06, 'epoch': 0.26} + 26%|██▌ | 3141/12188 [6:45:40<19:02:26, 7.58s/it] 26%|██▌ | 3142/12188 [6:45:47<18:37:20, 7.41s/it] {'loss': 0.3897, 'grad_norm': 0.970269352791597, 'learning_rate': 8.700988889926793e-06, 'epoch': 0.26} + 26%|██▌ | 3142/12188 [6:45:47<18:37:20, 7.41s/it] 26%|██▌ | 3143/12188 [6:45:55<18:40:49, 7.44s/it] {'loss': 0.3687, 'grad_norm': 0.7797093430693782, 'learning_rate': 8.700095351627725e-06, 'epoch': 0.26} + 26%|██▌ | 3143/12188 [6:45:55<18:40:49, 7.44s/it] 26%|██▌ | 3144/12188 [6:46:02<18:22:52, 7.32s/it] {'loss': 0.4083, 'grad_norm': 0.7057463372552539, 'learning_rate': 8.699201552033866e-06, 'epoch': 0.26} + 26%|██▌ | 3144/12188 [6:46:02<18:22:52, 7.32s/it] 26%|██▌ | 3145/12188 [6:46:09<18:18:25, 7.29s/it] {'loss': 0.3924, 'grad_norm': 0.679278765238125, 'learning_rate': 8.698307491208338e-06, 'epoch': 0.26} + 26%|██▌ | 3145/12188 [6:46:09<18:18:25, 7.29s/it] 26%|██▌ | 3146/12188 [6:46:16<18:13:07, 7.25s/it] {'loss': 0.3554, 'grad_norm': 0.6240637632897383, 'learning_rate': 8.697413169214275e-06, 'epoch': 0.26} + 26%|██▌ | 3146/12188 [6:46:16<18:13:07, 7.25s/it] 26%|██▌ | 3147/12188 [6:46:23<18:22:30, 7.32s/it] {'loss': 0.3619, 'grad_norm': 0.6908611847725763, 'learning_rate': 8.696518586114836e-06, 'epoch': 0.26} + 26%|██▌ | 3147/12188 [6:46:23<18:22:30, 7.32s/it] 26%|██▌ | 3148/12188 [6:46:30<18:08:04, 7.22s/it] {'loss': 0.378, 'grad_norm': 0.8104880066789462, 'learning_rate': 8.695623741973194e-06, 'epoch': 0.26} + 26%|██▌ | 3148/12188 [6:46:30<18:08:04, 7.22s/it] 26%|██▌ | 3149/12188 [6:46:38<18:07:14, 7.22s/it] {'loss': 0.3875, 'grad_norm': 0.8373296263428138, 'learning_rate': 8.694728636852538e-06, 'epoch': 0.26} + 26%|██▌ | 3149/12188 [6:46:38<18:07:14, 7.22s/it] 26%|██▌ | 3150/12188 [6:46:45<18:00:02, 7.17s/it] {'loss': 0.3816, 'grad_norm': 0.6428865257449289, 'learning_rate': 8.693833270816083e-06, 'epoch': 0.26} + 26%|██▌ | 3150/12188 [6:46:45<18:00:02, 7.17s/it] 26%|██▌ | 3151/12188 [6:46:52<18:19:33, 7.30s/it] {'loss': 0.3391, 'grad_norm': 0.7097985361668929, 'learning_rate': 8.692937643927058e-06, 'epoch': 0.26} + 26%|██▌ | 3151/12188 [6:46:52<18:19:33, 7.30s/it] 26%|██▌ | 3152/12188 [6:46:59<18:00:25, 7.17s/it] {'loss': 0.3716, 'grad_norm': 0.9167344511255734, 'learning_rate': 8.69204175624871e-06, 'epoch': 0.26} + 26%|██▌ | 3152/12188 [6:46:59<18:00:25, 7.17s/it] 26%|██▌ | 3153/12188 [6:47:07<18:30:17, 7.37s/it] {'loss': 0.4019, 'grad_norm': 0.8911211227606003, 'learning_rate': 8.691145607844304e-06, 'epoch': 0.26} + 26%|██▌ | 3153/12188 [6:47:07<18:30:17, 7.37s/it] 26%|██▌ | 3154/12188 [6:47:15<19:14:10, 7.67s/it] {'loss': 0.3571, 'grad_norm': 0.6503747754520841, 'learning_rate': 8.690249198777126e-06, 'epoch': 0.26} + 26%|██▌ | 3154/12188 [6:47:15<19:14:10, 7.67s/it] 26%|██▌ | 3155/12188 [6:47:23<18:55:57, 7.55s/it] {'loss': 0.3868, 'grad_norm': 0.7816476818370983, 'learning_rate': 8.689352529110478e-06, 'epoch': 0.26} + 26%|██▌ | 3155/12188 [6:47:23<18:55:57, 7.55s/it] 26%|██▌ | 3156/12188 [6:47:33<20:56:18, 8.35s/it] {'loss': 0.3968, 'grad_norm': 0.6697908427987581, 'learning_rate': 8.688455598907681e-06, 'epoch': 0.26} + 26%|██▌ | 3156/12188 [6:47:33<20:56:18, 8.35s/it] 26%|██▌ | 3157/12188 [6:47:40<20:20:54, 8.11s/it] {'loss': 0.3452, 'grad_norm': 0.7453234468912611, 'learning_rate': 8.687558408232076e-06, 'epoch': 0.26} + 26%|██▌ | 3157/12188 [6:47:40<20:20:54, 8.11s/it] 26%|██▌ | 3158/12188 [6:47:48<19:47:38, 7.89s/it] {'loss': 0.3612, 'grad_norm': 0.6713603638243494, 'learning_rate': 8.686660957147022e-06, 'epoch': 0.26} + 26%|██▌ | 3158/12188 [6:47:48<19:47:38, 7.89s/it] 26%|██▌ | 3159/12188 [6:47:55<19:29:51, 7.77s/it] {'loss': 0.3878, 'grad_norm': 0.7281823224805904, 'learning_rate': 8.685763245715892e-06, 'epoch': 0.26} + 26%|██▌ | 3159/12188 [6:47:55<19:29:51, 7.77s/it] 26%|██▌ | 3160/12188 [6:48:03<19:07:39, 7.63s/it] {'loss': 0.3436, 'grad_norm': 0.6185378397304611, 'learning_rate': 8.684865274002085e-06, 'epoch': 0.26} + 26%|██▌ | 3160/12188 [6:48:03<19:07:39, 7.63s/it] 26%|██▌ | 3161/12188 [6:48:10<18:57:23, 7.56s/it] {'loss': 0.3665, 'grad_norm': 0.6650556484057979, 'learning_rate': 8.683967042069013e-06, 'epoch': 0.26} + 26%|██▌ | 3161/12188 [6:48:10<18:57:23, 7.56s/it] 26%|██▌ | 3162/12188 [6:48:17<18:16:58, 7.29s/it] {'loss': 0.3885, 'grad_norm': 0.6584477396312579, 'learning_rate': 8.683068549980106e-06, 'epoch': 0.26} + 26%|██▌ | 3162/12188 [6:48:17<18:16:58, 7.29s/it] 26%|██▌ | 3163/12188 [6:48:23<17:44:55, 7.08s/it] {'loss': 0.3702, 'grad_norm': 0.7620441465304493, 'learning_rate': 8.682169797798815e-06, 'epoch': 0.26} + 26%|██▌ | 3163/12188 [6:48:23<17:44:55, 7.08s/it] 26%|██▌ | 3164/12188 [6:48:31<18:04:16, 7.21s/it] {'loss': 0.3781, 'grad_norm': 0.9322986109480751, 'learning_rate': 8.681270785588609e-06, 'epoch': 0.26} + 26%|██▌ | 3164/12188 [6:48:31<18:04:16, 7.21s/it] 26%|██▌ | 3165/12188 [6:48:37<17:39:26, 7.04s/it] {'loss': 0.3609, 'grad_norm': 0.6439292904015512, 'learning_rate': 8.680371513412975e-06, 'epoch': 0.26} + 26%|██▌ | 3165/12188 [6:48:37<17:39:26, 7.04s/it] 26%|██▌ | 3166/12188 [6:48:44<17:24:58, 6.95s/it] {'loss': 0.3757, 'grad_norm': 0.7562510900172674, 'learning_rate': 8.679471981335418e-06, 'epoch': 0.26} + 26%|██▌ | 3166/12188 [6:48:44<17:24:58, 6.95s/it] 26%|██▌ | 3167/12188 [6:48:51<17:42:13, 7.07s/it] {'loss': 0.3785, 'grad_norm': 0.7864026859619301, 'learning_rate': 8.678572189419461e-06, 'epoch': 0.26} + 26%|██▌ | 3167/12188 [6:48:51<17:42:13, 7.07s/it] 26%|██▌ | 3168/12188 [6:48:59<18:04:57, 7.22s/it] {'loss': 0.3741, 'grad_norm': 0.6676929479268652, 'learning_rate': 8.677672137728645e-06, 'epoch': 0.26} + 26%|██▌ | 3168/12188 [6:48:59<18:04:57, 7.22s/it] 26%|██▌ | 3169/12188 [6:49:06<17:50:50, 7.12s/it] {'loss': 0.3844, 'grad_norm': 0.7591932369625426, 'learning_rate': 8.676771826326533e-06, 'epoch': 0.26} + 26%|██▌ | 3169/12188 [6:49:06<17:50:50, 7.12s/it] 26%|██▌ | 3170/12188 [6:49:13<17:27:36, 6.97s/it] {'loss': 0.3573, 'grad_norm': 0.676637261864924, 'learning_rate': 8.6758712552767e-06, 'epoch': 0.26} + 26%|██▌ | 3170/12188 [6:49:13<17:27:36, 6.97s/it] 26%|██▌ | 3171/12188 [6:49:20<17:45:19, 7.09s/it] {'loss': 0.3446, 'grad_norm': 0.6862974032385996, 'learning_rate': 8.674970424642745e-06, 'epoch': 0.26} + 26%|██▌ | 3171/12188 [6:49:20<17:45:19, 7.09s/it] 26%|██▌ | 3172/12188 [6:49:27<17:40:59, 7.06s/it] {'loss': 0.3766, 'grad_norm': 0.8217609971756128, 'learning_rate': 8.674069334488284e-06, 'epoch': 0.26} + 26%|██▌ | 3172/12188 [6:49:27<17:40:59, 7.06s/it] 26%|██▌ | 3173/12188 [6:49:34<17:52:20, 7.14s/it] {'loss': 0.4219, 'grad_norm': 0.872000909382775, 'learning_rate': 8.67316798487695e-06, 'epoch': 0.26} + 26%|██▌ | 3173/12188 [6:49:34<17:52:20, 7.14s/it] 26%|██▌ | 3174/12188 [6:49:41<17:33:14, 7.01s/it] {'loss': 0.3462, 'grad_norm': 0.6317096560431704, 'learning_rate': 8.672266375872392e-06, 'epoch': 0.26} + 26%|██▌ | 3174/12188 [6:49:41<17:33:14, 7.01s/it] 26%|██▌ | 3175/12188 [6:49:48<17:19:40, 6.92s/it] {'loss': 0.4294, 'grad_norm': 0.8354059107714799, 'learning_rate': 8.671364507538284e-06, 'epoch': 0.26} + 26%|██▌ | 3175/12188 [6:49:48<17:19:40, 6.92s/it] 26%|██▌ | 3176/12188 [6:49:56<18:11:04, 7.26s/it] {'loss': 0.3531, 'grad_norm': 0.6842325914013869, 'learning_rate': 8.670462379938313e-06, 'epoch': 0.26} + 26%|██▌ | 3176/12188 [6:49:56<18:11:04, 7.26s/it] 26%|██▌ | 3177/12188 [6:50:03<18:07:54, 7.24s/it] {'loss': 0.3542, 'grad_norm': 0.6745021993281622, 'learning_rate': 8.669559993136185e-06, 'epoch': 0.26} + 26%|██▌ | 3177/12188 [6:50:03<18:07:54, 7.24s/it] 26%|██▌ | 3178/12188 [6:50:10<18:00:41, 7.20s/it] {'loss': 0.3627, 'grad_norm': 0.6275608482114347, 'learning_rate': 8.668657347195627e-06, 'epoch': 0.26} + 26%|██▌ | 3178/12188 [6:50:10<18:00:41, 7.20s/it] 26%|██▌ | 3179/12188 [6:50:17<17:31:45, 7.00s/it] {'loss': 0.3808, 'grad_norm': 0.6408103104894711, 'learning_rate': 8.66775444218038e-06, 'epoch': 0.26} + 26%|██▌ | 3179/12188 [6:50:17<17:31:45, 7.00s/it] 26%|██▌ | 3180/12188 [6:50:24<18:00:13, 7.20s/it] {'loss': 0.3655, 'grad_norm': 0.6971596452561198, 'learning_rate': 8.666851278154208e-06, 'epoch': 0.26} + 26%|██▌ | 3180/12188 [6:50:24<18:00:13, 7.20s/it] 26%|██▌ | 3181/12188 [6:50:32<18:15:50, 7.30s/it] {'loss': 0.3779, 'grad_norm': 0.9113848394650199, 'learning_rate': 8.665947855180889e-06, 'epoch': 0.26} + 26%|██▌ | 3181/12188 [6:50:32<18:15:50, 7.30s/it] 26%|██▌ | 3182/12188 [6:50:39<18:29:51, 7.39s/it] {'loss': 0.4087, 'grad_norm': 0.9659987441869748, 'learning_rate': 8.665044173324225e-06, 'epoch': 0.26} + 26%|██▌ | 3182/12188 [6:50:39<18:29:51, 7.39s/it] 26%|██▌ | 3183/12188 [6:50:47<18:22:09, 7.34s/it] {'loss': 0.4262, 'grad_norm': 0.7461397821440396, 'learning_rate': 8.664140232648026e-06, 'epoch': 0.26} + 26%|██▌ | 3183/12188 [6:50:47<18:22:09, 7.34s/it] 26%|██▌ | 3184/12188 [6:50:54<18:08:46, 7.26s/it] {'loss': 0.3317, 'grad_norm': 0.6998101999176618, 'learning_rate': 8.663236033216133e-06, 'epoch': 0.26} + 26%|██▌ | 3184/12188 [6:50:54<18:08:46, 7.26s/it] 26%|██▌ | 3185/12188 [6:51:01<18:10:13, 7.27s/it] {'loss': 0.3755, 'grad_norm': 0.8824764177083688, 'learning_rate': 8.662331575092396e-06, 'epoch': 0.26} + 26%|██▌ | 3185/12188 [6:51:01<18:10:13, 7.27s/it] 26%|██▌ | 3186/12188 [6:51:09<18:24:35, 7.36s/it] {'loss': 0.375, 'grad_norm': 0.6955518195546772, 'learning_rate': 8.661426858340687e-06, 'epoch': 0.26} + 26%|██▌ | 3186/12188 [6:51:09<18:24:35, 7.36s/it] 26%|██▌ | 3187/12188 [6:51:16<18:13:58, 7.29s/it] {'loss': 0.4199, 'grad_norm': 0.6840782445293266, 'learning_rate': 8.660521883024895e-06, 'epoch': 0.26} + 26%|██▌ | 3187/12188 [6:51:16<18:13:58, 7.29s/it] 26%|██▌ | 3188/12188 [6:51:23<18:06:00, 7.24s/it] {'loss': 0.345, 'grad_norm': 0.726703061227362, 'learning_rate': 8.65961664920893e-06, 'epoch': 0.26} + 26%|██▌ | 3188/12188 [6:51:23<18:06:00, 7.24s/it] 26%|██▌ | 3189/12188 [6:51:30<18:16:30, 7.31s/it] {'loss': 0.4116, 'grad_norm': 0.6660433052332396, 'learning_rate': 8.658711156956719e-06, 'epoch': 0.26} + 26%|██▌ | 3189/12188 [6:51:30<18:16:30, 7.31s/it] 26%|██▌ | 3190/12188 [6:51:38<18:45:18, 7.50s/it] {'loss': 0.3496, 'grad_norm': 0.6362847054808488, 'learning_rate': 8.6578054063322e-06, 'epoch': 0.26} + 26%|██▌ | 3190/12188 [6:51:38<18:45:18, 7.50s/it] 26%|██▌ | 3191/12188 [6:51:46<18:47:49, 7.52s/it] {'loss': 0.3423, 'grad_norm': 0.5719256410813679, 'learning_rate': 8.656899397399343e-06, 'epoch': 0.26} + 26%|██▌ | 3191/12188 [6:51:46<18:47:49, 7.52s/it] 26%|██▌ | 3192/12188 [6:51:53<18:28:27, 7.39s/it] {'loss': 0.3846, 'grad_norm': 0.821187213726844, 'learning_rate': 8.655993130222124e-06, 'epoch': 0.26} + 26%|██▌ | 3192/12188 [6:51:53<18:28:27, 7.39s/it] 26%|██▌ | 3193/12188 [6:52:00<18:12:49, 7.29s/it] {'loss': 0.3809, 'grad_norm': 0.6994188008868596, 'learning_rate': 8.655086604864545e-06, 'epoch': 0.26} + 26%|██▌ | 3193/12188 [6:52:00<18:12:49, 7.29s/it] 26%|██▌ | 3194/12188 [6:52:07<17:59:17, 7.20s/it] {'loss': 0.3407, 'grad_norm': 0.6576833782345283, 'learning_rate': 8.65417982139062e-06, 'epoch': 0.26} + 26%|██▌ | 3194/12188 [6:52:07<17:59:17, 7.20s/it] 26%|██▌ | 3195/12188 [6:52:14<17:56:57, 7.19s/it] {'loss': 0.3649, 'grad_norm': 0.6064850972496457, 'learning_rate': 8.65327277986439e-06, 'epoch': 0.26} + 26%|██▌ | 3195/12188 [6:52:14<17:56:57, 7.19s/it] 26%|██▌ | 3196/12188 [6:52:21<17:31:00, 7.01s/it] {'loss': 0.3849, 'grad_norm': 0.7598725275017515, 'learning_rate': 8.652365480349904e-06, 'epoch': 0.26} + 26%|██▌ | 3196/12188 [6:52:21<17:31:00, 7.01s/it] 26%|██▌ | 3197/12188 [6:52:28<18:01:52, 7.22s/it] {'loss': 0.4065, 'grad_norm': 0.8441698055133459, 'learning_rate': 8.651457922911237e-06, 'epoch': 0.26} + 26%|██▌ | 3197/12188 [6:52:28<18:01:52, 7.22s/it] 26%|██▌ | 3198/12188 [6:52:39<20:39:47, 8.27s/it] {'loss': 0.3854, 'grad_norm': 0.8925089476833528, 'learning_rate': 8.650550107612476e-06, 'epoch': 0.26} + 26%|██▌ | 3198/12188 [6:52:39<20:39:47, 8.27s/it] 26%|██▌ | 3199/12188 [6:52:46<19:30:03, 7.81s/it] {'loss': 0.375, 'grad_norm': 0.7144866912545919, 'learning_rate': 8.649642034517734e-06, 'epoch': 0.26} + 26%|██▌ | 3199/12188 [6:52:46<19:30:03, 7.81s/it] 26%|██▋ | 3200/12188 [6:52:53<18:59:15, 7.61s/it] {'loss': 0.3604, 'grad_norm': 0.7182152168181775, 'learning_rate': 8.648733703691135e-06, 'epoch': 0.26} + 26%|██▋ | 3200/12188 [6:52:53<18:59:15, 7.61s/it] 26%|██▋ | 3201/12188 [6:53:00<18:17:31, 7.33s/it] {'loss': 0.3798, 'grad_norm': 0.930773357163344, 'learning_rate': 8.647825115196822e-06, 'epoch': 0.26} + 26%|██▋ | 3201/12188 [6:53:00<18:17:31, 7.33s/it] 26%|██▋ | 3202/12188 [6:53:07<18:22:15, 7.36s/it] {'loss': 0.3639, 'grad_norm': 0.6590250366615306, 'learning_rate': 8.646916269098961e-06, 'epoch': 0.26} + 26%|██▋ | 3202/12188 [6:53:07<18:22:15, 7.36s/it] 26%|██▋ | 3203/12188 [6:53:14<18:05:00, 7.25s/it] {'loss': 0.3522, 'grad_norm': 0.867241860071896, 'learning_rate': 8.646007165461732e-06, 'epoch': 0.26} + 26%|██▋ | 3203/12188 [6:53:14<18:05:00, 7.25s/it] 26%|██▋ | 3204/12188 [6:53:21<18:02:33, 7.23s/it] {'loss': 0.3576, 'grad_norm': 0.6315540145595606, 'learning_rate': 8.645097804349334e-06, 'epoch': 0.26} + 26%|██▋ | 3204/12188 [6:53:21<18:02:33, 7.23s/it] 26%|██▋ | 3205/12188 [6:53:29<18:17:44, 7.33s/it] {'loss': 0.387, 'grad_norm': 0.9136374261081334, 'learning_rate': 8.644188185825986e-06, 'epoch': 0.26} + 26%|██▋ | 3205/12188 [6:53:29<18:17:44, 7.33s/it] 26%|██▋ | 3206/12188 [6:53:36<17:53:16, 7.17s/it] {'loss': 0.3479, 'grad_norm': 0.663583004873792, 'learning_rate': 8.643278309955924e-06, 'epoch': 0.26} + 26%|██▋ | 3206/12188 [6:53:36<17:53:16, 7.17s/it] 26%|██▋ | 3207/12188 [6:53:44<18:35:10, 7.45s/it] {'loss': 0.3226, 'grad_norm': 0.6824957562695569, 'learning_rate': 8.642368176803399e-06, 'epoch': 0.26} + 26%|██▋ | 3207/12188 [6:53:44<18:35:10, 7.45s/it] 26%|██▋ | 3208/12188 [6:53:51<18:13:14, 7.30s/it] {'loss': 0.3384, 'grad_norm': 0.6437995995973135, 'learning_rate': 8.641457786432687e-06, 'epoch': 0.26} + 26%|██▋ | 3208/12188 [6:53:51<18:13:14, 7.30s/it] 26%|██▋ | 3209/12188 [6:53:58<17:52:58, 7.17s/it] {'loss': 0.3533, 'grad_norm': 0.970874036938425, 'learning_rate': 8.640547138908077e-06, 'epoch': 0.26} + 26%|██▋ | 3209/12188 [6:53:58<17:52:58, 7.17s/it] 26%|██▋ | 3210/12188 [6:54:06<18:29:27, 7.41s/it] {'loss': 0.3936, 'grad_norm': 0.7048179393632471, 'learning_rate': 8.639636234293878e-06, 'epoch': 0.26} + 26%|██▋ | 3210/12188 [6:54:06<18:29:27, 7.41s/it] 26%|██▋ | 3211/12188 [6:54:14<19:09:23, 7.68s/it] {'loss': 0.3713, 'grad_norm': 0.671542169402969, 'learning_rate': 8.638725072654413e-06, 'epoch': 0.26} + 26%|██▋ | 3211/12188 [6:54:14<19:09:23, 7.68s/it] 26%|██▋ | 3212/12188 [6:54:21<18:28:34, 7.41s/it] {'loss': 0.3858, 'grad_norm': 0.7769028436988911, 'learning_rate': 8.63781365405403e-06, 'epoch': 0.26} + 26%|██▋ | 3212/12188 [6:54:21<18:28:34, 7.41s/it] 26%|██▋ | 3213/12188 [6:54:28<18:24:20, 7.38s/it] {'loss': 0.3863, 'grad_norm': 0.6528740141544908, 'learning_rate': 8.636901978557092e-06, 'epoch': 0.26} + 26%|██▋ | 3213/12188 [6:54:28<18:24:20, 7.38s/it] 26%|██▋ | 3214/12188 [6:54:35<18:14:02, 7.31s/it] {'loss': 0.3804, 'grad_norm': 0.6207298013852083, 'learning_rate': 8.63599004622798e-06, 'epoch': 0.26} + 26%|██▋ | 3214/12188 [6:54:35<18:14:02, 7.31s/it] 26%|██▋ | 3215/12188 [6:54:42<18:03:24, 7.24s/it] {'loss': 0.4064, 'grad_norm': 0.6433253737367097, 'learning_rate': 8.635077857131091e-06, 'epoch': 0.26} + 26%|██▋ | 3215/12188 [6:54:42<18:03:24, 7.24s/it] 26%|██▋ | 3216/12188 [6:54:49<17:48:15, 7.14s/it] {'loss': 0.3751, 'grad_norm': 0.6828524581270953, 'learning_rate': 8.634165411330845e-06, 'epoch': 0.26} + 26%|██▋ | 3216/12188 [6:54:49<17:48:15, 7.14s/it] 26%|██▋ | 3217/12188 [6:54:56<17:29:24, 7.02s/it] {'loss': 0.3538, 'grad_norm': 0.6689959753054014, 'learning_rate': 8.633252708891677e-06, 'epoch': 0.26} + 26%|██▋ | 3217/12188 [6:54:56<17:29:24, 7.02s/it] 26%|██▋ | 3218/12188 [6:55:03<17:39:19, 7.09s/it] {'loss': 0.3694, 'grad_norm': 0.6667264992057949, 'learning_rate': 8.632339749878038e-06, 'epoch': 0.26} + 26%|██▋ | 3218/12188 [6:55:03<17:39:19, 7.09s/it] 26%|██▋ | 3219/12188 [6:55:12<18:58:30, 7.62s/it] {'loss': 0.3445, 'grad_norm': 0.6772850431914896, 'learning_rate': 8.631426534354404e-06, 'epoch': 0.26} + 26%|██▋ | 3219/12188 [6:55:12<18:58:30, 7.62s/it] 26%|██▋ | 3220/12188 [6:55:19<18:46:39, 7.54s/it] {'loss': 0.3829, 'grad_norm': 0.6227125483324175, 'learning_rate': 8.63051306238526e-06, 'epoch': 0.26} + 26%|██▋ | 3220/12188 [6:55:19<18:46:39, 7.54s/it] 26%|██▋ | 3221/12188 [6:55:26<18:20:11, 7.36s/it] {'loss': 0.348, 'grad_norm': 0.7325950010051868, 'learning_rate': 8.629599334035119e-06, 'epoch': 0.26} + 26%|██▋ | 3221/12188 [6:55:26<18:20:11, 7.36s/it] 26%|██▋ | 3222/12188 [6:55:33<17:56:18, 7.20s/it] {'loss': 0.4011, 'grad_norm': 0.6511875370197818, 'learning_rate': 8.628685349368502e-06, 'epoch': 0.26} + 26%|██▋ | 3222/12188 [6:55:33<17:56:18, 7.20s/it] 26%|██▋ | 3223/12188 [6:55:40<17:36:20, 7.07s/it] {'loss': 0.4098, 'grad_norm': 0.7485857068953476, 'learning_rate': 8.627771108449956e-06, 'epoch': 0.26} + 26%|██▋ | 3223/12188 [6:55:40<17:36:20, 7.07s/it] 26%|██▋ | 3224/12188 [6:55:48<18:10:58, 7.30s/it] {'loss': 0.3611, 'grad_norm': 0.6082163498104752, 'learning_rate': 8.626856611344043e-06, 'epoch': 0.26} + 26%|██▋ | 3224/12188 [6:55:48<18:10:58, 7.30s/it] 26%|██▋ | 3225/12188 [6:55:55<18:18:04, 7.35s/it] {'loss': 0.4444, 'grad_norm': 0.6395266340263815, 'learning_rate': 8.625941858115344e-06, 'epoch': 0.26} + 26%|██▋ | 3225/12188 [6:55:55<18:18:04, 7.35s/it] 26%|██▋ | 3226/12188 [6:56:02<17:50:14, 7.17s/it] {'loss': 0.334, 'grad_norm': 0.6451303156266769, 'learning_rate': 8.625026848828456e-06, 'epoch': 0.26} + 26%|██▋ | 3226/12188 [6:56:02<17:50:14, 7.17s/it] 26%|██▋ | 3227/12188 [6:56:09<17:40:34, 7.10s/it] {'loss': 0.3552, 'grad_norm': 0.6061972021851753, 'learning_rate': 8.624111583547997e-06, 'epoch': 0.26} + 26%|██▋ | 3227/12188 [6:56:09<17:40:34, 7.10s/it] 26%|██▋ | 3228/12188 [6:56:17<18:28:38, 7.42s/it] {'loss': 0.3342, 'grad_norm': 0.6181049341189951, 'learning_rate': 8.623196062338599e-06, 'epoch': 0.26} + 26%|██▋ | 3228/12188 [6:56:17<18:28:38, 7.42s/it] 26%|██▋ | 3229/12188 [6:56:25<19:05:00, 7.67s/it] {'loss': 0.3994, 'grad_norm': 0.6557250700634831, 'learning_rate': 8.622280285264917e-06, 'epoch': 0.26} + 26%|██▋ | 3229/12188 [6:56:25<19:05:00, 7.67s/it] 27%|██▋ | 3230/12188 [6:56:32<18:30:54, 7.44s/it] {'loss': 0.365, 'grad_norm': 0.6832133785576164, 'learning_rate': 8.62136425239162e-06, 'epoch': 0.27} + 27%|██▋ | 3230/12188 [6:56:32<18:30:54, 7.44s/it] 27%|██▋ | 3231/12188 [6:56:39<18:06:26, 7.28s/it] {'loss': 0.357, 'grad_norm': 0.6636789581709824, 'learning_rate': 8.620447963783398e-06, 'epoch': 0.27} + 27%|██▋ | 3231/12188 [6:56:39<18:06:26, 7.28s/it] 27%|██▋ | 3232/12188 [6:56:46<17:59:38, 7.23s/it] {'loss': 0.3683, 'grad_norm': 0.6344144547596535, 'learning_rate': 8.619531419504959e-06, 'epoch': 0.27} + 27%|██▋ | 3232/12188 [6:56:46<17:59:38, 7.23s/it] 27%|██▋ | 3233/12188 [6:56:53<18:00:47, 7.24s/it] {'loss': 0.3758, 'grad_norm': 0.6767139979645451, 'learning_rate': 8.618614619621024e-06, 'epoch': 0.27} + 27%|██▋ | 3233/12188 [6:56:53<18:00:47, 7.24s/it] 27%|██▋ | 3234/12188 [6:57:01<18:10:34, 7.31s/it] {'loss': 0.3774, 'grad_norm': 0.6772993783532404, 'learning_rate': 8.61769756419634e-06, 'epoch': 0.27} + 27%|██▋ | 3234/12188 [6:57:01<18:10:34, 7.31s/it] 27%|██▋ | 3235/12188 [6:57:10<19:11:58, 7.72s/it] {'loss': 0.3194, 'grad_norm': 0.6378409991335119, 'learning_rate': 8.616780253295666e-06, 'epoch': 0.27} + 27%|██▋ | 3235/12188 [6:57:10<19:11:58, 7.72s/it] 27%|██▋ | 3236/12188 [6:57:16<18:28:00, 7.43s/it] {'loss': 0.3561, 'grad_norm': 0.642644099401376, 'learning_rate': 8.61586268698378e-06, 'epoch': 0.27} + 27%|██▋ | 3236/12188 [6:57:16<18:28:00, 7.43s/it] 27%|██▋ | 3237/12188 [6:57:23<18:03:18, 7.26s/it] {'loss': 0.3833, 'grad_norm': 0.6939394635748598, 'learning_rate': 8.614944865325482e-06, 'epoch': 0.27} + 27%|██▋ | 3237/12188 [6:57:23<18:03:18, 7.26s/it] 27%|██▋ | 3238/12188 [6:57:33<20:10:38, 8.12s/it] {'loss': 0.3602, 'grad_norm': 0.6501407092802053, 'learning_rate': 8.614026788385586e-06, 'epoch': 0.27} + 27%|██▋ | 3238/12188 [6:57:33<20:10:38, 8.12s/it] 27%|██▋ | 3239/12188 [6:57:41<19:34:50, 7.88s/it] {'loss': 0.359, 'grad_norm': 0.6402038742112107, 'learning_rate': 8.613108456228922e-06, 'epoch': 0.27} + 27%|██▋ | 3239/12188 [6:57:41<19:34:50, 7.88s/it] 27%|██▋ | 3240/12188 [6:57:48<18:56:16, 7.62s/it] {'loss': 0.3583, 'grad_norm': 0.6639549790861202, 'learning_rate': 8.612189868920345e-06, 'epoch': 0.27} + 27%|██▋ | 3240/12188 [6:57:48<18:56:16, 7.62s/it] 27%|██▋ | 3241/12188 [6:57:55<18:38:54, 7.50s/it] {'loss': 0.4388, 'grad_norm': 0.6661981839701105, 'learning_rate': 8.611271026524724e-06, 'epoch': 0.27} + 27%|██▋ | 3241/12188 [6:57:55<18:38:54, 7.50s/it] 27%|██▋ | 3242/12188 [6:58:02<18:03:33, 7.27s/it] {'loss': 0.3364, 'grad_norm': 0.6761006437455896, 'learning_rate': 8.610351929106944e-06, 'epoch': 0.27} + 27%|██▋ | 3242/12188 [6:58:02<18:03:33, 7.27s/it] 27%|██▋ | 3243/12188 [6:58:09<18:05:53, 7.28s/it] {'loss': 0.3576, 'grad_norm': 0.689577757976695, 'learning_rate': 8.609432576731912e-06, 'epoch': 0.27} + 27%|██▋ | 3243/12188 [6:58:09<18:05:53, 7.28s/it] 27%|██▋ | 3244/12188 [6:58:17<18:52:30, 7.60s/it] {'loss': 0.3409, 'grad_norm': 0.7360804602585995, 'learning_rate': 8.608512969464548e-06, 'epoch': 0.27} + 27%|██▋ | 3244/12188 [6:58:17<18:52:30, 7.60s/it] 27%|██▋ | 3245/12188 [6:58:24<18:21:31, 7.39s/it] {'loss': 0.3601, 'grad_norm': 0.6630668349970025, 'learning_rate': 8.607593107369798e-06, 'epoch': 0.27} + 27%|██▋ | 3245/12188 [6:58:24<18:21:31, 7.39s/it] 27%|██▋ | 3246/12188 [6:58:31<18:16:13, 7.36s/it] {'loss': 0.367, 'grad_norm': 0.7216620656135864, 'learning_rate': 8.606672990512619e-06, 'epoch': 0.27} + 27%|██▋ | 3246/12188 [6:58:31<18:16:13, 7.36s/it] 27%|██▋ | 3247/12188 [6:58:38<17:37:12, 7.09s/it] {'loss': 0.4011, 'grad_norm': 0.6438797040617212, 'learning_rate': 8.605752618957986e-06, 'epoch': 0.27} + 27%|██▋ | 3247/12188 [6:58:38<17:37:12, 7.09s/it] 27%|██▋ | 3248/12188 [6:58:44<17:10:26, 6.92s/it] {'loss': 0.3679, 'grad_norm': 0.6051501087297476, 'learning_rate': 8.604831992770898e-06, 'epoch': 0.27} + 27%|██▋ | 3248/12188 [6:58:44<17:10:26, 6.92s/it] 27%|██▋ | 3249/12188 [6:58:51<17:04:02, 6.87s/it] {'loss': 0.356, 'grad_norm': 0.6236186445758509, 'learning_rate': 8.603911112016366e-06, 'epoch': 0.27} + 27%|██▋ | 3249/12188 [6:58:51<17:04:02, 6.87s/it] 27%|██▋ | 3250/12188 [6:58:58<17:17:53, 6.97s/it] {'loss': 0.3765, 'grad_norm': 0.6243825523224746, 'learning_rate': 8.60298997675942e-06, 'epoch': 0.27} + 27%|██▋ | 3250/12188 [6:58:58<17:17:53, 6.97s/it] 27%|██▋ | 3251/12188 [6:59:07<18:14:53, 7.35s/it] {'loss': 0.3342, 'grad_norm': 0.6747955268758146, 'learning_rate': 8.602068587065111e-06, 'epoch': 0.27} + 27%|██▋ | 3251/12188 [6:59:07<18:14:53, 7.35s/it] 27%|██▋ | 3252/12188 [6:59:14<17:57:00, 7.23s/it] {'loss': 0.3899, 'grad_norm': 0.6334535669023696, 'learning_rate': 8.601146942998506e-06, 'epoch': 0.27} + 27%|██▋ | 3252/12188 [6:59:14<17:57:00, 7.23s/it] 27%|██▋ | 3253/12188 [6:59:20<17:41:51, 7.13s/it] {'loss': 0.3998, 'grad_norm': 0.6158139843527525, 'learning_rate': 8.60022504462469e-06, 'epoch': 0.27} + 27%|██▋ | 3253/12188 [6:59:20<17:41:51, 7.13s/it] 27%|██▋ | 3254/12188 [6:59:27<17:23:19, 7.01s/it] {'loss': 0.3967, 'grad_norm': 0.6477846139878669, 'learning_rate': 8.599302892008765e-06, 'epoch': 0.27} + 27%|██▋ | 3254/12188 [6:59:27<17:23:19, 7.01s/it] 27%|██▋ | 3255/12188 [6:59:34<17:08:23, 6.91s/it] {'loss': 0.3745, 'grad_norm': 0.6559324470935723, 'learning_rate': 8.598380485215854e-06, 'epoch': 0.27} + 27%|██▋ | 3255/12188 [6:59:34<17:08:23, 6.91s/it] 27%|██▋ | 3256/12188 [6:59:42<17:54:02, 7.21s/it] {'loss': 0.4038, 'grad_norm': 0.6997829725696408, 'learning_rate': 8.597457824311093e-06, 'epoch': 0.27} + 27%|██▋ | 3256/12188 [6:59:42<17:54:02, 7.21s/it] 27%|██▋ | 3257/12188 [6:59:50<18:28:00, 7.44s/it] {'loss': 0.3629, 'grad_norm': 0.5901643689256617, 'learning_rate': 8.59653490935964e-06, 'epoch': 0.27} + 27%|██▋ | 3257/12188 [6:59:50<18:28:00, 7.44s/it] 27%|██▋ | 3258/12188 [6:59:56<17:51:47, 7.20s/it] {'loss': 0.3813, 'grad_norm': 0.6495234022896821, 'learning_rate': 8.59561174042667e-06, 'epoch': 0.27} + 27%|██▋ | 3258/12188 [6:59:56<17:51:47, 7.20s/it] 27%|██▋ | 3259/12188 [7:00:04<17:56:50, 7.24s/it] {'loss': 0.3316, 'grad_norm': 0.5914827102542379, 'learning_rate': 8.594688317577375e-06, 'epoch': 0.27} + 27%|██▋ | 3259/12188 [7:00:04<17:56:50, 7.24s/it] 27%|██▋ | 3260/12188 [7:00:11<17:40:15, 7.13s/it] {'loss': 0.4206, 'grad_norm': 0.6962460000048584, 'learning_rate': 8.593764640876967e-06, 'epoch': 0.27} + 27%|██▋ | 3260/12188 [7:00:11<17:40:15, 7.13s/it] 27%|██▋ | 3261/12188 [7:00:19<18:44:21, 7.56s/it] {'loss': 0.3868, 'grad_norm': 0.7182113195914129, 'learning_rate': 8.592840710390674e-06, 'epoch': 0.27} + 27%|██▋ | 3261/12188 [7:00:19<18:44:21, 7.56s/it] 27%|██▋ | 3262/12188 [7:00:27<18:45:01, 7.56s/it] {'loss': 0.384, 'grad_norm': 0.6832580505229849, 'learning_rate': 8.591916526183743e-06, 'epoch': 0.27} + 27%|██▋ | 3262/12188 [7:00:27<18:45:01, 7.56s/it] 27%|██▋ | 3263/12188 [7:00:33<18:01:37, 7.27s/it] {'loss': 0.3886, 'grad_norm': 0.7030113162990322, 'learning_rate': 8.590992088321438e-06, 'epoch': 0.27} + 27%|██▋ | 3263/12188 [7:00:33<18:01:37, 7.27s/it] 27%|██▋ | 3264/12188 [7:00:41<18:03:14, 7.28s/it] {'loss': 0.3665, 'grad_norm': 0.6731790011494098, 'learning_rate': 8.59006739686904e-06, 'epoch': 0.27} + 27%|██▋ | 3264/12188 [7:00:41<18:03:14, 7.28s/it] 27%|██▋ | 3265/12188 [7:00:48<18:01:28, 7.27s/it] {'loss': 0.3397, 'grad_norm': 0.7641656423183154, 'learning_rate': 8.589142451891849e-06, 'epoch': 0.27} + 27%|██▋ | 3265/12188 [7:00:48<18:01:28, 7.27s/it] 27%|██▋ | 3266/12188 [7:00:55<17:56:12, 7.24s/it] {'loss': 0.4146, 'grad_norm': 0.658157963939966, 'learning_rate': 8.588217253455187e-06, 'epoch': 0.27} + 27%|██▋ | 3266/12188 [7:00:55<17:56:12, 7.24s/it] 27%|██▋ | 3267/12188 [7:01:02<17:45:47, 7.17s/it] {'loss': 0.3569, 'grad_norm': 0.6907026811123446, 'learning_rate': 8.587291801624387e-06, 'epoch': 0.27} + 27%|██▋ | 3267/12188 [7:01:02<17:45:47, 7.17s/it] 27%|██▋ | 3268/12188 [7:01:09<17:57:05, 7.24s/it] {'loss': 0.3522, 'grad_norm': 0.6059179382545469, 'learning_rate': 8.586366096464803e-06, 'epoch': 0.27} + 27%|██▋ | 3268/12188 [7:01:09<17:57:05, 7.24s/it] 27%|██▋ | 3269/12188 [7:01:16<17:35:13, 7.10s/it] {'loss': 0.3944, 'grad_norm': 0.646071605463082, 'learning_rate': 8.585440138041807e-06, 'epoch': 0.27} + 27%|██▋ | 3269/12188 [7:01:16<17:35:13, 7.10s/it] 27%|██▋ | 3270/12188 [7:01:26<19:44:04, 7.97s/it] {'loss': 0.4019, 'grad_norm': 0.6028200119786715, 'learning_rate': 8.58451392642079e-06, 'epoch': 0.27} + 27%|██▋ | 3270/12188 [7:01:26<19:44:04, 7.97s/it] 27%|██▋ | 3271/12188 [7:01:33<19:04:38, 7.70s/it] {'loss': 0.3661, 'grad_norm': 0.6630482470963482, 'learning_rate': 8.583587461667156e-06, 'epoch': 0.27} + 27%|██▋ | 3271/12188 [7:01:33<19:04:38, 7.70s/it] 27%|██▋ | 3272/12188 [7:01:40<18:25:59, 7.44s/it] {'loss': 0.3999, 'grad_norm': 0.6248627487806411, 'learning_rate': 8.582660743846335e-06, 'epoch': 0.27} + 27%|██▋ | 3272/12188 [7:01:40<18:25:59, 7.44s/it] 27%|██▋ | 3273/12188 [7:01:47<17:56:58, 7.25s/it] {'loss': 0.3748, 'grad_norm': 0.5994540075743592, 'learning_rate': 8.581733773023769e-06, 'epoch': 0.27} + 27%|██▋ | 3273/12188 [7:01:47<17:56:58, 7.25s/it] 27%|██▋ | 3274/12188 [7:01:54<17:42:20, 7.15s/it] {'loss': 0.3655, 'grad_norm': 0.6203738218863328, 'learning_rate': 8.580806549264915e-06, 'epoch': 0.27} + 27%|██▋ | 3274/12188 [7:01:54<17:42:20, 7.15s/it] 27%|██▋ | 3275/12188 [7:02:01<17:45:19, 7.17s/it] {'loss': 0.3964, 'grad_norm': 0.6724758946829833, 'learning_rate': 8.579879072635257e-06, 'epoch': 0.27} + 27%|██▋ | 3275/12188 [7:02:01<17:45:19, 7.17s/it] 27%|██▋ | 3276/12188 [7:02:08<17:23:37, 7.03s/it] {'loss': 0.3463, 'grad_norm': 0.5905579702210505, 'learning_rate': 8.578951343200293e-06, 'epoch': 0.27} + 27%|██▋ | 3276/12188 [7:02:08<17:23:37, 7.03s/it] 27%|██▋ | 3277/12188 [7:02:14<17:01:48, 6.88s/it] {'loss': 0.3459, 'grad_norm': 0.591717821279884, 'learning_rate': 8.578023361025533e-06, 'epoch': 0.27} + 27%|██▋ | 3277/12188 [7:02:14<17:01:48, 6.88s/it] 27%|██▋ | 3278/12188 [7:02:21<17:07:46, 6.92s/it] {'loss': 0.3399, 'grad_norm': 0.6325227740071763, 'learning_rate': 8.57709512617651e-06, 'epoch': 0.27} + 27%|██▋ | 3278/12188 [7:02:21<17:07:46, 6.92s/it] 27%|██▋ | 3279/12188 [7:02:28<17:07:24, 6.92s/it] {'loss': 0.3703, 'grad_norm': 0.6526223794903492, 'learning_rate': 8.57616663871878e-06, 'epoch': 0.27} + 27%|██▋ | 3279/12188 [7:02:28<17:07:24, 6.92s/it] 27%|██▋ | 3280/12188 [7:02:36<17:39:43, 7.14s/it] {'loss': 0.3836, 'grad_norm': 1.3812211300760728, 'learning_rate': 8.575237898717905e-06, 'epoch': 0.27} + 27%|██▋ | 3280/12188 [7:02:36<17:39:43, 7.14s/it] 27%|██▋ | 3281/12188 [7:02:42<17:10:06, 6.94s/it] {'loss': 0.4266, 'grad_norm': 0.6976767785011899, 'learning_rate': 8.574308906239475e-06, 'epoch': 0.27} + 27%|██▋ | 3281/12188 [7:02:42<17:10:06, 6.94s/it] 27%|██▋ | 3282/12188 [7:02:50<17:35:18, 7.11s/it] {'loss': 0.3767, 'grad_norm': 0.6271205132920827, 'learning_rate': 8.57337966134909e-06, 'epoch': 0.27} + 27%|██▋ | 3282/12188 [7:02:50<17:35:18, 7.11s/it] 27%|██▋ | 3283/12188 [7:02:56<17:14:56, 6.97s/it] {'loss': 0.3802, 'grad_norm': 0.6611365503519531, 'learning_rate': 8.572450164112377e-06, 'epoch': 0.27} + 27%|██▋ | 3283/12188 [7:02:56<17:14:56, 6.97s/it] 27%|██▋ | 3284/12188 [7:03:03<17:14:19, 6.97s/it] {'loss': 0.3906, 'grad_norm': 0.6627138270800723, 'learning_rate': 8.571520414594974e-06, 'epoch': 0.27} + 27%|██▋ | 3284/12188 [7:03:03<17:14:19, 6.97s/it] 27%|██▋ | 3285/12188 [7:03:10<16:57:21, 6.86s/it] {'loss': 0.3566, 'grad_norm': 0.6145021144521077, 'learning_rate': 8.570590412862535e-06, 'epoch': 0.27} + 27%|██▋ | 3285/12188 [7:03:10<16:57:21, 6.86s/it] 27%|██▋ | 3286/12188 [7:03:17<17:11:20, 6.95s/it] {'loss': 0.3698, 'grad_norm': 0.7041261819722647, 'learning_rate': 8.56966015898074e-06, 'epoch': 0.27} + 27%|██▋ | 3286/12188 [7:03:17<17:11:20, 6.95s/it] 27%|██▋ | 3287/12188 [7:03:25<17:31:51, 7.09s/it] {'loss': 0.4043, 'grad_norm': 0.693337245368687, 'learning_rate': 8.568729653015278e-06, 'epoch': 0.27} + 27%|██▋ | 3287/12188 [7:03:25<17:31:51, 7.09s/it] 27%|██▋ | 3288/12188 [7:03:31<17:17:59, 7.00s/it] {'loss': 0.4143, 'grad_norm': 0.6878196959572115, 'learning_rate': 8.56779889503186e-06, 'epoch': 0.27} + 27%|██▋ | 3288/12188 [7:03:31<17:17:59, 7.00s/it] 27%|██▋ | 3289/12188 [7:03:39<17:38:56, 7.14s/it] {'loss': 0.3846, 'grad_norm': 0.6051001866848589, 'learning_rate': 8.566867885096217e-06, 'epoch': 0.27} + 27%|██▋ | 3289/12188 [7:03:39<17:38:56, 7.14s/it] 27%|██▋ | 3290/12188 [7:03:47<18:13:08, 7.37s/it] {'loss': 0.3513, 'grad_norm': 0.678229354167882, 'learning_rate': 8.565936623274096e-06, 'epoch': 0.27} + 27%|██▋ | 3290/12188 [7:03:47<18:13:08, 7.37s/it] 27%|██▋ | 3291/12188 [7:03:55<18:39:34, 7.55s/it] {'loss': 0.3742, 'grad_norm': 0.6759791011212622, 'learning_rate': 8.56500510963126e-06, 'epoch': 0.27} + 27%|██▋ | 3291/12188 [7:03:55<18:39:34, 7.55s/it] 27%|██▋ | 3292/12188 [7:04:03<19:19:17, 7.82s/it] {'loss': 0.363, 'grad_norm': 0.6370254385721991, 'learning_rate': 8.56407334423349e-06, 'epoch': 0.27} + 27%|██▋ | 3292/12188 [7:04:03<19:19:17, 7.82s/it] 27%|██▋ | 3293/12188 [7:04:10<18:44:09, 7.58s/it] {'loss': 0.3487, 'grad_norm': 0.6251773747632747, 'learning_rate': 8.563141327146586e-06, 'epoch': 0.27} + 27%|██▋ | 3293/12188 [7:04:10<18:44:09, 7.58s/it] 27%|██▋ | 3294/12188 [7:04:17<18:05:30, 7.32s/it] {'loss': 0.4081, 'grad_norm': 0.6429476500384653, 'learning_rate': 8.562209058436366e-06, 'epoch': 0.27} + 27%|██▋ | 3294/12188 [7:04:17<18:05:30, 7.32s/it] 27%|██▋ | 3295/12188 [7:04:25<18:40:08, 7.56s/it] {'loss': 0.3806, 'grad_norm': 0.6321418894108022, 'learning_rate': 8.561276538168666e-06, 'epoch': 0.27} + 27%|██▋ | 3295/12188 [7:04:25<18:40:08, 7.56s/it] 27%|██▋ | 3296/12188 [7:04:33<18:51:52, 7.64s/it] {'loss': 0.3779, 'grad_norm': 0.6085740262330057, 'learning_rate': 8.560343766409339e-06, 'epoch': 0.27} + 27%|██▋ | 3296/12188 [7:04:33<18:51:52, 7.64s/it] 27%|██▋ | 3297/12188 [7:04:40<18:10:40, 7.36s/it] {'loss': 0.3494, 'grad_norm': 0.6387869076648358, 'learning_rate': 8.559410743224254e-06, 'epoch': 0.27} + 27%|██▋ | 3297/12188 [7:04:40<18:10:40, 7.36s/it] 27%|██▋ | 3298/12188 [7:04:47<18:24:36, 7.46s/it] {'loss': 0.3409, 'grad_norm': 0.6360997949940853, 'learning_rate': 8.558477468679303e-06, 'epoch': 0.27} + 27%|██▋ | 3298/12188 [7:04:47<18:24:36, 7.46s/it] 27%|██▋ | 3299/12188 [7:04:55<18:42:17, 7.58s/it] {'loss': 0.3864, 'grad_norm': 0.6635218062822279, 'learning_rate': 8.557543942840386e-06, 'epoch': 0.27} + 27%|██▋ | 3299/12188 [7:04:55<18:42:17, 7.58s/it] 27%|██▋ | 3300/12188 [7:05:02<18:03:07, 7.31s/it] {'loss': 0.3743, 'grad_norm': 0.6510219920529147, 'learning_rate': 8.556610165773436e-06, 'epoch': 0.27} + 27%|██▋ | 3300/12188 [7:05:02<18:03:07, 7.31s/it] 27%|██▋ | 3301/12188 [7:05:09<17:36:43, 7.13s/it] {'loss': 0.3394, 'grad_norm': 0.6299717362146108, 'learning_rate': 8.55567613754439e-06, 'epoch': 0.27} + 27%|██▋ | 3301/12188 [7:05:09<17:36:43, 7.13s/it] 27%|██▋ | 3302/12188 [7:05:15<17:22:03, 7.04s/it] {'loss': 0.4078, 'grad_norm': 0.6361141886600025, 'learning_rate': 8.554741858219205e-06, 'epoch': 0.27} + 27%|██▋ | 3302/12188 [7:05:15<17:22:03, 7.04s/it] 27%|██▋ | 3303/12188 [7:05:25<19:01:09, 7.71s/it] {'loss': 0.3635, 'grad_norm': 0.798120255751697, 'learning_rate': 8.553807327863864e-06, 'epoch': 0.27} + 27%|██▋ | 3303/12188 [7:05:25<19:01:09, 7.71s/it] 27%|██▋ | 3304/12188 [7:05:32<18:30:02, 7.50s/it] {'loss': 0.3276, 'grad_norm': 0.6722980772604644, 'learning_rate': 8.552872546544356e-06, 'epoch': 0.27} + 27%|██▋ | 3304/12188 [7:05:32<18:30:02, 7.50s/it] 27%|██▋ | 3305/12188 [7:05:39<18:09:31, 7.36s/it] {'loss': 0.3752, 'grad_norm': 0.6831843945940786, 'learning_rate': 8.5519375143267e-06, 'epoch': 0.27} + 27%|██▋ | 3305/12188 [7:05:39<18:09:31, 7.36s/it] 27%|██▋ | 3306/12188 [7:05:46<17:56:21, 7.27s/it] {'loss': 0.3834, 'grad_norm': 0.6899635942031966, 'learning_rate': 8.55100223127692e-06, 'epoch': 0.27} + 27%|██▋ | 3306/12188 [7:05:46<17:56:21, 7.27s/it] 27%|██▋ | 3307/12188 [7:05:53<17:42:50, 7.18s/it] {'loss': 0.3726, 'grad_norm': 0.6365657990269605, 'learning_rate': 8.55006669746107e-06, 'epoch': 0.27} + 27%|██▋ | 3307/12188 [7:05:53<17:42:50, 7.18s/it] 27%|██▋ | 3308/12188 [7:06:00<17:42:57, 7.18s/it] {'loss': 0.3791, 'grad_norm': 0.6530643775922542, 'learning_rate': 8.549130912945214e-06, 'epoch': 0.27} + 27%|██▋ | 3308/12188 [7:06:00<17:42:57, 7.18s/it] 27%|██▋ | 3309/12188 [7:06:07<17:58:57, 7.29s/it] {'loss': 0.3789, 'grad_norm': 0.6860332496796889, 'learning_rate': 8.548194877795433e-06, 'epoch': 0.27} + 27%|██▋ | 3309/12188 [7:06:07<17:58:57, 7.29s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 90, in pil_loader + return img.convert("RGB") + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 993, in convert + self.load() + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/ImageFile.py", line 319, in load + raise _get_oserror(err_code, encoder=False) +OSError: broken data stream when reading image file +[Try #0] Failed to fetch sample 6013612 in VC:s3://gui-agent/data_20250623/windows_augment/images. Exception: broken data stream when reading image file +Problematic sample: {'image': 'autocad/20250508_161646_1/images/before_screenshot_1_id_73_internvl_element-caption_crop_1_grounding_instructions_point_o_paste.png', 'conversations': [{'from': 'human', 'value': "\nProvide the point located in: A 'Field' button with a small icon and white text on dark gray background, positioned in the Data section of the AutoCAD ribbon toolbar."}, {'from': 'gpt', 'value': "A 'Field' button with a small icon and white text on dark gray background, positioned in the Data section of the AutoCAD ribbon toolbar.[[262, 273]]"}], 'width': 3024, 'height': 1964} + 27%|██▋ | 3310/12188 [7:06:15<18:08:50, 7.36s/it] {'loss': 0.3747, 'grad_norm': 0.6187877077363435, 'learning_rate': 8.547258592077831e-06, 'epoch': 0.27} + 27%|██▋ | 3310/12188 [7:06:15<18:08:50, 7.36s/it] 27%|██▋ | 3311/12188 [7:06:21<17:31:53, 7.11s/it] {'loss': 0.4424, 'grad_norm': 0.6512878945588138, 'learning_rate': 8.546322055858526e-06, 'epoch': 0.27} + 27%|██▋ | 3311/12188 [7:06:21<17:31:53, 7.11s/it] 27%|██▋ | 3312/12188 [7:06:30<18:16:50, 7.41s/it] {'loss': 0.3891, 'grad_norm': 0.6325318838917804, 'learning_rate': 8.545385269203656e-06, 'epoch': 0.27} + 27%|██▋ | 3312/12188 [7:06:30<18:16:50, 7.41s/it] 27%|██▋ | 3313/12188 [7:06:36<17:34:41, 7.13s/it] {'loss': 0.3843, 'grad_norm': 0.6080239914391311, 'learning_rate': 8.544448232179375e-06, 'epoch': 0.27} + 27%|██▋ | 3313/12188 [7:06:36<17:34:41, 7.13s/it] 27%|██▋ | 3314/12188 [7:06:46<19:24:40, 7.87s/it] {'loss': 0.3867, 'grad_norm': 0.6358470331793431, 'learning_rate': 8.543510944851852e-06, 'epoch': 0.27} + 27%|██▋ | 3314/12188 [7:06:46<19:24:40, 7.87s/it] 27%|██▋ | 3315/12188 [7:06:53<19:02:49, 7.73s/it] {'loss': 0.3879, 'grad_norm': 0.7294541476765931, 'learning_rate': 8.542573407287283e-06, 'epoch': 0.27} + 27%|██▋ | 3315/12188 [7:06:53<19:02:49, 7.73s/it] 27%|██▋ | 3316/12188 [7:07:00<18:25:22, 7.48s/it] {'loss': 0.3374, 'grad_norm': 0.6377637961037489, 'learning_rate': 8.541635619551868e-06, 'epoch': 0.27} + 27%|██▋ | 3316/12188 [7:07:00<18:25:22, 7.48s/it] 27%|██▋ | 3317/12188 [7:07:08<19:09:08, 7.77s/it] {'loss': 0.3872, 'grad_norm': 0.7269738113743257, 'learning_rate': 8.540697581711836e-06, 'epoch': 0.27} + 27%|██▋ | 3317/12188 [7:07:08<19:09:08, 7.77s/it] 27%|██▋ | 3318/12188 [7:07:16<19:01:16, 7.72s/it] {'loss': 0.3528, 'grad_norm': 0.6948339737960392, 'learning_rate': 8.539759293833431e-06, 'epoch': 0.27} + 27%|██▋ | 3318/12188 [7:07:16<19:01:16, 7.72s/it] 27%|██▋ | 3319/12188 [7:07:25<19:38:48, 7.97s/it] {'loss': 0.3908, 'grad_norm': 0.6531878288613396, 'learning_rate': 8.538820755982911e-06, 'epoch': 0.27} + 27%|██▋ | 3319/12188 [7:07:25<19:38:48, 7.97s/it] 27%|██▋ | 3320/12188 [7:07:32<18:58:15, 7.70s/it] {'loss': 0.344, 'grad_norm': 0.5979804721472852, 'learning_rate': 8.537881968226553e-06, 'epoch': 0.27} + 27%|██▋ | 3320/12188 [7:07:32<18:58:15, 7.70s/it]Invalidate trace cache @ step 2: expected module 1, but got module 364 + 27%|██▋ | 3321/12188 [7:07:37<17:21:15, 7.05s/it] {'loss': 0.6843, 'grad_norm': 0.7834091711478649, 'learning_rate': 8.536942930630657e-06, 'epoch': 0.27} + 27%|██▋ | 3321/12188 [7:07:37<17:21:15, 7.05s/it] 27%|██▋ | 3322/12188 [7:07:45<17:39:27, 7.17s/it] {'loss': 0.3417, 'grad_norm': 0.6236577235821512, 'learning_rate': 8.536003643261532e-06, 'epoch': 0.27} + 27%|██▋ | 3322/12188 [7:07:45<17:39:27, 7.17s/it] 27%|██▋ | 3323/12188 [7:07:52<17:29:43, 7.10s/it] {'loss': 0.4064, 'grad_norm': 0.6656137243275213, 'learning_rate': 8.535064106185513e-06, 'epoch': 0.27} + 27%|██▋ | 3323/12188 [7:07:52<17:29:43, 7.10s/it] 27%|██▋ | 3324/12188 [7:07:59<17:31:50, 7.12s/it] {'loss': 0.3657, 'grad_norm': 0.6377039487094848, 'learning_rate': 8.534124319468945e-06, 'epoch': 0.27} + 27%|██▋ | 3324/12188 [7:07:59<17:31:50, 7.12s/it] 27%|██▋ | 3325/12188 [7:08:05<17:11:47, 6.98s/it] {'loss': 0.3684, 'grad_norm': 0.6537862992972946, 'learning_rate': 8.533184283178196e-06, 'epoch': 0.27} + 27%|██▋ | 3325/12188 [7:08:05<17:11:47, 6.98s/it] 27%|██▋ | 3326/12188 [7:08:12<17:05:20, 6.94s/it] {'loss': 0.3532, 'grad_norm': 0.5822828533886745, 'learning_rate': 8.532243997379647e-06, 'epoch': 0.27} + 27%|██▋ | 3326/12188 [7:08:12<17:05:20, 6.94s/it] 27%|██▋ | 3327/12188 [7:08:19<16:53:51, 6.87s/it] {'loss': 0.3785, 'grad_norm': 0.7044216761035875, 'learning_rate': 8.531303462139705e-06, 'epoch': 0.27} + 27%|██▋ | 3327/12188 [7:08:19<16:53:51, 6.87s/it] 27%|██▋ | 3328/12188 [7:08:26<17:00:45, 6.91s/it] {'loss': 0.3549, 'grad_norm': 0.6858242909864383, 'learning_rate': 8.530362677524783e-06, 'epoch': 0.27} + 27%|██▋ | 3328/12188 [7:08:26<17:00:45, 6.91s/it] 27%|██▋ | 3329/12188 [7:08:33<16:49:46, 6.84s/it] {'loss': 0.4035, 'grad_norm': 0.6618576640803222, 'learning_rate': 8.529421643601323e-06, 'epoch': 0.27} + 27%|██▋ | 3329/12188 [7:08:33<16:49:46, 6.84s/it] 27%|██▋ | 3330/12188 [7:08:39<16:40:29, 6.78s/it] {'loss': 0.3526, 'grad_norm': 0.6205153655815526, 'learning_rate': 8.528480360435777e-06, 'epoch': 0.27} + 27%|██▋ | 3330/12188 [7:08:39<16:40:29, 6.78s/it] 27%|██▋ | 3331/12188 [7:08:46<16:53:33, 6.87s/it] {'loss': 0.3309, 'grad_norm': 0.6376699275993787, 'learning_rate': 8.527538828094617e-06, 'epoch': 0.27} + 27%|██▋ | 3331/12188 [7:08:46<16:53:33, 6.87s/it] 27%|██▋ | 3332/12188 [7:08:53<16:36:57, 6.75s/it] {'loss': 0.4104, 'grad_norm': 0.6912427793885435, 'learning_rate': 8.526597046644332e-06, 'epoch': 0.27} + 27%|██▋ | 3332/12188 [7:08:53<16:36:57, 6.75s/it] 27%|██▋ | 3333/12188 [7:09:00<16:44:34, 6.81s/it] {'loss': 0.3731, 'grad_norm': 0.6558498897207347, 'learning_rate': 8.525655016151428e-06, 'epoch': 0.27} + 27%|██▋ | 3333/12188 [7:09:00<16:44:34, 6.81s/it] 27%|██▋ | 3334/12188 [7:09:07<17:24:04, 7.08s/it] {'loss': 0.3609, 'grad_norm': 0.6215413890339537, 'learning_rate': 8.524712736682433e-06, 'epoch': 0.27} + 27%|██▋ | 3334/12188 [7:09:07<17:24:04, 7.08s/it] 27%|██▋ | 3335/12188 [7:09:15<18:07:04, 7.37s/it] {'loss': 0.3979, 'grad_norm': 0.6374270102442289, 'learning_rate': 8.523770208303885e-06, 'epoch': 0.27} + 27%|██▋ | 3335/12188 [7:09:15<18:07:04, 7.37s/it] 27%|██▋ | 3336/12188 [7:09:22<17:30:25, 7.12s/it] {'loss': 0.3652, 'grad_norm': 0.7079729830548607, 'learning_rate': 8.52282743108235e-06, 'epoch': 0.27} + 27%|██▋ | 3336/12188 [7:09:22<17:30:25, 7.12s/it] 27%|██▋ | 3337/12188 [7:09:29<17:11:12, 6.99s/it] {'loss': 0.3391, 'grad_norm': 0.6275660259556283, 'learning_rate': 8.521884405084398e-06, 'epoch': 0.27} + 27%|██▋ | 3337/12188 [7:09:29<17:11:12, 6.99s/it] 27%|██▋ | 3338/12188 [7:09:35<16:56:41, 6.89s/it] {'loss': 0.3739, 'grad_norm': 0.8776549787787862, 'learning_rate': 8.52094113037663e-06, 'epoch': 0.27} + 27%|██▋ | 3338/12188 [7:09:35<16:56:41, 6.89s/it] 27%|██▋ | 3339/12188 [7:09:42<16:45:03, 6.81s/it] {'loss': 0.3382, 'grad_norm': 0.6327790724193015, 'learning_rate': 8.519997607025657e-06, 'epoch': 0.27} + 27%|██▋ | 3339/12188 [7:09:42<16:45:03, 6.81s/it] 27%|██▋ | 3340/12188 [7:09:49<16:44:57, 6.81s/it] {'loss': 0.382, 'grad_norm': 0.669778428687306, 'learning_rate': 8.519053835098105e-06, 'epoch': 0.27} + 27%|██▋ | 3340/12188 [7:09:49<16:44:57, 6.81s/it] 27%|██▋ | 3341/12188 [7:09:56<16:53:41, 6.87s/it] {'loss': 0.3626, 'grad_norm': 0.6975543529543528, 'learning_rate': 8.518109814660627e-06, 'epoch': 0.27} + 27%|██▋ | 3341/12188 [7:09:56<16:53:41, 6.87s/it] 27%|██▋ | 3342/12188 [7:10:03<16:59:35, 6.92s/it] {'loss': 0.3522, 'grad_norm': 0.6836501746316828, 'learning_rate': 8.517165545779885e-06, 'epoch': 0.27} + 27%|██▋ | 3342/12188 [7:10:03<16:59:35, 6.92s/it] 27%|██▋ | 3343/12188 [7:10:10<16:55:52, 6.89s/it] {'loss': 0.3857, 'grad_norm': 0.7087949164875523, 'learning_rate': 8.516221028522565e-06, 'epoch': 0.27} + 27%|██▋ | 3343/12188 [7:10:10<16:55:52, 6.89s/it] 27%|██▋ | 3344/12188 [7:10:16<16:51:13, 6.86s/it] {'loss': 0.3783, 'grad_norm': 0.6347795408501671, 'learning_rate': 8.515276262955363e-06, 'epoch': 0.27} + 27%|██▋ | 3344/12188 [7:10:16<16:51:13, 6.86s/it] 27%|██▋ | 3345/12188 [7:10:24<17:25:16, 7.09s/it] {'loss': 0.3607, 'grad_norm': 0.7653179499710488, 'learning_rate': 8.514331249145001e-06, 'epoch': 0.27} + 27%|██▋ | 3345/12188 [7:10:24<17:25:16, 7.09s/it] 27%|██▋ | 3346/12188 [7:10:31<16:55:17, 6.89s/it] {'loss': 0.3674, 'grad_norm': 0.6663665370776249, 'learning_rate': 8.513385987158212e-06, 'epoch': 0.27} + 27%|██▋ | 3346/12188 [7:10:31<16:55:17, 6.89s/it] 27%|██▋ | 3347/12188 [7:10:37<16:47:17, 6.84s/it] {'loss': 0.3249, 'grad_norm': 0.6473514611552825, 'learning_rate': 8.512440477061747e-06, 'epoch': 0.27} + 27%|██▋ | 3347/12188 [7:10:37<16:47:17, 6.84s/it] 27%|██▋ | 3348/12188 [7:10:44<16:46:12, 6.83s/it] {'loss': 0.347, 'grad_norm': 0.68759291184105, 'learning_rate': 8.511494718922382e-06, 'epoch': 0.27} + 27%|██▋ | 3348/12188 [7:10:44<16:46:12, 6.83s/it] 27%|██▋ | 3349/12188 [7:10:51<17:00:59, 6.93s/it] {'loss': 0.3803, 'grad_norm': 0.6310365479977752, 'learning_rate': 8.5105487128069e-06, 'epoch': 0.27} + 27%|██▋ | 3349/12188 [7:10:51<17:00:59, 6.93s/it] 27%|██▋ | 3350/12188 [7:10:59<17:49:18, 7.26s/it] {'loss': 0.354, 'grad_norm': 0.6294616534516116, 'learning_rate': 8.509602458782107e-06, 'epoch': 0.27} + 27%|██▋ | 3350/12188 [7:10:59<17:49:18, 7.26s/it] 27%|██▋ | 3351/12188 [7:11:06<17:39:14, 7.19s/it] {'loss': 0.3849, 'grad_norm': 0.6462303507706147, 'learning_rate': 8.508655956914829e-06, 'epoch': 0.27} + 27%|██▋ | 3351/12188 [7:11:06<17:39:14, 7.19s/it] 28%|██▊ | 3352/12188 [7:11:14<17:41:47, 7.21s/it] {'loss': 0.377, 'grad_norm': 0.6311783833147525, 'learning_rate': 8.507709207271903e-06, 'epoch': 0.28} + 28%|██▊ | 3352/12188 [7:11:14<17:41:47, 7.21s/it] 28%|██▊ | 3353/12188 [7:11:21<17:55:31, 7.30s/it] {'loss': 0.386, 'grad_norm': 0.7053697975881968, 'learning_rate': 8.506762209920188e-06, 'epoch': 0.28} + 28%|██▊ | 3353/12188 [7:11:21<17:55:31, 7.30s/it] 28%|██▊ | 3354/12188 [7:11:28<17:33:44, 7.16s/it] {'loss': 0.3424, 'grad_norm': 0.6493725175115188, 'learning_rate': 8.50581496492656e-06, 'epoch': 0.28} + 28%|██▊ | 3354/12188 [7:11:28<17:33:44, 7.16s/it] 28%|██▊ | 3355/12188 [7:11:35<17:35:31, 7.17s/it] {'loss': 0.3618, 'grad_norm': 0.6316487499796051, 'learning_rate': 8.504867472357913e-06, 'epoch': 0.28} + 28%|██▊ | 3355/12188 [7:11:35<17:35:31, 7.17s/it] 28%|██▊ | 3356/12188 [7:11:42<17:20:58, 7.07s/it] {'loss': 0.3457, 'grad_norm': 0.6118402492890171, 'learning_rate': 8.503919732281156e-06, 'epoch': 0.28} + 28%|██▊ | 3356/12188 [7:11:42<17:20:58, 7.07s/it] 28%|██▊ | 3357/12188 [7:11:49<17:09:07, 6.99s/it] {'loss': 0.3507, 'grad_norm': 0.6609012135000731, 'learning_rate': 8.502971744763216e-06, 'epoch': 0.28} + 28%|██▊ | 3357/12188 [7:11:49<17:09:07, 6.99s/it] 28%|██▊ | 3358/12188 [7:11:55<16:52:53, 6.88s/it] {'loss': 0.3977, 'grad_norm': 0.8407371670649131, 'learning_rate': 8.502023509871041e-06, 'epoch': 0.28} + 28%|██▊ | 3358/12188 [7:11:55<16:52:53, 6.88s/it] 28%|██▊ | 3359/12188 [7:12:03<17:11:00, 7.01s/it] {'loss': 0.3604, 'grad_norm': 0.623832364964405, 'learning_rate': 8.50107502767159e-06, 'epoch': 0.28} + 28%|██▊ | 3359/12188 [7:12:03<17:11:00, 7.01s/it] 28%|██▊ | 3360/12188 [7:12:09<17:00:22, 6.94s/it] {'loss': 0.3458, 'grad_norm': 0.6515255850079706, 'learning_rate': 8.500126298231845e-06, 'epoch': 0.28} + 28%|██▊ | 3360/12188 [7:12:09<17:00:22, 6.94s/it] 28%|██▊ | 3361/12188 [7:12:16<17:01:46, 6.95s/it] {'loss': 0.3778, 'grad_norm': 0.7450669627688858, 'learning_rate': 8.499177321618805e-06, 'epoch': 0.28} + 28%|██▊ | 3361/12188 [7:12:16<17:01:46, 6.95s/it] 28%|██▊ | 3362/12188 [7:12:23<16:54:00, 6.89s/it] {'loss': 0.3769, 'grad_norm': 0.6729363127328544, 'learning_rate': 8.498228097899486e-06, 'epoch': 0.28} + 28%|██▊ | 3362/12188 [7:12:23<16:54:00, 6.89s/it] 28%|██▊ | 3363/12188 [7:12:30<16:37:25, 6.78s/it] {'loss': 0.36, 'grad_norm': 1.1330233747119909, 'learning_rate': 8.497278627140916e-06, 'epoch': 0.28} + 28%|██▊ | 3363/12188 [7:12:30<16:37:25, 6.78s/it] 28%|██▊ | 3364/12188 [7:12:37<16:55:27, 6.90s/it] {'loss': 0.3552, 'grad_norm': 0.6319496811155692, 'learning_rate': 8.496328909410151e-06, 'epoch': 0.28} + 28%|██▊ | 3364/12188 [7:12:37<16:55:27, 6.90s/it] 28%|██▊ | 3365/12188 [7:12:43<16:36:00, 6.77s/it] {'loss': 0.3748, 'grad_norm': 1.0820283663958843, 'learning_rate': 8.495378944774256e-06, 'epoch': 0.28} + 28%|██▊ | 3365/12188 [7:12:43<16:36:00, 6.77s/it] 28%|██▊ | 3366/12188 [7:12:50<16:31:37, 6.74s/it] {'loss': 0.3565, 'grad_norm': 0.5875028667287997, 'learning_rate': 8.494428733300313e-06, 'epoch': 0.28} + 28%|██▊ | 3366/12188 [7:12:50<16:31:37, 6.74s/it] 28%|██▊ | 3367/12188 [7:12:57<16:23:42, 6.69s/it] {'loss': 0.3768, 'grad_norm': 0.6618674861782741, 'learning_rate': 8.49347827505543e-06, 'epoch': 0.28} + 28%|██▊ | 3367/12188 [7:12:57<16:23:42, 6.69s/it] 28%|��█▊ | 3368/12188 [7:13:03<16:18:25, 6.66s/it] {'loss': 0.372, 'grad_norm': 0.6847948746588862, 'learning_rate': 8.49252757010672e-06, 'epoch': 0.28} + 28%|██▊ | 3368/12188 [7:13:03<16:18:25, 6.66s/it] 28%|██▊ | 3369/12188 [7:13:10<16:17:56, 6.65s/it] {'loss': 0.3473, 'grad_norm': 0.8060639400014415, 'learning_rate': 8.491576618521328e-06, 'epoch': 0.28} + 28%|██▊ | 3369/12188 [7:13:10<16:17:56, 6.65s/it] 28%|██▊ | 3370/12188 [7:13:17<16:22:30, 6.69s/it] {'loss': 0.3548, 'grad_norm': 0.658399531407268, 'learning_rate': 8.490625420366405e-06, 'epoch': 0.28} + 28%|██▊ | 3370/12188 [7:13:17<16:22:30, 6.69s/it] 28%|██▊ | 3371/12188 [7:13:24<16:50:26, 6.88s/it] {'loss': 0.3689, 'grad_norm': 0.6051827670882558, 'learning_rate': 8.489673975709121e-06, 'epoch': 0.28} + 28%|██▊ | 3371/12188 [7:13:24<16:50:26, 6.88s/it] 28%|██▊ | 3372/12188 [7:13:31<16:46:42, 6.85s/it] {'loss': 0.3731, 'grad_norm': 0.7296156157263644, 'learning_rate': 8.488722284616668e-06, 'epoch': 0.28} + 28%|██▊ | 3372/12188 [7:13:31<16:46:42, 6.85s/it] 28%|██▊ | 3373/12188 [7:13:38<17:15:16, 7.05s/it] {'loss': 0.3621, 'grad_norm': 0.577687537422088, 'learning_rate': 8.487770347156252e-06, 'epoch': 0.28} + 28%|██▊ | 3373/12188 [7:13:38<17:15:16, 7.05s/it] 28%|██▊ | 3374/12188 [7:13:45<17:08:24, 7.00s/it] {'loss': 0.3528, 'grad_norm': 0.6496900867000808, 'learning_rate': 8.486818163395097e-06, 'epoch': 0.28} + 28%|██▊ | 3374/12188 [7:13:45<17:08:24, 7.00s/it] 28%|██▊ | 3375/12188 [7:13:52<17:15:05, 7.05s/it] {'loss': 0.3201, 'grad_norm': 0.6969801250723261, 'learning_rate': 8.485865733400447e-06, 'epoch': 0.28} + 28%|██▊ | 3375/12188 [7:13:52<17:15:05, 7.05s/it] 28%|██▊ | 3376/12188 [7:14:01<18:16:50, 7.47s/it] {'loss': 0.3469, 'grad_norm': 0.6474267464803716, 'learning_rate': 8.484913057239557e-06, 'epoch': 0.28} + 28%|██▊ | 3376/12188 [7:14:01<18:16:50, 7.47s/it] 28%|██▊ | 3377/12188 [7:14:07<17:47:30, 7.27s/it] {'loss': 0.4058, 'grad_norm': 0.7517927009950771, 'learning_rate': 8.483960134979707e-06, 'epoch': 0.28} + 28%|██▊ | 3377/12188 [7:14:07<17:47:30, 7.27s/it] 28%|██▊ | 3378/12188 [7:14:15<17:38:48, 7.21s/it] {'loss': 0.3767, 'grad_norm': 0.6678379043823098, 'learning_rate': 8.48300696668819e-06, 'epoch': 0.28} + 28%|██▊ | 3378/12188 [7:14:15<17:38:48, 7.21s/it] 28%|██▊ | 3379/12188 [7:14:22<18:00:21, 7.36s/it] {'loss': 0.3778, 'grad_norm': 0.7054996287842821, 'learning_rate': 8.482053552432318e-06, 'epoch': 0.28} + 28%|██▊ | 3379/12188 [7:14:22<18:00:21, 7.36s/it] 28%|██▊ | 3380/12188 [7:14:30<17:57:08, 7.34s/it] {'loss': 0.378, 'grad_norm': 0.6708040946719929, 'learning_rate': 8.481099892279418e-06, 'epoch': 0.28} + 28%|██▊ | 3380/12188 [7:14:30<17:57:08, 7.34s/it] 28%|██▊ | 3381/12188 [7:14:36<17:40:17, 7.22s/it] {'loss': 0.3774, 'grad_norm': 0.6506917862917554, 'learning_rate': 8.480145986296834e-06, 'epoch': 0.28} + 28%|██▊ | 3381/12188 [7:14:37<17:40:17, 7.22s/it] 28%|██▊ | 3382/12188 [7:14:44<17:49:28, 7.29s/it] {'loss': 0.355, 'grad_norm': 0.6295702248851965, 'learning_rate': 8.479191834551934e-06, 'epoch': 0.28} + 28%|██▊ | 3382/12188 [7:14:44<17:49:28, 7.29s/it] 28%|██▊ | 3383/12188 [7:14:52<18:12:06, 7.44s/it] {'loss': 0.3666, 'grad_norm': 0.6238009396916685, 'learning_rate': 8.478237437112095e-06, 'epoch': 0.28} + 28%|██▊ | 3383/12188 [7:14:52<18:12:06, 7.44s/it] 28%|██▊ | 3384/12188 [7:14:58<17:40:15, 7.23s/it] {'loss': 0.3632, 'grad_norm': 0.6464341248163621, 'learning_rate': 8.47728279404472e-06, 'epoch': 0.28} + 28%|██▊ | 3384/12188 [7:14:58<17:40:15, 7.23s/it] 28%|██▊ | 3385/12188 [7:15:05<17:15:22, 7.06s/it] {'loss': 0.3501, 'grad_norm': 0.5885182905808428, 'learning_rate': 8.476327905417217e-06, 'epoch': 0.28} + 28%|██▊ | 3385/12188 [7:15:05<17:15:22, 7.06s/it] 28%|██▊ | 3386/12188 [7:15:12<16:53:19, 6.91s/it] {'loss': 0.3661, 'grad_norm': 0.6623730728426084, 'learning_rate': 8.475372771297023e-06, 'epoch': 0.28} + 28%|██▊ | 3386/12188 [7:15:12<16:53:19, 6.91s/it] 28%|██▊ | 3387/12188 [7:15:19<16:54:28, 6.92s/it] {'loss': 0.3962, 'grad_norm': 0.6765234131901048, 'learning_rate': 8.474417391751587e-06, 'epoch': 0.28} + 28%|██▊ | 3387/12188 [7:15:19<16:54:28, 6.92s/it] 28%|██▊ | 3388/12188 [7:15:26<16:58:39, 6.95s/it] {'loss': 0.4239, 'grad_norm': 0.6559950876048656, 'learning_rate': 8.473461766848378e-06, 'epoch': 0.28} + 28%|██▊ | 3388/12188 [7:15:26<16:58:39, 6.95s/it] 28%|██▊ | 3389/12188 [7:15:33<17:01:03, 6.96s/it] {'loss': 0.4008, 'grad_norm': 0.6153620751399828, 'learning_rate': 8.472505896654881e-06, 'epoch': 0.28} + 28%|██▊ | 3389/12188 [7:15:33<17:01:03, 6.96s/it] 28%|██▊ | 3390/12188 [7:15:40<17:08:46, 7.02s/it] {'loss': 0.3645, 'grad_norm': 0.5955904603928537, 'learning_rate': 8.471549781238595e-06, 'epoch': 0.28} + 28%|██▊ | 3390/12188 [7:15:40<17:08:46, 7.02s/it] 28%|██▊ | 3391/12188 [7:15:48<18:21:14, 7.51s/it] {'loss': 0.3304, 'grad_norm': 0.76005689601423, 'learning_rate': 8.47059342066704e-06, 'epoch': 0.28} + 28%|██▊ | 3391/12188 [7:15:48<18:21:14, 7.51s/it] 28%|██▊ | 3392/12188 [7:15:55<17:48:32, 7.29s/it] {'loss': 0.3218, 'grad_norm': 0.673465866803607, 'learning_rate': 8.469636815007756e-06, 'epoch': 0.28} + 28%|██▊ | 3392/12188 [7:15:55<17:48:32, 7.29s/it] 28%|██▊ | 3393/12188 [7:16:02<17:48:39, 7.29s/it] {'loss': 0.3468, 'grad_norm': 0.6123056958932955, 'learning_rate': 8.468679964328293e-06, 'epoch': 0.28} + 28%|██▊ | 3393/12188 [7:16:03<17:48:39, 7.29s/it] 28%|██▊ | 3394/12188 [7:16:10<18:06:01, 7.41s/it] {'loss': 0.3887, 'grad_norm': 0.7724704085941378, 'learning_rate': 8.467722868696222e-06, 'epoch': 0.28} + 28%|██▊ | 3394/12188 [7:16:10<18:06:01, 7.41s/it] 28%|██▊ | 3395/12188 [7:16:20<20:09:14, 8.25s/it] {'loss': 0.3514, 'grad_norm': 1.403768095558883, 'learning_rate': 8.466765528179135e-06, 'epoch': 0.28} + 28%|██▊ | 3395/12188 [7:16:20<20:09:14, 8.25s/it] 28%|██▊ | 3396/12188 [7:16:27<19:07:34, 7.83s/it] {'loss': 0.3526, 'grad_norm': 0.6627194637876966, 'learning_rate': 8.465807942844637e-06, 'epoch': 0.28} + 28%|██▊ | 3396/12188 [7:16:27<19:07:34, 7.83s/it] 28%|██▊ | 3397/12188 [7:16:34<18:24:38, 7.54s/it] {'loss': 0.3354, 'grad_norm': 0.6272940627822977, 'learning_rate': 8.46485011276035e-06, 'epoch': 0.28} + 28%|██▊ | 3397/12188 [7:16:34<18:24:38, 7.54s/it] 28%|██▊ | 3398/12188 [7:16:42<18:23:08, 7.53s/it] {'loss': 0.3945, 'grad_norm': 0.7228014886934863, 'learning_rate': 8.463892037993915e-06, 'epoch': 0.28} + 28%|██▊ | 3398/12188 [7:16:42<18:23:08, 7.53s/it] 28%|██▊ | 3399/12188 [7:16:49<18:26:16, 7.55s/it] {'loss': 0.3498, 'grad_norm': 0.6409645453786755, 'learning_rate': 8.462933718612988e-06, 'epoch': 0.28} + 28%|██▊ | 3399/12188 [7:16:49<18:26:16, 7.55s/it] 28%|██▊ | 3400/12188 [7:16:57<18:30:14, 7.58s/it] {'loss': 0.3393, 'grad_norm': 0.6669332810482087, 'learning_rate': 8.461975154685246e-06, 'epoch': 0.28} + 28%|██▊ | 3400/12188 [7:16:57<18:30:14, 7.58s/it] 28%|██▊ | 3401/12188 [7:17:04<18:04:57, 7.41s/it] {'loss': 0.3655, 'grad_norm': 0.6532874670131701, 'learning_rate': 8.461016346278381e-06, 'epoch': 0.28} + 28%|██▊ | 3401/12188 [7:17:04<18:04:57, 7.41s/it] 28%|██▊ | 3402/12188 [7:17:11<17:42:50, 7.26s/it] {'loss': 0.3775, 'grad_norm': 0.6270392277342277, 'learning_rate': 8.460057293460102e-06, 'epoch': 0.28} + 28%|██▊ | 3402/12188 [7:17:11<17:42:50, 7.26s/it] 28%|���█▊ | 3403/12188 [7:17:18<17:27:22, 7.15s/it] {'loss': 0.3888, 'grad_norm': 0.7323421314357743, 'learning_rate': 8.459097996298137e-06, 'epoch': 0.28} + 28%|██▊ | 3403/12188 [7:17:18<17:27:22, 7.15s/it] 28%|██▊ | 3404/12188 [7:17:25<17:25:31, 7.14s/it] {'loss': 0.3855, 'grad_norm': 0.6620425194669889, 'learning_rate': 8.458138454860227e-06, 'epoch': 0.28} + 28%|██▊ | 3404/12188 [7:17:25<17:25:31, 7.14s/it] 28%|██▊ | 3405/12188 [7:17:32<17:15:05, 7.07s/it] {'loss': 0.3656, 'grad_norm': 0.6279296975635633, 'learning_rate': 8.457178669214137e-06, 'epoch': 0.28} + 28%|██▊ | 3405/12188 [7:17:32<17:15:05, 7.07s/it] 28%|██▊ | 3406/12188 [7:17:39<17:27:54, 7.16s/it] {'loss': 0.3788, 'grad_norm': 0.63499609208425, 'learning_rate': 8.456218639427643e-06, 'epoch': 0.28} + 28%|██▊ | 3406/12188 [7:17:39<17:27:54, 7.16s/it] 28%|██▊ | 3407/12188 [7:17:49<19:19:22, 7.92s/it] {'loss': 0.3912, 'grad_norm': 0.6449587567636315, 'learning_rate': 8.455258365568541e-06, 'epoch': 0.28} + 28%|██▊ | 3407/12188 [7:17:49<19:19:22, 7.92s/it] 28%|██▊ | 3408/12188 [7:17:57<19:13:03, 7.88s/it] {'loss': 0.363, 'grad_norm': 0.6490175705752423, 'learning_rate': 8.454297847704644e-06, 'epoch': 0.28} + 28%|██▊ | 3408/12188 [7:17:57<19:13:03, 7.88s/it] 28%|██▊ | 3409/12188 [7:18:03<18:22:24, 7.53s/it] {'loss': 0.3572, 'grad_norm': 0.6842078052063821, 'learning_rate': 8.453337085903785e-06, 'epoch': 0.28} + 28%|██▊ | 3409/12188 [7:18:03<18:22:24, 7.53s/it] 28%|██▊ | 3410/12188 [7:18:12<19:06:40, 7.84s/it] {'loss': 0.3358, 'grad_norm': 0.6101541451739401, 'learning_rate': 8.452376080233808e-06, 'epoch': 0.28} + 28%|██▊ | 3410/12188 [7:18:12<19:06:40, 7.84s/it] 28%|██▊ | 3411/12188 [7:18:19<18:16:41, 7.50s/it] {'loss': 0.3491, 'grad_norm': 0.6370458046077488, 'learning_rate': 8.45141483076258e-06, 'epoch': 0.28} + 28%|██▊ | 3411/12188 [7:18:19<18:16:41, 7.50s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7fb0ba79a2a0> +[Try #0] Failed to fetch sample 4341692 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7fb0ba79a2a0> +Problematic sample: {'image': '20240822_131046_before_screenshot_sub0.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Facet'"}, {'from': 'gpt', 'value': '\nclick(x=0.1905, y=0.139)\n'}]} + 28%|██▊ | 3412/12188 [7:18:25<17:45:11, 7.28s/it] {'loss': 0.3679, 'grad_norm': 0.6358033865353049, 'learning_rate': 8.45045333755798e-06, 'epoch': 0.28} + 28%|██▊ | 3412/12188 [7:18:25<17:45:11, 7.28s/it] 28%|██▊ | 3413/12188 [7:18:32<17:39:16, 7.24s/it] {'loss': 0.3796, 'grad_norm': 0.769570310923854, 'learning_rate': 8.449491600687909e-06, 'epoch': 0.28} + 28%|██▊ | 3413/12188 [7:18:32<17:39:16, 7.24s/it] 28%|██▊ | 3414/12188 [7:18:39<17:23:18, 7.13s/it] {'loss': 0.3296, 'grad_norm': 0.6325342257872946, 'learning_rate': 8.448529620220284e-06, 'epoch': 0.28} + 28%|██▊ | 3414/12188 [7:18:39<17:23:18, 7.13s/it] 28%|██▊ | 3415/12188 [7:18:48<18:27:21, 7.57s/it] {'loss': 0.3878, 'grad_norm': 0.6321208902726518, 'learning_rate': 8.447567396223037e-06, 'epoch': 0.28} + 28%|██▊ | 3415/12188 [7:18:48<18:27:21, 7.57s/it] 28%|██▊ | 3416/12188 [7:18:55<18:15:20, 7.49s/it] {'loss': 0.4271, 'grad_norm': 0.6896559586463067, 'learning_rate': 8.446604928764121e-06, 'epoch': 0.28} + 28%|██▊ | 3416/12188 [7:18:55<18:15:20, 7.49s/it] 28%|██▊ | 3417/12188 [7:19:02<17:47:38, 7.30s/it] {'loss': 0.3719, 'grad_norm': 0.7026683021735592, 'learning_rate': 8.4456422179115e-06, 'epoch': 0.28} + 28%|██▊ | 3417/12188 [7:19:02<17:47:38, 7.30s/it] 28%|██▊ | 3418/12188 [7:19:09<17:23:04, 7.14s/it] {'loss': 0.3801, 'grad_norm': 0.6830953539661754, 'learning_rate': 8.444679263733163e-06, 'epoch': 0.28} + 28%|██▊ | 3418/12188 [7:19:09<17:23:04, 7.14s/it] 28%|██▊ | 3419/12188 [7:19:18<18:38:53, 7.66s/it] {'loss': 0.3532, 'grad_norm': 0.6450555241584502, 'learning_rate': 8.44371606629711e-06, 'epoch': 0.28} + 28%|██▊ | 3419/12188 [7:19:18<18:38:53, 7.66s/it] 28%|██▊ | 3420/12188 [7:19:24<17:49:42, 7.32s/it] {'loss': 0.3962, 'grad_norm': 0.6298693397151094, 'learning_rate': 8.442752625671363e-06, 'epoch': 0.28} + 28%|██▊ | 3420/12188 [7:19:24<17:49:42, 7.32s/it] 28%|██▊ | 3421/12188 [7:19:31<17:36:58, 7.23s/it] {'loss': 0.3671, 'grad_norm': 0.6614103169215835, 'learning_rate': 8.441788941923954e-06, 'epoch': 0.28} + 28%|██▊ | 3421/12188 [7:19:31<17:36:58, 7.23s/it] 28%|██▊ | 3422/12188 [7:19:38<17:22:12, 7.13s/it] {'loss': 0.3528, 'grad_norm': 0.6970377061730759, 'learning_rate': 8.44082501512294e-06, 'epoch': 0.28} + 28%|██▊ | 3422/12188 [7:19:38<17:22:12, 7.13s/it] 28%|██▊ | 3423/12188 [7:19:45<17:11:44, 7.06s/it] {'loss': 0.3758, 'grad_norm': 0.693982340722826, 'learning_rate': 8.439860845336394e-06, 'epoch': 0.28} + 28%|██▊ | 3423/12188 [7:19:45<17:11:44, 7.06s/it] 28%|██▊ | 3424/12188 [7:19:52<17:07:10, 7.03s/it] {'loss': 0.3558, 'grad_norm': 0.626164248601781, 'learning_rate': 8.4388964326324e-06, 'epoch': 0.28} + 28%|██▊ | 3424/12188 [7:19:52<17:07:10, 7.03s/it] 28%|██▊ | 3425/12188 [7:19:59<16:48:34, 6.91s/it] {'loss': 0.3773, 'grad_norm': 0.6609691032453204, 'learning_rate': 8.437931777079065e-06, 'epoch': 0.28} + 28%|██▊ | 3425/12188 [7:19:59<16:48:34, 6.91s/it] 28%|██▊ | 3426/12188 [7:20:07<17:45:17, 7.29s/it] {'loss': 0.3359, 'grad_norm': 0.5921268813373416, 'learning_rate': 8.43696687874451e-06, 'epoch': 0.28} + 28%|██▊ | 3426/12188 [7:20:07<17:45:17, 7.29s/it] 28%|██▊ | 3427/12188 [7:20:14<17:52:35, 7.35s/it] {'loss': 0.4001, 'grad_norm': 0.6835237060351199, 'learning_rate': 8.436001737696877e-06, 'epoch': 0.28} + 28%|██▊ | 3427/12188 [7:20:14<17:52:35, 7.35s/it] 28%|██▊ | 3428/12188 [7:20:21<17:42:58, 7.28s/it] {'loss': 0.3606, 'grad_norm': 0.6879833360292036, 'learning_rate': 8.435036354004322e-06, 'epoch': 0.28} + 28%|██▊ | 3428/12188 [7:20:21<17:42:58, 7.28s/it] 28%|██▊ | 3429/12188 [7:20:30<18:44:39, 7.70s/it] {'loss': 0.363, 'grad_norm': 0.6110808685597865, 'learning_rate': 8.43407072773502e-06, 'epoch': 0.28} + 28%|██▊ | 3429/12188 [7:20:30<18:44:39, 7.70s/it] 28%|██▊ | 3430/12188 [7:20:37<18:19:05, 7.53s/it] {'loss': 0.399, 'grad_norm': 0.6871048456141328, 'learning_rate': 8.433104858957157e-06, 'epoch': 0.28} + 28%|██▊ | 3430/12188 [7:20:37<18:19:05, 7.53s/it] 28%|██▊ | 3431/12188 [7:20:44<17:53:09, 7.35s/it] {'loss': 0.3722, 'grad_norm': 0.6593151752335106, 'learning_rate': 8.432138747738947e-06, 'epoch': 0.28} + 28%|██▊ | 3431/12188 [7:20:44<17:53:09, 7.35s/it] 28%|██▊ | 3432/12188 [7:20:52<17:51:41, 7.34s/it] {'loss': 0.4073, 'grad_norm': 0.7421636622724503, 'learning_rate': 8.431172394148613e-06, 'epoch': 0.28} + 28%|██▊ | 3432/12188 [7:20:52<17:51:41, 7.34s/it] 28%|██▊ | 3433/12188 [7:20:59<17:49:10, 7.33s/it] {'loss': 0.3799, 'grad_norm': 0.6100168483276671, 'learning_rate': 8.430205798254396e-06, 'epoch': 0.28} + 28%|██▊ | 3433/12188 [7:20:59<17:49:10, 7.33s/it] 28%|██▊ | 3434/12188 [7:21:06<17:30:52, 7.20s/it] {'loss': 0.3709, 'grad_norm': 0.6465055845783861, 'learning_rate': 8.429238960124557e-06, 'epoch': 0.28} + 28%|██▊ | 3434/12188 [7:21:06<17:30:52, 7.20s/it] 28%|██▊ | 3435/12188 [7:21:14<18:08:54, 7.46s/it] {'loss': 0.3363, 'grad_norm': 0.6128445637201315, 'learning_rate': 8.42827187982737e-06, 'epoch': 0.28} + 28%|██▊ | 3435/12188 [7:21:14<18:08:54, 7.46s/it] 28%|██▊ | 3436/12188 [7:21:21<17:43:26, 7.29s/it] {'loss': 0.3675, 'grad_norm': 0.7204802265835862, 'learning_rate': 8.427304557431135e-06, 'epoch': 0.28} + 28%|██▊ | 3436/12188 [7:21:21<17:43:26, 7.29s/it] 28%|██▊ | 3437/12188 [7:21:29<18:10:34, 7.48s/it] {'loss': 0.3406, 'grad_norm': 0.6114231300497421, 'learning_rate': 8.426336993004155e-06, 'epoch': 0.28} + 28%|██▊ | 3437/12188 [7:21:29<18:10:34, 7.48s/it] 28%|██▊ | 3438/12188 [7:21:36<18:08:21, 7.46s/it] {'loss': 0.3973, 'grad_norm': 0.6423786510935567, 'learning_rate': 8.425369186614763e-06, 'epoch': 0.28} + 28%|██▊ | 3438/12188 [7:21:36<18:08:21, 7.46s/it] 28%|██▊ | 3439/12188 [7:21:43<17:40:11, 7.27s/it] {'loss': 0.3714, 'grad_norm': 0.6621577404937086, 'learning_rate': 8.424401138331302e-06, 'epoch': 0.28} + 28%|██▊ | 3439/12188 [7:21:43<17:40:11, 7.27s/it] 28%|██▊ | 3440/12188 [7:21:50<17:18:35, 7.12s/it] {'loss': 0.4082, 'grad_norm': 0.6844631466382674, 'learning_rate': 8.423432848222135e-06, 'epoch': 0.28} + 28%|██▊ | 3440/12188 [7:21:50<17:18:35, 7.12s/it] 28%|██▊ | 3441/12188 [7:21:58<18:26:20, 7.59s/it] {'loss': 0.3556, 'grad_norm': 0.5845605096291969, 'learning_rate': 8.422464316355638e-06, 'epoch': 0.28} + 28%|██▊ | 3441/12188 [7:21:58<18:26:20, 7.59s/it] 28%|██▊ | 3442/12188 [7:22:06<18:09:12, 7.47s/it] {'loss': 0.3147, 'grad_norm': 0.5664860811533896, 'learning_rate': 8.421495542800214e-06, 'epoch': 0.28} + 28%|██▊ | 3442/12188 [7:22:06<18:09:12, 7.47s/it] 28%|██▊ | 3443/12188 [7:22:13<17:52:47, 7.36s/it] {'loss': 0.3905, 'grad_norm': 0.6937066025830424, 'learning_rate': 8.42052652762427e-06, 'epoch': 0.28} + 28%|██▊ | 3443/12188 [7:22:13<17:52:47, 7.36s/it] 28%|██▊ | 3444/12188 [7:22:20<17:35:26, 7.24s/it] {'loss': 0.3685, 'grad_norm': 0.7190235995480226, 'learning_rate': 8.419557270896235e-06, 'epoch': 0.28} + 28%|██▊ | 3444/12188 [7:22:20<17:35:26, 7.24s/it] 28%|██▊ | 3445/12188 [7:22:27<17:27:39, 7.19s/it] {'loss': 0.3657, 'grad_norm': 0.7161857989304208, 'learning_rate': 8.418587772684561e-06, 'epoch': 0.28} + 28%|██▊ | 3445/12188 [7:22:27<17:27:39, 7.19s/it] 28%|██▊ | 3446/12188 [7:22:34<17:29:26, 7.20s/it] {'loss': 0.3379, 'grad_norm': 0.628734744876603, 'learning_rate': 8.417618033057714e-06, 'epoch': 0.28} + 28%|██▊ | 3446/12188 [7:22:34<17:29:26, 7.20s/it] 28%|██▊ | 3447/12188 [7:22:41<17:15:58, 7.11s/it] {'loss': 0.3773, 'grad_norm': 0.6664825407616755, 'learning_rate': 8.416648052084169e-06, 'epoch': 0.28} + 28%|██▊ | 3447/12188 [7:22:41<17:15:58, 7.11s/it] 28%|██▊ | 3448/12188 [7:22:48<17:18:14, 7.13s/it] {'loss': 0.3801, 'grad_norm': 0.6903571784101009, 'learning_rate': 8.415677829832429e-06, 'epoch': 0.28} + 28%|██▊ | 3448/12188 [7:22:48<17:18:14, 7.13s/it] 28%|██▊ | 3449/12188 [7:22:56<18:00:00, 7.42s/it] {'loss': 0.3861, 'grad_norm': 0.6567758677848439, 'learning_rate': 8.414707366371006e-06, 'epoch': 0.28} + 28%|██▊ | 3449/12188 [7:22:56<18:00:00, 7.42s/it] 28%|██▊ | 3450/12188 [7:23:04<18:08:30, 7.47s/it] {'loss': 0.374, 'grad_norm': 0.6230621965021098, 'learning_rate': 8.41373666176844e-06, 'epoch': 0.28} + 28%|██▊ | 3450/12188 [7:23:04<18:08:30, 7.47s/it] 28%|██▊ | 3451/12188 [7:23:13<19:28:15, 8.02s/it] {'loss': 0.355, 'grad_norm': 0.6431197041430349, 'learning_rate': 8.412765716093273e-06, 'epoch': 0.28} + 28%|██▊ | 3451/12188 [7:23:13<19:28:15, 8.02s/it] 28%|██▊ | 3452/12188 [7:23:20<19:00:50, 7.84s/it] {'loss': 0.3709, 'grad_norm': 0.6381677380991532, 'learning_rate': 8.411794529414073e-06, 'epoch': 0.28} + 28%|██▊ | 3452/12188 [7:23:20<19:00:50, 7.84s/it] 28%|██▊ | 3453/12188 [7:23:28<18:41:34, 7.70s/it] {'loss': 0.3531, 'grad_norm': 0.6641998288457245, 'learning_rate': 8.410823101799425e-06, 'epoch': 0.28} + 28%|██▊ | 3453/12188 [7:23:28<18:41:34, 7.70s/it] 28%|██▊ | 3454/12188 [7:23:35<18:04:54, 7.45s/it] {'loss': 0.4028, 'grad_norm': 0.634658670723116, 'learning_rate': 8.409851433317929e-06, 'epoch': 0.28} + 28%|██▊ | 3454/12188 [7:23:35<18:04:54, 7.45s/it] 28%|██▊ | 3455/12188 [7:23:42<17:51:30, 7.36s/it] {'loss': 0.3618, 'grad_norm': 0.6645372835341995, 'learning_rate': 8.408879524038204e-06, 'epoch': 0.28} + 28%|██▊ | 3455/12188 [7:23:42<17:51:30, 7.36s/it] 28%|██▊ | 3456/12188 [7:23:49<17:41:14, 7.29s/it] {'loss': 0.3963, 'grad_norm': 0.6267727785310939, 'learning_rate': 8.407907374028886e-06, 'epoch': 0.28} + 28%|██▊ | 3456/12188 [7:23:49<17:41:14, 7.29s/it] 28%|██▊ | 3457/12188 [7:23:56<17:34:19, 7.25s/it] {'loss': 0.3541, 'grad_norm': 0.6618094087849503, 'learning_rate': 8.406934983358621e-06, 'epoch': 0.28} + 28%|██▊ | 3457/12188 [7:23:56<17:34:19, 7.25s/it] 28%|██▊ | 3458/12188 [7:24:03<17:27:52, 7.20s/it] {'loss': 0.36, 'grad_norm': 0.6779732944354131, 'learning_rate': 8.405962352096082e-06, 'epoch': 0.28} + 28%|██▊ | 3458/12188 [7:24:03<17:27:52, 7.20s/it] 28%|██▊ | 3459/12188 [7:24:10<17:24:45, 7.18s/it] {'loss': 0.3634, 'grad_norm': 0.6264761716738891, 'learning_rate': 8.404989480309955e-06, 'epoch': 0.28} + 28%|██▊ | 3459/12188 [7:24:10<17:24:45, 7.18s/it] 28%|██▊ | 3460/12188 [7:24:17<17:14:38, 7.11s/it] {'loss': 0.4085, 'grad_norm': 0.6722487147354522, 'learning_rate': 8.404016368068941e-06, 'epoch': 0.28} + 28%|██▊ | 3460/12188 [7:24:17<17:14:38, 7.11s/it] 28%|██▊ | 3461/12188 [7:24:25<17:36:02, 7.26s/it] {'loss': 0.3647, 'grad_norm': 0.7238723605679896, 'learning_rate': 8.40304301544176e-06, 'epoch': 0.28} + 28%|██▊ | 3461/12188 [7:24:25<17:36:02, 7.26s/it] 28%|██▊ | 3462/12188 [7:24:32<17:42:28, 7.31s/it] {'loss': 0.3633, 'grad_norm': 0.6691336897521227, 'learning_rate': 8.402069422497148e-06, 'epoch': 0.28} + 28%|██▊ | 3462/12188 [7:24:32<17:42:28, 7.31s/it] 28%|██▊ | 3463/12188 [7:24:39<17:25:51, 7.19s/it] {'loss': 0.3413, 'grad_norm': 0.7057453073392254, 'learning_rate': 8.40109558930386e-06, 'epoch': 0.28} + 28%|██▊ | 3463/12188 [7:24:39<17:25:51, 7.19s/it] 28%|██▊ | 3464/12188 [7:24:46<16:56:21, 6.99s/it] {'loss': 0.3736, 'grad_norm': 0.735376815807539, 'learning_rate': 8.400121515930665e-06, 'epoch': 0.28} + 28%|██▊ | 3464/12188 [7:24:46<16:56:21, 6.99s/it] 28%|██▊ | 3465/12188 [7:24:52<16:47:27, 6.93s/it] {'loss': 0.3825, 'grad_norm': 0.6801466633729751, 'learning_rate': 8.399147202446352e-06, 'epoch': 0.28} + 28%|██▊ | 3465/12188 [7:24:52<16:47:27, 6.93s/it] 28%|██▊ | 3466/12188 [7:24:59<16:50:14, 6.95s/it] {'loss': 0.3204, 'grad_norm': 0.6192933094015607, 'learning_rate': 8.398172648919724e-06, 'epoch': 0.28} + 28%|██▊ | 3466/12188 [7:24:59<16:50:14, 6.95s/it] 28%|██▊ | 3467/12188 [7:25:06<16:49:57, 6.95s/it] {'loss': 0.374, 'grad_norm': 0.6991923620971567, 'learning_rate': 8.397197855419603e-06, 'epoch': 0.28} + 28%|██▊ | 3467/12188 [7:25:06<16:49:57, 6.95s/it] 28%|██▊ | 3468/12188 [7:25:15<17:52:39, 7.38s/it] {'loss': 0.3732, 'grad_norm': 0.670913021001082, 'learning_rate': 8.39622282201483e-06, 'epoch': 0.28} + 28%|██▊ | 3468/12188 [7:25:15<17:52:39, 7.38s/it] 28%|██▊ | 3469/12188 [7:25:22<17:33:03, 7.25s/it] {'loss': 0.3893, 'grad_norm': 0.6376052709364729, 'learning_rate': 8.395247548774255e-06, 'epoch': 0.28} + 28%|██▊ | 3469/12188 [7:25:22<17:33:03, 7.25s/it] 28%|██▊ | 3470/12188 [7:25:29<17:18:59, 7.15s/it] {'loss': 0.3789, 'grad_norm': 0.7322522515072981, 'learning_rate': 8.394272035766754e-06, 'epoch': 0.28} + 28%|██▊ | 3470/12188 [7:25:29<17:18:59, 7.15s/it] 28%|██▊ | 3471/12188 [7:25:37<18:25:08, 7.61s/it] {'loss': 0.3518, 'grad_norm': 0.7335034628385203, 'learning_rate': 8.393296283061213e-06, 'epoch': 0.28} + 28%|██▊ | 3471/12188 [7:25:37<18:25:08, 7.61s/it] 28%|██▊ | 3472/12188 [7:25:44<17:46:27, 7.34s/it] {'loss': 0.3244, 'grad_norm': 0.6617254072638449, 'learning_rate': 8.392320290726543e-06, 'epoch': 0.28} + 28%|██▊ | 3472/12188 [7:25:44<17:46:27, 7.34s/it] 28%|██▊ | 3473/12188 [7:25:52<18:04:31, 7.47s/it] {'loss': 0.352, 'grad_norm': 0.6625476628455924, 'learning_rate': 8.391344058831664e-06, 'epoch': 0.28} + 28%|██▊ | 3473/12188 [7:25:52<18:04:31, 7.47s/it] 29%|██▊ | 3474/12188 [7:26:00<18:54:26, 7.81s/it] {'loss': 0.391, 'grad_norm': 0.6545296910639843, 'learning_rate': 8.390367587445516e-06, 'epoch': 0.29} + 29%|██▊ | 3474/12188 [7:26:00<18:54:26, 7.81s/it] 29%|██▊ | 3475/12188 [7:26:08<18:56:10, 7.82s/it] {'loss': 0.3997, 'grad_norm': 0.6996455541033507, 'learning_rate': 8.389390876637056e-06, 'epoch': 0.29} + 29%|██▊ | 3475/12188 [7:26:08<18:56:10, 7.82s/it] 29%|██▊ | 3476/12188 [7:26:15<18:18:58, 7.57s/it] {'loss': 0.3899, 'grad_norm': 0.6541767356483655, 'learning_rate': 8.388413926475256e-06, 'epoch': 0.29} + 29%|██▊ | 3476/12188 [7:26:15<18:18:58, 7.57s/it] 29%|██▊ | 3477/12188 [7:26:22<17:40:20, 7.30s/it] {'loss': 0.3645, 'grad_norm': 0.6118599665210402, 'learning_rate': 8.387436737029111e-06, 'epoch': 0.29} + 29%|██▊ | 3477/12188 [7:26:22<17:40:20, 7.30s/it] 29%|██▊ | 3478/12188 [7:26:29<17:37:57, 7.29s/it] {'loss': 0.3549, 'grad_norm': 0.6867180316668599, 'learning_rate': 8.386459308367624e-06, 'epoch': 0.29} + 29%|██▊ | 3478/12188 [7:26:29<17:37:57, 7.29s/it] 29%|██▊ | 3479/12188 [7:26:36<17:32:52, 7.25s/it] {'loss': 0.3498, 'grad_norm': 0.6895838337569483, 'learning_rate': 8.385481640559823e-06, 'epoch': 0.29} + 29%|██▊ | 3479/12188 [7:26:36<17:32:52, 7.25s/it] 29%|██▊ | 3480/12188 [7:26:43<17:20:46, 7.17s/it] {'loss': 0.3645, 'grad_norm': 0.6319090426101271, 'learning_rate': 8.384503733674745e-06, 'epoch': 0.29} + 29%|██▊ | 3480/12188 [7:26:43<17:20:46, 7.17s/it] 29%|██▊ | 3481/12188 [7:26:54<19:44:24, 8.16s/it] {'loss': 0.3356, 'grad_norm': 0.7374892079836192, 'learning_rate': 8.383525587781453e-06, 'epoch': 0.29} + 29%|██▊ | 3481/12188 [7:26:54<19:44:24, 8.16s/it] 29%|██▊ | 3482/12188 [7:27:01<18:53:56, 7.81s/it] {'loss': 0.3791, 'grad_norm': 0.7509844764605954, 'learning_rate': 8.382547202949021e-06, 'epoch': 0.29} + 29%|██▊ | 3482/12188 [7:27:01<18:53:56, 7.81s/it] 29%|██▊ | 3483/12188 [7:27:09<19:20:47, 8.00s/it] {'loss': 0.3521, 'grad_norm': 0.6634690427660834, 'learning_rate': 8.381568579246539e-06, 'epoch': 0.29} + 29%|██▊ | 3483/12188 [7:27:09<19:20:47, 8.00s/it] 29%|██▊ | 3484/12188 [7:27:17<18:51:13, 7.80s/it] {'loss': 0.3586, 'grad_norm': 0.6340079404616704, 'learning_rate': 8.380589716743117e-06, 'epoch': 0.29} + 29%|██▊ | 3484/12188 [7:27:17<18:51:13, 7.80s/it] 29%|██▊ | 3485/12188 [7:27:23<17:55:24, 7.41s/it] {'loss': 0.3764, 'grad_norm': 0.6270316508620339, 'learning_rate': 8.37961061550788e-06, 'epoch': 0.29} + 29%|██▊ | 3485/12188 [7:27:23<17:55:24, 7.41s/it] 29%|██▊ | 3486/12188 [7:27:30<17:38:10, 7.30s/it] {'loss': 0.3465, 'grad_norm': 0.6476940181205523, 'learning_rate': 8.37863127560997e-06, 'epoch': 0.29} + 29%|██▊ | 3486/12188 [7:27:30<17:38:10, 7.30s/it] 29%|██▊ | 3487/12188 [7:27:38<18:04:36, 7.48s/it] {'loss': 0.3991, 'grad_norm': 0.6804904247372864, 'learning_rate': 8.37765169711855e-06, 'epoch': 0.29} + 29%|██▊ | 3487/12188 [7:27:38<18:04:36, 7.48s/it] 29%|██▊ | 3488/12188 [7:27:46<18:28:33, 7.65s/it] {'loss': 0.382, 'grad_norm': 0.8023637783743709, 'learning_rate': 8.376671880102794e-06, 'epoch': 0.29} + 29%|██▊ | 3488/12188 [7:27:46<18:28:33, 7.65s/it] 29%|██▊ | 3489/12188 [7:27:53<18:16:52, 7.57s/it] {'loss': 0.3902, 'grad_norm': 0.7086712056635153, 'learning_rate': 8.375691824631891e-06, 'epoch': 0.29} + 29%|██▊ | 3489/12188 [7:27:53<18:16:52, 7.57s/it] 29%|██▊ | 3490/12188 [7:28:00<17:52:55, 7.40s/it] {'loss': 0.3422, 'grad_norm': 0.7252786448155563, 'learning_rate': 8.374711530775058e-06, 'epoch': 0.29} + 29%|██▊ | 3490/12188 [7:28:00<17:52:55, 7.40s/it] 29%|██▊ | 3491/12188 [7:28:08<18:07:19, 7.50s/it] {'loss': 0.3448, 'grad_norm': 0.6895012477776933, 'learning_rate': 8.373730998601519e-06, 'epoch': 0.29} + 29%|██▊ | 3491/12188 [7:28:08<18:07:19, 7.50s/it] 29%|██▊ | 3492/12188 [7:28:17<19:06:11, 7.91s/it] {'loss': 0.3633, 'grad_norm': 0.7032771358433052, 'learning_rate': 8.372750228180515e-06, 'epoch': 0.29} + 29%|██▊ | 3492/12188 [7:28:17<19:06:11, 7.91s/it] 29%|██▊ | 3493/12188 [7:28:24<18:41:28, 7.74s/it] {'loss': 0.3668, 'grad_norm': 0.7209193396089784, 'learning_rate': 8.37176921958131e-06, 'epoch': 0.29} + 29%|██▊ | 3493/12188 [7:28:24<18:41:28, 7.74s/it] 29%|██▊ | 3494/12188 [7:28:31<17:53:14, 7.41s/it] {'loss': 0.3679, 'grad_norm': 0.8552011541825834, 'learning_rate': 8.37078797287318e-06, 'epoch': 0.29} + 29%|██▊ | 3494/12188 [7:28:31<17:53:14, 7.41s/it] 29%|██▊ | 3495/12188 [7:28:38<17:16:55, 7.16s/it] {'loss': 0.3915, 'grad_norm': 0.6608783433490023, 'learning_rate': 8.369806488125418e-06, 'epoch': 0.29} + 29%|██▊ | 3495/12188 [7:28:38<17:16:55, 7.16s/it] 29%|██▊ | 3496/12188 [7:28:45<17:41:04, 7.32s/it] {'loss': 0.4089, 'grad_norm': 0.7707222522135421, 'learning_rate': 8.368824765407336e-06, 'epoch': 0.29} + 29%|██▊ | 3496/12188 [7:28:45<17:41:04, 7.32s/it] 29%|██▊ | 3497/12188 [7:28:52<17:11:32, 7.12s/it] {'loss': 0.3967, 'grad_norm': 0.6867488796221437, 'learning_rate': 8.367842804788263e-06, 'epoch': 0.29} + 29%|██▊ | 3497/12188 [7:28:52<17:11:32, 7.12s/it] 29%|██▊ | 3498/12188 [7:28:59<17:08:21, 7.10s/it] {'loss': 0.3586, 'grad_norm': 0.6309949907602602, 'learning_rate': 8.36686060633754e-06, 'epoch': 0.29} + 29%|██▊ | 3498/12188 [7:28:59<17:08:21, 7.10s/it] 29%|██▊ | 3499/12188 [7:29:06<16:56:13, 7.02s/it] {'loss': 0.3695, 'grad_norm': 0.6732659043466915, 'learning_rate': 8.36587817012453e-06, 'epoch': 0.29} + 29%|██▊ | 3499/12188 [7:29:06<16:56:13, 7.02s/it] 29%|██▊ | 3500/12188 [7:29:13<17:08:42, 7.10s/it] {'loss': 0.3591, 'grad_norm': 0.6678061687797274, 'learning_rate': 8.364895496218612e-06, 'epoch': 0.29} + 29%|██▊ | 3500/12188 [7:29:13<17:08:42, 7.10s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f45d9ff4950> +[Try #0] Failed to fetch sample 4447311 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f45d9ff4950> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Powered by MediaWiki'"}, {'from': 'gpt', 'value': '\nclick(x=0.9215, y=0.6785)\n'}]} + 29%|██▊ | 3501/12188 [7:29:21<17:28:24, 7.24s/it] {'loss': 0.3564, 'grad_norm': 0.6426574810324396, 'learning_rate': 8.363912584689182e-06, 'epoch': 0.29} + 29%|██▊ | 3501/12188 [7:29:21<17:28:24, 7.24s/it] 29%|██▊ | 3502/12188 [7:29:28<17:11:08, 7.12s/it] {'loss': 0.3571, 'grad_norm': 0.6745034370897223, 'learning_rate': 8.362929435605648e-06, 'epoch': 0.29} + 29%|██▊ | 3502/12188 [7:29:28<17:11:08, 7.12s/it] 29%|██▊ | 3503/12188 [7:29:34<17:02:24, 7.06s/it] {'loss': 0.378, 'grad_norm': 0.7565708504492948, 'learning_rate': 8.361946049037439e-06, 'epoch': 0.29} + 29%|██▊ | 3503/12188 [7:29:34<17:02:24, 7.06s/it] 29%|██▊ | 3504/12188 [7:29:41<16:38:56, 6.90s/it] {'loss': 0.3442, 'grad_norm': 0.6369445780274209, 'learning_rate': 8.360962425054004e-06, 'epoch': 0.29} + 29%|██▊ | 3504/12188 [7:29:41<16:38:56, 6.90s/it] 29%|██▉ | 3505/12188 [7:29:48<16:26:45, 6.82s/it] {'loss': 0.4055, 'grad_norm': 0.618788914373755, 'learning_rate': 8.359978563724802e-06, 'epoch': 0.29} + 29%|██▉ | 3505/12188 [7:29:48<16:26:45, 6.82s/it] 29%|██▉ | 3506/12188 [7:29:55<16:34:49, 6.88s/it] {'loss': 0.3324, 'grad_norm': 0.7177517411269513, 'learning_rate': 8.358994465119311e-06, 'epoch': 0.29} + 29%|██▉ | 3506/12188 [7:29:55<16:34:49, 6.88s/it] 29%|██▉ | 3507/12188 [7:30:02<16:59:03, 7.04s/it] {'loss': 0.3564, 'grad_norm': 0.7088866973686898, 'learning_rate': 8.358010129307027e-06, 'epoch': 0.29} + 29%|██▉ | 3507/12188 [7:30:02<16:59:03, 7.04s/it] 29%|██▉ | 3508/12188 [7:30:09<16:55:29, 7.02s/it] {'loss': 0.3494, 'grad_norm': 0.8180993131154556, 'learning_rate': 8.357025556357464e-06, 'epoch': 0.29} + 29%|██▉ | 3508/12188 [7:30:09<16:55:29, 7.02s/it] 29%|██▉ | 3509/12188 [7:30:17<17:15:58, 7.16s/it] {'loss': 0.3121, 'grad_norm': 0.6899736316426106, 'learning_rate': 8.356040746340151e-06, 'epoch': 0.29} + 29%|██▉ | 3509/12188 [7:30:17<17:15:58, 7.16s/it] 29%|██▉ | 3510/12188 [7:30:23<16:53:46, 7.01s/it] {'loss': 0.3582, 'grad_norm': 0.7738044875567018, 'learning_rate': 8.35505569932463e-06, 'epoch': 0.29} + 29%|██▉ | 3510/12188 [7:30:23<16:53:46, 7.01s/it] 29%|██▉ | 3511/12188 [7:30:33<18:34:08, 7.70s/it] {'loss': 0.3849, 'grad_norm': 0.6817082341127795, 'learning_rate': 8.354070415380466e-06, 'epoch': 0.29} + 29%|██▉ | 3511/12188 [7:30:33<18:34:08, 7.70s/it] 29%|██▉ | 3512/12188 [7:30:43<20:31:51, 8.52s/it] {'loss': 0.3716, 'grad_norm': 0.8160073497138232, 'learning_rate': 8.353084894577238e-06, 'epoch': 0.29} + 29%|██▉ | 3512/12188 [7:30:43<20:31:51, 8.52s/it] 29%|██▉ | 3513/12188 [7:30:51<19:51:40, 8.24s/it] {'loss': 0.3606, 'grad_norm': 0.6391476567442804, 'learning_rate': 8.352099136984541e-06, 'epoch': 0.29} + 29%|██▉ | 3513/12188 [7:30:51<19:51:40, 8.24s/it] 29%|██▉ | 3514/12188 [7:30:57<18:38:49, 7.74s/it] {'loss': 0.3624, 'grad_norm': 0.6259213988616056, 'learning_rate': 8.351113142671991e-06, 'epoch': 0.29} + 29%|██▉ | 3514/12188 [7:30:57<18:38:49, 7.74s/it] 29%|██▉ | 3515/12188 [7:31:04<17:55:29, 7.44s/it] {'loss': 0.3751, 'grad_norm': 0.6443945932579539, 'learning_rate': 8.350126911709215e-06, 'epoch': 0.29} + 29%|██▉ | 3515/12188 [7:31:04<17:55:29, 7.44s/it] 29%|██▉ | 3516/12188 [7:31:11<17:23:40, 7.22s/it] {'loss': 0.3429, 'grad_norm': 0.6382517412307317, 'learning_rate': 8.349140444165857e-06, 'epoch': 0.29} + 29%|██▉ | 3516/12188 [7:31:11<17:23:40, 7.22s/it] 29%|██▉ | 3517/12188 [7:31:17<17:08:01, 7.11s/it] {'loss': 0.3701, 'grad_norm': 0.7389245938388516, 'learning_rate': 8.348153740111582e-06, 'epoch': 0.29} + 29%|██▉ | 3517/12188 [7:31:17<17:08:01, 7.11s/it] 29%|██▉ | 3518/12188 [7:31:25<17:15:02, 7.16s/it] {'loss': 0.3717, 'grad_norm': 0.7365417457639529, 'learning_rate': 8.347166799616069e-06, 'epoch': 0.29} + 29%|██▉ | 3518/12188 [7:31:25<17:15:02, 7.16s/it] 29%|██▉ | 3519/12188 [7:31:32<17:08:31, 7.12s/it] {'loss': 0.3779, 'grad_norm': 0.695274385938636, 'learning_rate': 8.346179622749015e-06, 'epoch': 0.29} + 29%|██▉ | 3519/12188 [7:31:32<17:08:31, 7.12s/it] 29%|██▉ | 3520/12188 [7:31:38<16:53:59, 7.02s/it] {'loss': 0.3758, 'grad_norm': 0.6428473080611645, 'learning_rate': 8.345192209580131e-06, 'epoch': 0.29} + 29%|██▉ | 3520/12188 [7:31:38<16:53:59, 7.02s/it] 29%|██▉ | 3521/12188 [7:31:45<16:43:15, 6.95s/it] {'loss': 0.3961, 'grad_norm': 0.6083095534741604, 'learning_rate': 8.344204560179148e-06, 'epoch': 0.29} + 29%|██▉ | 3521/12188 [7:31:45<16:43:15, 6.95s/it] 29%|██▉ | 3522/12188 [7:31:57<20:21:25, 8.46s/it] {'loss': 0.3291, 'grad_norm': 0.6904731948838451, 'learning_rate': 8.343216674615813e-06, 'epoch': 0.29} + 29%|██▉ | 3522/12188 [7:31:57<20:21:25, 8.46s/it] 29%|██▉ | 3523/12188 [7:32:05<19:32:18, 8.12s/it] {'loss': 0.3703, 'grad_norm': 0.6996120779563204, 'learning_rate': 8.342228552959887e-06, 'epoch': 0.29} + 29%|██▉ | 3523/12188 [7:32:05<19:32:18, 8.12s/it] 29%|██▉ | 3524/12188 [7:32:11<18:21:39, 7.63s/it] {'loss': 0.3286, 'grad_norm': 0.6246277186539626, 'learning_rate': 8.341240195281149e-06, 'epoch': 0.29} + 29%|██▉ | 3524/12188 [7:32:11<18:21:39, 7.63s/it] 29%|██▉ | 3525/12188 [7:32:18<17:55:07, 7.45s/it] {'loss': 0.3994, 'grad_norm': 0.6469852677872273, 'learning_rate': 8.340251601649398e-06, 'epoch': 0.29} + 29%|██▉ | 3525/12188 [7:32:18<17:55:07, 7.45s/it] 29%|██▉ | 3526/12188 [7:32:25<17:50:10, 7.41s/it] {'loss': 0.3475, 'grad_norm': 0.687514337487327, 'learning_rate': 8.339262772134444e-06, 'epoch': 0.29} + 29%|██▉ | 3526/12188 [7:32:25<17:50:10, 7.41s/it] 29%|██▉ | 3527/12188 [7:32:32<17:20:42, 7.21s/it] {'loss': 0.376, 'grad_norm': 0.6986047844914042, 'learning_rate': 8.33827370680612e-06, 'epoch': 0.29} + 29%|██▉ | 3527/12188 [7:32:32<17:20:42, 7.21s/it] 29%|██▉ | 3528/12188 [7:32:39<16:51:40, 7.01s/it] {'loss': 0.3603, 'grad_norm': 0.6738940212270224, 'learning_rate': 8.337284405734269e-06, 'epoch': 0.29} + 29%|██▉ | 3528/12188 [7:32:39<16:51:40, 7.01s/it] 29%|██▉ | 3529/12188 [7:32:47<17:29:55, 7.28s/it] {'loss': 0.3203, 'grad_norm': 0.7794610627358677, 'learning_rate': 8.336294868988755e-06, 'epoch': 0.29} + 29%|���█▉ | 3529/12188 [7:32:47<17:29:55, 7.28s/it] 29%|██▉ | 3530/12188 [7:32:54<17:56:13, 7.46s/it] {'loss': 0.3674, 'grad_norm': 0.7051687526182074, 'learning_rate': 8.335305096639458e-06, 'epoch': 0.29} + 29%|██▉ | 3530/12188 [7:32:54<17:56:13, 7.46s/it] 29%|██▉ | 3531/12188 [7:33:03<18:31:37, 7.70s/it] {'loss': 0.3561, 'grad_norm': 0.6496163141892362, 'learning_rate': 8.334315088756273e-06, 'epoch': 0.29} + 29%|██▉ | 3531/12188 [7:33:03<18:31:37, 7.70s/it] 29%|██▉ | 3532/12188 [7:33:10<17:54:26, 7.45s/it] {'loss': 0.3653, 'grad_norm': 0.6738057931525337, 'learning_rate': 8.333324845409115e-06, 'epoch': 0.29} + 29%|██▉ | 3532/12188 [7:33:10<17:54:26, 7.45s/it] 29%|██▉ | 3533/12188 [7:33:17<17:57:34, 7.47s/it] {'loss': 0.3835, 'grad_norm': 0.6964536277050751, 'learning_rate': 8.33233436666791e-06, 'epoch': 0.29} + 29%|██▉ | 3533/12188 [7:33:17<17:57:34, 7.47s/it] 29%|██▉ | 3534/12188 [7:33:24<17:45:08, 7.38s/it] {'loss': 0.3331, 'grad_norm': 0.6216354018666645, 'learning_rate': 8.331343652602606e-06, 'epoch': 0.29} + 29%|██▉ | 3534/12188 [7:33:24<17:45:08, 7.38s/it] 29%|██▉ | 3535/12188 [7:33:31<17:09:30, 7.14s/it] {'loss': 0.3845, 'grad_norm': 0.7019787455914381, 'learning_rate': 8.330352703283166e-06, 'epoch': 0.29} + 29%|██▉ | 3535/12188 [7:33:31<17:09:30, 7.14s/it] 29%|██▉ | 3536/12188 [7:33:38<17:17:00, 7.19s/it] {'loss': 0.3849, 'grad_norm': 0.6371413596532413, 'learning_rate': 8.329361518779569e-06, 'epoch': 0.29} + 29%|██▉ | 3536/12188 [7:33:38<17:17:00, 7.19s/it] 29%|██▉ | 3537/12188 [7:33:46<17:32:14, 7.30s/it] {'loss': 0.3528, 'grad_norm': 0.6467245350760537, 'learning_rate': 8.328370099161811e-06, 'epoch': 0.29} + 29%|██▉ | 3537/12188 [7:33:46<17:32:14, 7.30s/it] 29%|██▉ | 3538/12188 [7:33:54<18:16:00, 7.60s/it] {'loss': 0.3766, 'grad_norm': 1.1760046002296607, 'learning_rate': 8.327378444499901e-06, 'epoch': 0.29} + 29%|██▉ | 3538/12188 [7:33:54<18:16:00, 7.60s/it] 29%|██▉ | 3539/12188 [7:34:01<17:47:12, 7.40s/it] {'loss': 0.336, 'grad_norm': 0.6343895279065744, 'learning_rate': 8.326386554863874e-06, 'epoch': 0.29} + 29%|██▉ | 3539/12188 [7:34:01<17:47:12, 7.40s/it] 29%|██▉ | 3540/12188 [7:34:08<17:16:04, 7.19s/it] {'loss': 0.3537, 'grad_norm': 0.6772792426794163, 'learning_rate': 8.325394430323773e-06, 'epoch': 0.29} + 29%|██▉ | 3540/12188 [7:34:08<17:16:04, 7.19s/it] 29%|██▉ | 3541/12188 [7:34:16<18:02:26, 7.51s/it] {'loss': 0.3831, 'grad_norm': 0.7064613644768047, 'learning_rate': 8.324402070949658e-06, 'epoch': 0.29} + 29%|██▉ | 3541/12188 [7:34:16<18:02:26, 7.51s/it] 29%|██▉ | 3542/12188 [7:34:23<17:54:45, 7.46s/it] {'loss': 0.3753, 'grad_norm': 0.7803266441814598, 'learning_rate': 8.323409476811612e-06, 'epoch': 0.29} + 29%|██▉ | 3542/12188 [7:34:23<17:54:45, 7.46s/it] 29%|██▉ | 3543/12188 [7:34:32<19:10:41, 7.99s/it] {'loss': 0.3438, 'grad_norm': 0.6637101815904007, 'learning_rate': 8.322416647979726e-06, 'epoch': 0.29} + 29%|██▉ | 3543/12188 [7:34:32<19:10:41, 7.99s/it] 29%|██▉ | 3544/12188 [7:34:39<18:16:15, 7.61s/it] {'loss': 0.3457, 'grad_norm': 0.6063149112767192, 'learning_rate': 8.321423584524116e-06, 'epoch': 0.29} + 29%|██▉ | 3544/12188 [7:34:39<18:16:15, 7.61s/it] 29%|██▉ | 3545/12188 [7:34:46<17:34:38, 7.32s/it] {'loss': 0.4038, 'grad_norm': 0.6974808107826455, 'learning_rate': 8.320430286514908e-06, 'epoch': 0.29} + 29%|██▉ | 3545/12188 [7:34:46<17:34:38, 7.32s/it] 29%|██▉ | 3546/12188 [7:34:53<17:09:23, 7.15s/it] {'loss': 0.3586, 'grad_norm': 0.6469397991586194, 'learning_rate': 8.319436754022247e-06, 'epoch': 0.29} + 29%|██▉ | 3546/12188 [7:34:53<17:09:23, 7.15s/it] 29%|██▉ | 3547/12188 [7:35:00<17:40:56, 7.37s/it] {'loss': 0.3505, 'grad_norm': 0.617980119528497, 'learning_rate': 8.318442987116296e-06, 'epoch': 0.29} + 29%|██▉ | 3547/12188 [7:35:00<17:40:56, 7.37s/it] 29%|██▉ | 3548/12188 [7:35:08<18:04:31, 7.53s/it] {'loss': 0.3281, 'grad_norm': 0.6046028939962098, 'learning_rate': 8.317448985867233e-06, 'epoch': 0.29} + 29%|██▉ | 3548/12188 [7:35:08<18:04:31, 7.53s/it] 29%|██▉ | 3549/12188 [7:35:15<17:36:01, 7.33s/it] {'loss': 0.3916, 'grad_norm': 0.7121508278179322, 'learning_rate': 8.316454750345253e-06, 'epoch': 0.29} + 29%|██▉ | 3549/12188 [7:35:15<17:36:01, 7.33s/it] 29%|██▉ | 3550/12188 [7:35:24<18:14:36, 7.60s/it] {'loss': 0.3737, 'grad_norm': 0.7500961409996909, 'learning_rate': 8.315460280620566e-06, 'epoch': 0.29} + 29%|██▉ | 3550/12188 [7:35:24<18:14:36, 7.60s/it] 29%|██▉ | 3551/12188 [7:35:30<17:38:31, 7.35s/it] {'loss': 0.3293, 'grad_norm': 0.6311074624831574, 'learning_rate': 8.314465576763401e-06, 'epoch': 0.29} + 29%|██▉ | 3551/12188 [7:35:30<17:38:31, 7.35s/it] 29%|██▉ | 3552/12188 [7:35:38<17:37:32, 7.35s/it] {'loss': 0.3803, 'grad_norm': 0.7224201970041401, 'learning_rate': 8.313470638844003e-06, 'epoch': 0.29} + 29%|██▉ | 3552/12188 [7:35:38<17:37:32, 7.35s/it] 29%|██▉ | 3553/12188 [7:35:45<17:27:52, 7.28s/it] {'loss': 0.3687, 'grad_norm': 0.6560335227467121, 'learning_rate': 8.312475466932632e-06, 'epoch': 0.29} + 29%|██▉ | 3553/12188 [7:35:45<17:27:52, 7.28s/it] 29%|██▉ | 3554/12188 [7:35:53<18:22:54, 7.66s/it] {'loss': 0.3307, 'grad_norm': 0.599542053535244, 'learning_rate': 8.311480061099563e-06, 'epoch': 0.29} + 29%|██▉ | 3554/12188 [7:35:53<18:22:54, 7.66s/it] 29%|██▉ | 3555/12188 [7:36:02<19:07:57, 7.98s/it] {'loss': 0.3297, 'grad_norm': 0.6814476287367497, 'learning_rate': 8.310484421415094e-06, 'epoch': 0.29} + 29%|██▉ | 3555/12188 [7:36:02<19:07:57, 7.98s/it] 29%|██▉ | 3556/12188 [7:36:10<18:54:28, 7.89s/it] {'loss': 0.3624, 'grad_norm': 0.6296149373829947, 'learning_rate': 8.309488547949535e-06, 'epoch': 0.29} + 29%|██▉ | 3556/12188 [7:36:10<18:54:28, 7.89s/it] 29%|██▉ | 3557/12188 [7:36:17<18:10:40, 7.58s/it] {'loss': 0.371, 'grad_norm': 0.743423712621411, 'learning_rate': 8.30849244077321e-06, 'epoch': 0.29} + 29%|██▉ | 3557/12188 [7:36:17<18:10:40, 7.58s/it] 29%|██▉ | 3558/12188 [7:36:23<17:31:51, 7.31s/it] {'loss': 0.3324, 'grad_norm': 0.7483993195634401, 'learning_rate': 8.307496099956465e-06, 'epoch': 0.29} + 29%|██▉ | 3558/12188 [7:36:23<17:31:51, 7.31s/it] 29%|██▉ | 3559/12188 [7:36:30<17:01:13, 7.10s/it] {'loss': 0.3549, 'grad_norm': 0.6953397358823545, 'learning_rate': 8.306499525569661e-06, 'epoch': 0.29} + 29%|██▉ | 3559/12188 [7:36:30<17:01:13, 7.10s/it] 29%|██▉ | 3560/12188 [7:36:37<16:46:12, 7.00s/it] {'loss': 0.3628, 'grad_norm': 0.6467697636796358, 'learning_rate': 8.30550271768317e-06, 'epoch': 0.29} + 29%|██▉ | 3560/12188 [7:36:37<16:46:12, 7.00s/it] 29%|██▉ | 3561/12188 [7:36:45<18:01:00, 7.52s/it] {'loss': 0.3719, 'grad_norm': 0.6781305445741366, 'learning_rate': 8.304505676367388e-06, 'epoch': 0.29} + 29%|██▉ | 3561/12188 [7:36:45<18:01:00, 7.52s/it] 29%|██▉ | 3562/12188 [7:36:54<19:08:16, 7.99s/it] {'loss': 0.3798, 'grad_norm': 0.7031623478808917, 'learning_rate': 8.303508401692725e-06, 'epoch': 0.29} + 29%|██▉ | 3562/12188 [7:36:54<19:08:16, 7.99s/it] 29%|██▉ | 3563/12188 [7:37:02<18:37:47, 7.78s/it] {'loss': 0.4223, 'grad_norm': 0.6640199017729365, 'learning_rate': 8.302510893729607e-06, 'epoch': 0.29} + 29%|██▉ | 3563/12188 [7:37:02<18:37:47, 7.78s/it] 29%|██▉ | 3564/12188 [7:37:09<18:14:16, 7.61s/it] {'loss': 0.3779, 'grad_norm': 0.6352306380597317, 'learning_rate': 8.301513152548474e-06, 'epoch': 0.29} + 29%|██▉ | 3564/12188 [7:37:09<18:14:16, 7.61s/it] 29%|██▉ | 3565/12188 [7:37:16<17:33:58, 7.33s/it] {'loss': 0.3455, 'grad_norm': 0.6374775824993386, 'learning_rate': 8.300515178219788e-06, 'epoch': 0.29} + 29%|██▉ | 3565/12188 [7:37:16<17:33:58, 7.33s/it] 29%|██▉ | 3566/12188 [7:37:23<17:24:51, 7.27s/it] {'loss': 0.3933, 'grad_norm': 0.6491430989425522, 'learning_rate': 8.299516970814022e-06, 'epoch': 0.29} + 29%|██▉ | 3566/12188 [7:37:23<17:24:51, 7.27s/it] 29%|██▉ | 3567/12188 [7:37:30<17:16:17, 7.21s/it] {'loss': 0.3681, 'grad_norm': 0.6661220140790688, 'learning_rate': 8.298518530401668e-06, 'epoch': 0.29} + 29%|██▉ | 3567/12188 [7:37:30<17:16:17, 7.21s/it] 29%|██▉ | 3568/12188 [7:37:38<17:51:08, 7.46s/it] {'loss': 0.3622, 'grad_norm': 0.6884004275185375, 'learning_rate': 8.297519857053235e-06, 'epoch': 0.29} + 29%|██▉ | 3568/12188 [7:37:38<17:51:08, 7.46s/it] 29%|██▉ | 3569/12188 [7:37:45<17:29:01, 7.30s/it] {'loss': 0.3783, 'grad_norm': 0.6065103522155028, 'learning_rate': 8.29652095083925e-06, 'epoch': 0.29} + 29%|██▉ | 3569/12188 [7:37:45<17:29:01, 7.30s/it] 29%|██▉ | 3570/12188 [7:37:52<17:06:07, 7.14s/it] {'loss': 0.348, 'grad_norm': 0.659886239340308, 'learning_rate': 8.295521811830249e-06, 'epoch': 0.29} + 29%|██▉ | 3570/12188 [7:37:52<17:06:07, 7.14s/it] 29%|██▉ | 3571/12188 [7:37:59<17:37:37, 7.36s/it] {'loss': 0.3756, 'grad_norm': 0.6111673564009654, 'learning_rate': 8.294522440096793e-06, 'epoch': 0.29} + 29%|██▉ | 3571/12188 [7:37:59<17:37:37, 7.36s/it] 29%|██▉ | 3572/12188 [7:38:06<17:12:46, 7.19s/it] {'loss': 0.3572, 'grad_norm': 0.726655664192741, 'learning_rate': 8.293522835709455e-06, 'epoch': 0.29} + 29%|██▉ | 3572/12188 [7:38:06<17:12:46, 7.19s/it] 29%|██▉ | 3573/12188 [7:38:14<17:27:14, 7.29s/it] {'loss': 0.3457, 'grad_norm': 0.6398650703459081, 'learning_rate': 8.292522998738827e-06, 'epoch': 0.29} + 29%|██▉ | 3573/12188 [7:38:14<17:27:14, 7.29s/it] 29%|██▉ | 3574/12188 [7:38:21<17:18:33, 7.23s/it] {'loss': 0.3844, 'grad_norm': 0.6190328780506671, 'learning_rate': 8.291522929255513e-06, 'epoch': 0.29} + 29%|██▉ | 3574/12188 [7:38:21<17:18:33, 7.23s/it] 29%|██▉ | 3575/12188 [7:38:27<16:50:19, 7.04s/it] {'loss': 0.3989, 'grad_norm': 0.6851343580567714, 'learning_rate': 8.29052262733014e-06, 'epoch': 0.29} + 29%|██▉ | 3575/12188 [7:38:27<16:50:19, 7.04s/it] 29%|██▉ | 3576/12188 [7:38:35<17:10:05, 7.18s/it] {'loss': 0.3372, 'grad_norm': 0.6307561256957867, 'learning_rate': 8.289522093033345e-06, 'epoch': 0.29} + 29%|██▉ | 3576/12188 [7:38:35<17:10:05, 7.18s/it] 29%|██▉ | 3577/12188 [7:38:42<16:58:56, 7.10s/it] {'loss': 0.366, 'grad_norm': 0.634359172510698, 'learning_rate': 8.288521326435784e-06, 'epoch': 0.29} + 29%|██▉ | 3577/12188 [7:38:42<16:58:56, 7.10s/it] 29%|██▉ | 3578/12188 [7:38:49<16:41:06, 6.98s/it] {'loss': 0.3786, 'grad_norm': 0.6503487387943174, 'learning_rate': 8.28752032760813e-06, 'epoch': 0.29} + 29%|██▉ | 3578/12188 [7:38:49<16:41:06, 6.98s/it] 29%|██▉ | 3579/12188 [7:38:56<16:59:10, 7.10s/it] {'loss': 0.3478, 'grad_norm': 0.6432810925259413, 'learning_rate': 8.286519096621073e-06, 'epoch': 0.29} + 29%|██▉ | 3579/12188 [7:38:56<16:59:10, 7.10s/it] 29%|██▉ | 3580/12188 [7:39:04<17:24:56, 7.28s/it] {'loss': 0.3611, 'grad_norm': 0.6029727401342846, 'learning_rate': 8.285517633545317e-06, 'epoch': 0.29} + 29%|██▉ | 3580/12188 [7:39:04<17:24:56, 7.28s/it] 29%|██▉ | 3581/12188 [7:39:11<17:35:19, 7.36s/it] {'loss': 0.3099, 'grad_norm': 0.6089744103165244, 'learning_rate': 8.284515938451586e-06, 'epoch': 0.29} + 29%|██▉ | 3581/12188 [7:39:11<17:35:19, 7.36s/it] 29%|██▉ | 3582/12188 [7:39:18<17:31:05, 7.33s/it] {'loss': 0.3587, 'grad_norm': 0.6665730547037353, 'learning_rate': 8.283514011410616e-06, 'epoch': 0.29} + 29%|██▉ | 3582/12188 [7:39:18<17:31:05, 7.33s/it] 29%|██▉ | 3583/12188 [7:39:25<17:14:09, 7.21s/it] {'loss': 0.3445, 'grad_norm': 0.6442164303776041, 'learning_rate': 8.28251185249316e-06, 'epoch': 0.29} + 29%|██▉ | 3583/12188 [7:39:25<17:14:09, 7.21s/it] 29%|██▉ | 3584/12188 [7:39:32<16:55:15, 7.08s/it] {'loss': 0.3463, 'grad_norm': 0.7230755398038096, 'learning_rate': 8.281509461769992e-06, 'epoch': 0.29} + 29%|██▉ | 3584/12188 [7:39:32<16:55:15, 7.08s/it] 29%|██▉ | 3585/12188 [7:39:39<16:50:49, 7.05s/it] {'loss': 0.3754, 'grad_norm': 0.631047798262599, 'learning_rate': 8.280506839311898e-06, 'epoch': 0.29} + 29%|██▉ | 3585/12188 [7:39:39<16:50:49, 7.05s/it] 29%|██▉ | 3586/12188 [7:39:47<17:19:21, 7.25s/it] {'loss': 0.3921, 'grad_norm': 0.6642789640860559, 'learning_rate': 8.27950398518968e-06, 'epoch': 0.29} + 29%|██▉ | 3586/12188 [7:39:47<17:19:21, 7.25s/it] 29%|██▉ | 3587/12188 [7:39:53<16:47:31, 7.03s/it] {'loss': 0.3421, 'grad_norm': 0.6923855876030156, 'learning_rate': 8.278500899474162e-06, 'epoch': 0.29} + 29%|██▉ | 3587/12188 [7:39:53<16:47:31, 7.03s/it] 29%|██▉ | 3588/12188 [7:40:00<16:31:47, 6.92s/it] {'loss': 0.3528, 'grad_norm': 0.6224219452471373, 'learning_rate': 8.277497582236177e-06, 'epoch': 0.29} + 29%|██▉ | 3588/12188 [7:40:00<16:31:47, 6.92s/it] 29%|██▉ | 3589/12188 [7:40:08<16:56:37, 7.09s/it] {'loss': 0.3926, 'grad_norm': 0.6973986957952036, 'learning_rate': 8.276494033546579e-06, 'epoch': 0.29} + 29%|██▉ | 3589/12188 [7:40:08<16:56:37, 7.09s/it] 29%|██▉ | 3590/12188 [7:40:15<17:21:58, 7.27s/it] {'loss': 0.3567, 'grad_norm': 0.6838799524426902, 'learning_rate': 8.275490253476236e-06, 'epoch': 0.29} + 29%|██▉ | 3590/12188 [7:40:15<17:21:58, 7.27s/it] 29%|██▉ | 3591/12188 [7:40:22<17:05:22, 7.16s/it] {'loss': 0.392, 'grad_norm': 0.6817997931166102, 'learning_rate': 8.274486242096032e-06, 'epoch': 0.29} + 29%|██▉ | 3591/12188 [7:40:22<17:05:22, 7.16s/it] 29%|██▉ | 3592/12188 [7:40:30<17:20:57, 7.27s/it] {'loss': 0.3694, 'grad_norm': 0.7401633047104399, 'learning_rate': 8.273481999476871e-06, 'epoch': 0.29} + 29%|██▉ | 3592/12188 [7:40:30<17:20:57, 7.27s/it] 29%|██▉ | 3593/12188 [7:40:37<17:11:17, 7.20s/it] {'loss': 0.3645, 'grad_norm': 0.5708210793289804, 'learning_rate': 8.272477525689672e-06, 'epoch': 0.29} + 29%|██▉ | 3593/12188 [7:40:37<17:11:17, 7.20s/it] 29%|██▉ | 3594/12188 [7:40:44<17:08:27, 7.18s/it] {'loss': 0.3941, 'grad_norm': 0.7897511779879776, 'learning_rate': 8.271472820805367e-06, 'epoch': 0.29} + 29%|██▉ | 3594/12188 [7:40:44<17:08:27, 7.18s/it] 29%|██▉ | 3595/12188 [7:40:50<16:46:13, 7.03s/it] {'loss': 0.3368, 'grad_norm': 0.6444597126779538, 'learning_rate': 8.270467884894908e-06, 'epoch': 0.29} + 29%|██▉ | 3595/12188 [7:40:50<16:46:13, 7.03s/it] 30%|██▉ | 3596/12188 [7:40:58<16:51:41, 7.06s/it] {'loss': 0.3869, 'grad_norm': 0.6621667758677521, 'learning_rate': 8.26946271802926e-06, 'epoch': 0.3} + 30%|██▉ | 3596/12188 [7:40:58<16:51:41, 7.06s/it] 30%|██▉ | 3597/12188 [7:41:04<16:24:47, 6.88s/it] {'loss': 0.3869, 'grad_norm': 0.7496557959018151, 'learning_rate': 8.268457320279408e-06, 'epoch': 0.3} + 30%|██▉ | 3597/12188 [7:41:04<16:24:47, 6.88s/it] 30%|██▉ | 3598/12188 [7:41:11<16:38:24, 6.97s/it] {'loss': 0.3469, 'grad_norm': 0.6657602796353397, 'learning_rate': 8.267451691716352e-06, 'epoch': 0.3} + 30%|██▉ | 3598/12188 [7:41:11<16:38:24, 6.97s/it] 30%|██▉ | 3599/12188 [7:41:18<16:27:42, 6.90s/it] {'loss': 0.3816, 'grad_norm': 0.5883409943559883, 'learning_rate': 8.266445832411108e-06, 'epoch': 0.3} + 30%|██▉ | 3599/12188 [7:41:18<16:27:42, 6.90s/it] 30%|██▉ | 3600/12188 [7:41:25<16:39:56, 6.99s/it] {'loss': 0.364, 'grad_norm': 0.6000569286096555, 'learning_rate': 8.265439742434706e-06, 'epoch': 0.3} + 30%|██▉ | 3600/12188 [7:41:25<16:39:56, 6.99s/it] 30%|██▉ | 3601/12188 [7:41:32<16:51:54, 7.07s/it] {'loss': 0.3605, 'grad_norm': 0.6274879973377376, 'learning_rate': 8.264433421858195e-06, 'epoch': 0.3} + 30%|██▉ | 3601/12188 [7:41:32<16:51:54, 7.07s/it] 30%|██▉ | 3602/12188 [7:41:40<17:01:17, 7.14s/it] {'loss': 0.3655, 'grad_norm': 0.7000890237919551, 'learning_rate': 8.26342687075264e-06, 'epoch': 0.3} + 30%|██▉ | 3602/12188 [7:41:40<17:01:17, 7.14s/it] 30%|██▉ | 3603/12188 [7:41:46<16:44:02, 7.02s/it] {'loss': 0.3435, 'grad_norm': 0.6375886239238504, 'learning_rate': 8.262420089189124e-06, 'epoch': 0.3} + 30%|██▉ | 3603/12188 [7:41:46<16:44:02, 7.02s/it] 30%|██▉ | 3604/12188 [7:41:54<17:06:45, 7.18s/it] {'loss': 0.3549, 'grad_norm': 0.6244918847281141, 'learning_rate': 8.261413077238743e-06, 'epoch': 0.3} + 30%|██▉ | 3604/12188 [7:41:54<17:06:45, 7.18s/it] 30%|██▉ | 3605/12188 [7:42:01<16:47:20, 7.04s/it] {'loss': 0.3432, 'grad_norm': 0.7191676372032562, 'learning_rate': 8.260405834972611e-06, 'epoch': 0.3} + 30%|██▉ | 3605/12188 [7:42:01<16:47:20, 7.04s/it] 30%|██▉ | 3606/12188 [7:42:09<17:23:09, 7.29s/it] {'loss': 0.3376, 'grad_norm': 0.7332926621092345, 'learning_rate': 8.259398362461855e-06, 'epoch': 0.3} + 30%|██▉ | 3606/12188 [7:42:09<17:23:09, 7.29s/it] 30%|██▉ | 3607/12188 [7:42:16<17:19:49, 7.27s/it] {'loss': 0.3207, 'grad_norm': 0.5967374039342035, 'learning_rate': 8.258390659777625e-06, 'epoch': 0.3} + 30%|██▉ | 3607/12188 [7:42:16<17:19:49, 7.27s/it] 30%|██▉ | 3608/12188 [7:42:24<17:47:34, 7.47s/it] {'loss': 0.3454, 'grad_norm': 0.6269103921117167, 'learning_rate': 8.257382726991081e-06, 'epoch': 0.3} + 30%|██▉ | 3608/12188 [7:42:24<17:47:34, 7.47s/it] 30%|██▉ | 3609/12188 [7:42:35<20:14:44, 8.50s/it] {'loss': 0.3975, 'grad_norm': 0.6311135144322346, 'learning_rate': 8.256374564173401e-06, 'epoch': 0.3} + 30%|██▉ | 3609/12188 [7:42:35<20:14:44, 8.50s/it] 30%|██▉ | 3610/12188 [7:42:42<19:04:35, 8.01s/it] {'loss': 0.3774, 'grad_norm': 0.7973713007874345, 'learning_rate': 8.255366171395783e-06, 'epoch': 0.3} + 30%|██▉ | 3610/12188 [7:42:42<19:04:35, 8.01s/it] 30%|██▉ | 3611/12188 [7:42:49<18:32:00, 7.78s/it] {'loss': 0.4193, 'grad_norm': 0.7319646606256761, 'learning_rate': 8.254357548729435e-06, 'epoch': 0.3} + 30%|██▉ | 3611/12188 [7:42:49<18:32:00, 7.78s/it] 30%|██▉ | 3612/12188 [7:42:56<18:06:15, 7.60s/it] {'loss': 0.377, 'grad_norm': 0.6740943599425254, 'learning_rate': 8.253348696245586e-06, 'epoch': 0.3} + 30%|██▉ | 3612/12188 [7:42:56<18:06:15, 7.60s/it] 30%|██▉ | 3613/12188 [7:43:03<17:40:16, 7.42s/it] {'loss': 0.3358, 'grad_norm': 0.6090005937134895, 'learning_rate': 8.252339614015478e-06, 'epoch': 0.3} + 30%|██▉ | 3613/12188 [7:43:03<17:40:16, 7.42s/it] 30%|██▉ | 3614/12188 [7:43:10<17:32:43, 7.37s/it] {'loss': 0.3478, 'grad_norm': 0.659427167593826, 'learning_rate': 8.251330302110373e-06, 'epoch': 0.3} + 30%|██▉ | 3614/12188 [7:43:10<17:32:43, 7.37s/it] 30%|██▉ | 3615/12188 [7:43:18<18:00:30, 7.56s/it] {'loss': 0.4057, 'grad_norm': 0.650235064877028, 'learning_rate': 8.250320760601544e-06, 'epoch': 0.3} + 30%|██▉ | 3615/12188 [7:43:18<18:00:30, 7.56s/it] 30%|██▉ | 3616/12188 [7:43:26<18:22:22, 7.72s/it] {'loss': 0.3339, 'grad_norm': 0.6365065767178258, 'learning_rate': 8.249310989560286e-06, 'epoch': 0.3} + 30%|██▉ | 3616/12188 [7:43:26<18:22:22, 7.72s/it] 30%|██▉ | 3617/12188 [7:43:33<17:58:29, 7.55s/it] {'loss': 0.366, 'grad_norm': 0.8126977743513947, 'learning_rate': 8.248300989057906e-06, 'epoch': 0.3} + 30%|██▉ | 3617/12188 [7:43:33<17:58:29, 7.55s/it] 30%|██▉ | 3618/12188 [7:43:40<17:28:11, 7.34s/it] {'loss': 0.3888, 'grad_norm': 0.6695079211895333, 'learning_rate': 8.247290759165731e-06, 'epoch': 0.3} + 30%|██▉ | 3618/12188 [7:43:40<17:28:11, 7.34s/it] 30%|██▉ | 3619/12188 [7:43:47<16:57:52, 7.13s/it] {'loss': 0.3922, 'grad_norm': 0.6238956985646354, 'learning_rate': 8.246280299955096e-06, 'epoch': 0.3} + 30%|██▉ | 3619/12188 [7:43:47<16:57:52, 7.13s/it] 30%|██▉ | 3620/12188 [7:43:56<18:04:44, 7.60s/it] {'loss': 0.3545, 'grad_norm': 0.6102053837218336, 'learning_rate': 8.245269611497365e-06, 'epoch': 0.3} + 30%|██▉ | 3620/12188 [7:43:56<18:04:44, 7.60s/it] 30%|██▉ | 3621/12188 [7:44:04<18:24:28, 7.74s/it] {'loss': 0.3391, 'grad_norm': 0.7108778581671541, 'learning_rate': 8.244258693863907e-06, 'epoch': 0.3} + 30%|██▉ | 3621/12188 [7:44:04<18:24:28, 7.74s/it] 30%|██▉ | 3622/12188 [7:44:10<17:37:22, 7.41s/it] {'loss': 0.3457, 'grad_norm': 0.6563424455626311, 'learning_rate': 8.243247547126112e-06, 'epoch': 0.3} + 30%|██▉ | 3622/12188 [7:44:10<17:37:22, 7.41s/it] 30%|██▉ | 3623/12188 [7:44:17<17:12:38, 7.23s/it] {'loss': 0.3618, 'grad_norm': 0.638259877486403, 'learning_rate': 8.242236171355386e-06, 'epoch': 0.3} + 30%|██▉ | 3623/12188 [7:44:17<17:12:38, 7.23s/it] 30%|██▉ | 3624/12188 [7:44:24<16:59:58, 7.15s/it] {'loss': 0.3657, 'grad_norm': 0.6771981051182538, 'learning_rate': 8.24122456662315e-06, 'epoch': 0.3} + 30%|██▉ | 3624/12188 [7:44:24<16:59:58, 7.15s/it] 30%|██▉ | 3625/12188 [7:44:31<16:49:29, 7.07s/it] {'loss': 0.3566, 'grad_norm': 0.6416815625004777, 'learning_rate': 8.240212733000844e-06, 'epoch': 0.3} + 30%|██▉ | 3625/12188 [7:44:31<16:49:29, 7.07s/it] 30%|██▉ | 3626/12188 [7:44:38<16:44:52, 7.04s/it] {'loss': 0.3551, 'grad_norm': 0.6898132822501467, 'learning_rate': 8.23920067055992e-06, 'epoch': 0.3} + 30%|██▉ | 3626/12188 [7:44:38<16:44:52, 7.04s/it] 30%|██▉ | 3627/12188 [7:44:45<16:25:23, 6.91s/it] {'loss': 0.4062, 'grad_norm': 0.7824184177715442, 'learning_rate': 8.23818837937185e-06, 'epoch': 0.3} + 30%|██▉ | 3627/12188 [7:44:45<16:25:23, 6.91s/it] 30%|██▉ | 3628/12188 [7:44:52<16:33:05, 6.96s/it] {'loss': 0.3706, 'grad_norm': 0.7533786266343193, 'learning_rate': 8.237175859508116e-06, 'epoch': 0.3} + 30%|██▉ | 3628/12188 [7:44:52<16:33:05, 6.96s/it] 30%|██▉ | 3629/12188 [7:44:59<16:32:58, 6.96s/it] {'loss': 0.3697, 'grad_norm': 0.6345265818044278, 'learning_rate': 8.236163111040227e-06, 'epoch': 0.3} + 30%|██▉ | 3629/12188 [7:44:59<16:32:58, 6.96s/it] 30%|██▉ | 3630/12188 [7:45:06<16:43:44, 7.04s/it] {'loss': 0.4033, 'grad_norm': 0.6691662625736022, 'learning_rate': 8.235150134039696e-06, 'epoch': 0.3} + 30%|██▉ | 3630/12188 [7:45:06<16:43:44, 7.04s/it] 30%|██▉ | 3631/12188 [7:45:13<16:47:48, 7.07s/it] {'loss': 0.3525, 'grad_norm': 0.6442645788282831, 'learning_rate': 8.23413692857806e-06, 'epoch': 0.3} + 30%|██▉ | 3631/12188 [7:45:13<16:47:48, 7.07s/it] 30%|██▉ | 3632/12188 [7:45:20<16:41:26, 7.02s/it] {'loss': 0.3751, 'grad_norm': 0.6552997179892466, 'learning_rate': 8.233123494726873e-06, 'epoch': 0.3} + 30%|██▉ | 3632/12188 [7:45:20<16:41:26, 7.02s/it] 30%|██▉ | 3633/12188 [7:45:27<16:44:45, 7.05s/it] {'loss': 0.4455, 'grad_norm': 0.6922764908281266, 'learning_rate': 8.232109832557696e-06, 'epoch': 0.3} + 30%|██▉ | 3633/12188 [7:45:27<16:44:45, 7.05s/it] 30%|██▉ | 3634/12188 [7:45:36<17:59:57, 7.58s/it] {'loss': 0.3425, 'grad_norm': 0.5897808517142249, 'learning_rate': 8.231095942142117e-06, 'epoch': 0.3} + 30%|██▉ | 3634/12188 [7:45:36<17:59:57, 7.58s/it] 30%|██▉ | 3635/12188 [7:45:43<17:26:22, 7.34s/it] {'loss': 0.3404, 'grad_norm': 0.7653187201466285, 'learning_rate': 8.230081823551734e-06, 'epoch': 0.3} + 30%|██▉ | 3635/12188 [7:45:43<17:26:22, 7.34s/it] 30%|██▉ | 3636/12188 [7:45:50<17:17:29, 7.28s/it] {'loss': 0.3615, 'grad_norm': 0.7159661955567239, 'learning_rate': 8.229067476858162e-06, 'epoch': 0.3} + 30%|██▉ | 3636/12188 [7:45:50<17:17:29, 7.28s/it] 30%|██▉ | 3637/12188 [7:45:57<17:04:39, 7.19s/it] {'loss': 0.3661, 'grad_norm': 0.7861930509592386, 'learning_rate': 8.228052902133032e-06, 'epoch': 0.3} + 30%|██▉ | 3637/12188 [7:45:57<17:04:39, 7.19s/it] 30%|██▉ | 3638/12188 [7:46:04<16:54:27, 7.12s/it] {'loss': 0.3647, 'grad_norm': 0.7115417254867795, 'learning_rate': 8.227038099447992e-06, 'epoch': 0.3} + 30%|██▉ | 3638/12188 [7:46:04<16:54:27, 7.12s/it] 30%|██▉ | 3639/12188 [7:46:11<16:50:27, 7.09s/it] {'loss': 0.3542, 'grad_norm': 0.6302883119751456, 'learning_rate': 8.226023068874707e-06, 'epoch': 0.3} + 30%|██▉ | 3639/12188 [7:46:11<16:50:27, 7.09s/it] 30%|██▉ | 3640/12188 [7:46:19<17:35:28, 7.41s/it] {'loss': 0.367, 'grad_norm': 0.6434313628920583, 'learning_rate': 8.225007810484857e-06, 'epoch': 0.3} + 30%|██▉ | 3640/12188 [7:46:19<17:35:28, 7.41s/it] 30%|██▉ | 3641/12188 [7:46:26<17:12:05, 7.25s/it] {'loss': 0.355, 'grad_norm': 0.6547364429458058, 'learning_rate': 8.223992324350136e-06, 'epoch': 0.3} + 30%|██▉ | 3641/12188 [7:46:26<17:12:05, 7.25s/it] 30%|██▉ | 3642/12188 [7:46:32<16:40:53, 7.03s/it] {'loss': 0.3503, 'grad_norm': 0.6556560205467691, 'learning_rate': 8.222976610542255e-06, 'epoch': 0.3} + 30%|██▉ | 3642/12188 [7:46:32<16:40:53, 7.03s/it] 30%|██▉ | 3643/12188 [7:46:39<16:28:22, 6.94s/it] {'loss': 0.3697, 'grad_norm': 0.6343337182429735, 'learning_rate': 8.221960669132946e-06, 'epoch': 0.3} + 30%|██▉ | 3643/12188 [7:46:39<16:28:22, 6.94s/it] 30%|██▉ | 3644/12188 [7:46:47<16:57:41, 7.15s/it] {'loss': 0.3538, 'grad_norm': 0.6660543025783155, 'learning_rate': 8.22094450019395e-06, 'epoch': 0.3} + 30%|██▉ | 3644/12188 [7:46:47<16:57:41, 7.15s/it] 30%|██▉ | 3645/12188 [7:46:53<16:35:19, 6.99s/it] {'loss': 0.3676, 'grad_norm': 0.6323255188268146, 'learning_rate': 8.219928103797027e-06, 'epoch': 0.3} + 30%|██▉ | 3645/12188 [7:46:53<16:35:19, 6.99s/it] 30%|██▉ | 3646/12188 [7:47:01<17:18:18, 7.29s/it] {'loss': 0.3597, 'grad_norm': 0.7331387710855694, 'learning_rate': 8.218911480013956e-06, 'epoch': 0.3} + 30%|██▉ | 3646/12188 [7:47:01<17:18:18, 7.29s/it] 30%|██▉ | 3647/12188 [7:47:10<18:14:48, 7.69s/it] {'loss': 0.3579, 'grad_norm': 0.7358359268865273, 'learning_rate': 8.217894628916529e-06, 'epoch': 0.3} + 30%|██▉ | 3647/12188 [7:47:10<18:14:48, 7.69s/it] 30%|██▉ | 3648/12188 [7:47:17<17:37:53, 7.43s/it] {'loss': 0.3817, 'grad_norm': 0.7247127536082223, 'learning_rate': 8.216877550576552e-06, 'epoch': 0.3} + 30%|██▉ | 3648/12188 [7:47:17<17:37:53, 7.43s/it] 30%|██▉ | 3649/12188 [7:47:24<17:25:25, 7.35s/it] {'loss': 0.3536, 'grad_norm': 0.6026506284120019, 'learning_rate': 8.21586024506585e-06, 'epoch': 0.3} + 30%|██▉ | 3649/12188 [7:47:24<17:25:25, 7.35s/it] 30%|██▉ | 3650/12188 [7:47:34<19:19:36, 8.15s/it] {'loss': 0.3631, 'grad_norm': 0.7646876979700887, 'learning_rate': 8.214842712456266e-06, 'epoch': 0.3} + 30%|██▉ | 3650/12188 [7:47:34<19:19:36, 8.15s/it] 30%|██▉ | 3651/12188 [7:47:41<18:43:43, 7.90s/it] {'loss': 0.3635, 'grad_norm': 0.6205685296821226, 'learning_rate': 8.213824952819653e-06, 'epoch': 0.3} + 30%|██▉ | 3651/12188 [7:47:41<18:43:43, 7.90s/it] 30%|██▉ | 3652/12188 [7:47:48<18:16:31, 7.71s/it] {'loss': 0.3738, 'grad_norm': 0.654455213911147, 'learning_rate': 8.212806966227884e-06, 'epoch': 0.3} + 30%|██▉ | 3652/12188 [7:47:48<18:16:31, 7.71s/it] 30%|██▉ | 3653/12188 [7:47:58<19:24:30, 8.19s/it] {'loss': 0.3719, 'grad_norm': 0.6389881875571665, 'learning_rate': 8.211788752752851e-06, 'epoch': 0.3} + 30%|██▉ | 3653/12188 [7:47:58<19:24:30, 8.19s/it] 30%|██▉ | 3654/12188 [7:48:05<18:33:36, 7.83s/it] {'loss': 0.3548, 'grad_norm': 0.6059923394151949, 'learning_rate': 8.210770312466457e-06, 'epoch': 0.3} + 30%|██▉ | 3654/12188 [7:48:05<18:33:36, 7.83s/it] 30%|██▉ | 3655/12188 [7:48:12<18:09:03, 7.66s/it] {'loss': 0.3524, 'grad_norm': 0.6299163066578031, 'learning_rate': 8.20975164544062e-06, 'epoch': 0.3} + 30%|██▉ | 3655/12188 [7:48:12<18:09:03, 7.66s/it] 30%|██▉ | 3656/12188 [7:48:20<18:19:58, 7.74s/it] {'loss': 0.3598, 'grad_norm': 0.6392035697540956, 'learning_rate': 8.208732751747281e-06, 'epoch': 0.3} + 30%|██▉ | 3656/12188 [7:48:20<18:19:58, 7.74s/it] 30%|███ | 3657/12188 [7:48:28<18:24:51, 7.77s/it] {'loss': 0.3731, 'grad_norm': 0.6513946533404006, 'learning_rate': 8.20771363145839e-06, 'epoch': 0.3} + 30%|███ | 3657/12188 [7:48:28<18:24:51, 7.77s/it] 30%|███ | 3658/12188 [7:48:34<17:39:43, 7.45s/it] {'loss': 0.3583, 'grad_norm': 0.6456693432294469, 'learning_rate': 8.206694284645914e-06, 'epoch': 0.3} + 30%|███ | 3658/12188 [7:48:34<17:39:43, 7.45s/it] 30%|███ | 3659/12188 [7:48:41<17:13:24, 7.27s/it] {'loss': 0.366, 'grad_norm': 0.6453263295597355, 'learning_rate': 8.205674711381841e-06, 'epoch': 0.3} + 30%|███ | 3659/12188 [7:48:41<17:13:24, 7.27s/it] 30%|███ | 3660/12188 [7:48:48<16:53:33, 7.13s/it] {'loss': 0.3768, 'grad_norm': 0.7561994766024434, 'learning_rate': 8.204654911738169e-06, 'epoch': 0.3} + 30%|███ | 3660/12188 [7:48:48<16:53:33, 7.13s/it] 30%|███ | 3661/12188 [7:48:57<17:50:28, 7.53s/it] {'loss': 0.3586, 'grad_norm': 0.7452132925867124, 'learning_rate': 8.203634885786916e-06, 'epoch': 0.3} + 30%|███ | 3661/12188 [7:48:57<17:50:28, 7.53s/it] 30%|███ | 3662/12188 [7:49:04<17:49:44, 7.53s/it] {'loss': 0.356, 'grad_norm': 0.5761056620366076, 'learning_rate': 8.202614633600116e-06, 'epoch': 0.3} + 30%|███ | 3662/12188 [7:49:04<17:49:44, 7.53s/it] 30%|███ | 3663/12188 [7:49:11<17:28:16, 7.38s/it] {'loss': 0.3407, 'grad_norm': 0.6947477641576664, 'learning_rate': 8.201594155249814e-06, 'epoch': 0.3} + 30%|███ | 3663/12188 [7:49:11<17:28:16, 7.38s/it] 30%|███ | 3664/12188 [7:49:18<17:04:59, 7.21s/it] {'loss': 0.3675, 'grad_norm': 0.6271732766471348, 'learning_rate': 8.200573450808079e-06, 'epoch': 0.3} + 30%|███ | 3664/12188 [7:49:18<17:04:59, 7.21s/it] 30%|███ | 3665/12188 [7:49:24<16:32:40, 6.99s/it] {'loss': 0.4249, 'grad_norm': 0.715113388129879, 'learning_rate': 8.199552520346985e-06, 'epoch': 0.3} + 30%|███ | 3665/12188 [7:49:24<16:32:40, 6.99s/it] 30%|███ | 3666/12188 [7:49:33<17:39:26, 7.46s/it] {'loss': 0.3456, 'grad_norm': 0.6778471170928092, 'learning_rate': 8.198531363938635e-06, 'epoch': 0.3} + 30%|███ | 3666/12188 [7:49:33<17:39:26, 7.46s/it] 30%|███ | 3667/12188 [7:49:40<17:27:21, 7.37s/it] {'loss': 0.3327, 'grad_norm': 0.5704621991035866, 'learning_rate': 8.19750998165514e-06, 'epoch': 0.3} + 30%|███ | 3667/12188 [7:49:40<17:27:21, 7.37s/it] 30%|███ | 3668/12188 [7:49:50<19:23:25, 8.19s/it] {'loss': 0.3395, 'grad_norm': 0.6890884398981182, 'learning_rate': 8.196488373568627e-06, 'epoch': 0.3} + 30%|███ | 3668/12188 [7:49:50<19:23:25, 8.19s/it] 30%|███ | 3669/12188 [7:49:57<18:36:41, 7.86s/it] {'loss': 0.3454, 'grad_norm': 0.6650477986619512, 'learning_rate': 8.195466539751237e-06, 'epoch': 0.3} + 30%|███ | 3669/12188 [7:49:57<18:36:41, 7.86s/it] 30%|███ | 3670/12188 [7:50:05<18:36:16, 7.86s/it] {'loss': 0.375, 'grad_norm': 0.7072168083765741, 'learning_rate': 8.194444480275138e-06, 'epoch': 0.3} + 30%|███ | 3670/12188 [7:50:05<18:36:16, 7.86s/it] 30%|███ | 3671/12188 [7:50:13<18:17:54, 7.73s/it] {'loss': 0.3524, 'grad_norm': 0.6267027951103582, 'learning_rate': 8.1934221952125e-06, 'epoch': 0.3} + 30%|███ | 3671/12188 [7:50:13<18:17:54, 7.73s/it] 30%|███ | 3672/12188 [7:50:20<17:50:14, 7.54s/it] {'loss': 0.3607, 'grad_norm': 0.7339586146156185, 'learning_rate': 8.192399684635518e-06, 'epoch': 0.3} + 30%|███ | 3672/12188 [7:50:20<17:50:14, 7.54s/it] 30%|███ | 3673/12188 [7:50:27<17:21:21, 7.34s/it] {'loss': 0.3138, 'grad_norm': 0.6401070773412432, 'learning_rate': 8.1913769486164e-06, 'epoch': 0.3} + 30%|███ | 3673/12188 [7:50:27<17:21:21, 7.34s/it] 30%|███ | 3674/12188 [7:50:34<17:24:58, 7.36s/it] {'loss': 0.3801, 'grad_norm': 0.6819634284938385, 'learning_rate': 8.190353987227368e-06, 'epoch': 0.3} + 30%|███ | 3674/12188 [7:50:34<17:24:58, 7.36s/it] 30%|███ | 3675/12188 [7:50:41<17:11:55, 7.27s/it] {'loss': 0.3467, 'grad_norm': 0.6332827764577323, 'learning_rate': 8.189330800540662e-06, 'epoch': 0.3} + 30%|███ | 3675/12188 [7:50:41<17:11:55, 7.27s/it] 30%|███ | 3676/12188 [7:50:48<16:44:58, 7.08s/it] {'loss': 0.3484, 'grad_norm': 0.6428811281422284, 'learning_rate': 8.188307388628542e-06, 'epoch': 0.3} + 30%|███ | 3676/12188 [7:50:48<16:44:58, 7.08s/it] 30%|███ | 3677/12188 [7:50:55<16:47:42, 7.10s/it] {'loss': 0.3824, 'grad_norm': 0.6849333939129765, 'learning_rate': 8.187283751563275e-06, 'epoch': 0.3} + 30%|███ | 3677/12188 [7:50:55<16:47:42, 7.10s/it] 30%|███ | 3678/12188 [7:51:02<16:39:44, 7.05s/it] {'loss': 0.3735, 'grad_norm': 0.616381359430852, 'learning_rate': 8.186259889417149e-06, 'epoch': 0.3} + 30%|███ | 3678/12188 [7:51:02<16:39:44, 7.05s/it] 30%|███ | 3679/12188 [7:51:09<16:43:50, 7.08s/it] {'loss': 0.3857, 'grad_norm': 0.7332272667209161, 'learning_rate': 8.18523580226247e-06, 'epoch': 0.3} + 30%|███ | 3679/12188 [7:51:09<16:43:50, 7.08s/it] 30%|███ | 3680/12188 [7:51:16<16:46:28, 7.10s/it] {'loss': 0.3666, 'grad_norm': 0.6849108715925101, 'learning_rate': 8.184211490171557e-06, 'epoch': 0.3} + 30%|███ | 3680/12188 [7:51:16<16:46:28, 7.10s/it] 30%|███ | 3681/12188 [7:51:24<17:29:03, 7.40s/it] {'loss': 0.3506, 'grad_norm': 0.6704428751675263, 'learning_rate': 8.183186953216743e-06, 'epoch': 0.3} + 30%|███ | 3681/12188 [7:51:24<17:29:03, 7.40s/it] 30%|███ | 3682/12188 [7:51:33<18:19:20, 7.75s/it] {'loss': 0.3634, 'grad_norm': 0.714640095208256, 'learning_rate': 8.18216219147038e-06, 'epoch': 0.3} + 30%|███ | 3682/12188 [7:51:33<18:19:20, 7.75s/it] 30%|███ | 3683/12188 [7:51:40<17:46:38, 7.52s/it] {'loss': 0.3489, 'grad_norm': 0.7114754792366482, 'learning_rate': 8.181137205004836e-06, 'epoch': 0.3} + 30%|███ | 3683/12188 [7:51:40<17:46:38, 7.52s/it] 30%|███ | 3684/12188 [7:51:47<17:44:32, 7.51s/it] {'loss': 0.3387, 'grad_norm': 0.6891030127240041, 'learning_rate': 8.180111993892494e-06, 'epoch': 0.3} + 30%|███ | 3684/12188 [7:51:47<17:44:32, 7.51s/it] 30%|███ | 3685/12188 [7:51:54<17:29:59, 7.41s/it] {'loss': 0.386, 'grad_norm': 0.695446166347455, 'learning_rate': 8.179086558205754e-06, 'epoch': 0.3} + 30%|███ | 3685/12188 [7:51:54<17:29:59, 7.41s/it] 30%|███ | 3686/12188 [7:52:03<18:18:20, 7.75s/it] {'loss': 0.327, 'grad_norm': 0.6646124895496505, 'learning_rate': 8.178060898017027e-06, 'epoch': 0.3} + 30%|███ | 3686/12188 [7:52:03<18:18:20, 7.75s/it] 30%|███ | 3687/12188 [7:52:10<17:35:42, 7.45s/it] {'loss': 0.386, 'grad_norm': 0.6406450692977516, 'learning_rate': 8.177035013398744e-06, 'epoch': 0.3} + 30%|███ | 3687/12188 [7:52:10<17:35:42, 7.45s/it] 30%|███ | 3688/12188 [7:52:17<17:44:22, 7.51s/it] {'loss': 0.4364, 'grad_norm': 0.6293813815558957, 'learning_rate': 8.176008904423354e-06, 'epoch': 0.3} + 30%|███ | 3688/12188 [7:52:17<17:44:22, 7.51s/it] 30%|███ | 3689/12188 [7:52:25<17:30:20, 7.41s/it] {'loss': 0.3655, 'grad_norm': 0.6246980516282074, 'learning_rate': 8.17498257116332e-06, 'epoch': 0.3} + 30%|███ | 3689/12188 [7:52:25<17:30:20, 7.41s/it] 30%|███ | 3690/12188 [7:52:32<17:53:04, 7.58s/it] {'loss': 0.359, 'grad_norm': 0.6515939557908829, 'learning_rate': 8.173956013691115e-06, 'epoch': 0.3} + 30%|███ | 3690/12188 [7:52:32<17:53:04, 7.58s/it] 30%|███ | 3691/12188 [7:52:41<18:41:48, 7.92s/it] {'loss': 0.3768, 'grad_norm': 0.6174616355625498, 'learning_rate': 8.172929232079237e-06, 'epoch': 0.3} + 30%|███ | 3691/12188 [7:52:41<18:41:48, 7.92s/it] 30%|███ | 3692/12188 [7:52:48<18:15:08, 7.73s/it] {'loss': 0.3671, 'grad_norm': 0.6659977087219559, 'learning_rate': 8.171902226400192e-06, 'epoch': 0.3} + 30%|███ | 3692/12188 [7:52:49<18:15:08, 7.73s/it] 30%|███ | 3693/12188 [7:52:56<18:20:57, 7.78s/it] {'loss': 0.3907, 'grad_norm': 0.6985377160200529, 'learning_rate': 8.17087499672651e-06, 'epoch': 0.3} + 30%|███ | 3693/12188 [7:52:56<18:20:57, 7.78s/it] 30%|███ | 3694/12188 [7:53:03<17:43:54, 7.52s/it] {'loss': 0.3594, 'grad_norm': 0.6654448702284389, 'learning_rate': 8.16984754313073e-06, 'epoch': 0.3} + 30%|███ | 3694/12188 [7:53:03<17:43:54, 7.52s/it] 30%|███ | 3695/12188 [7:53:12<18:18:38, 7.76s/it] {'loss': 0.3623, 'grad_norm': 0.6833930174490663, 'learning_rate': 8.16881986568541e-06, 'epoch': 0.3} + 30%|███ | 3695/12188 [7:53:12<18:18:38, 7.76s/it] 30%|███ | 3696/12188 [7:53:19<17:48:10, 7.55s/it] {'loss': 0.324, 'grad_norm': 0.7487952615884995, 'learning_rate': 8.167791964463119e-06, 'epoch': 0.3} + 30%|███ | 3696/12188 [7:53:19<17:48:10, 7.55s/it] 30%|███ | 3697/12188 [7:53:26<17:20:29, 7.35s/it] {'loss': 0.3214, 'grad_norm': 0.6104002003052371, 'learning_rate': 8.166763839536451e-06, 'epoch': 0.3} + 30%|███ | 3697/12188 [7:53:26<17:20:29, 7.35s/it] 30%|███ | 3698/12188 [7:53:35<18:55:03, 8.02s/it] {'loss': 0.36, 'grad_norm': 0.676941931357917, 'learning_rate': 8.165735490978009e-06, 'epoch': 0.3} + 30%|███ | 3698/12188 [7:53:35<18:55:03, 8.02s/it] 30%|███ | 3699/12188 [7:53:42<18:06:41, 7.68s/it] {'loss': 0.366, 'grad_norm': 0.6338090419478866, 'learning_rate': 8.164706918860413e-06, 'epoch': 0.3} + 30%|███ | 3699/12188 [7:53:42<18:06:41, 7.68s/it] 30%|███ | 3700/12188 [7:53:49<17:53:08, 7.59s/it] {'loss': 0.3573, 'grad_norm': 0.6839412925567715, 'learning_rate': 8.163678123256298e-06, 'epoch': 0.3} + 30%|███ | 3700/12188 [7:53:49<17:53:08, 7.59s/it] 30%|███ | 3701/12188 [7:53:57<17:39:56, 7.49s/it] {'loss': 0.448, 'grad_norm': 0.6483757337686723, 'learning_rate': 8.162649104238317e-06, 'epoch': 0.3} + 30%|███ | 3701/12188 [7:53:57<17:39:56, 7.49s/it] 30%|███ | 3702/12188 [7:54:03<17:02:24, 7.23s/it] {'loss': 0.4177, 'grad_norm': 0.639655469631495, 'learning_rate': 8.161619861879136e-06, 'epoch': 0.3} + 30%|███ | 3702/12188 [7:54:03<17:02:24, 7.23s/it] 30%|███ | 3703/12188 [7:54:11<17:04:19, 7.24s/it] {'loss': 0.3982, 'grad_norm': 0.6622430278969967, 'learning_rate': 8.160590396251442e-06, 'epoch': 0.3} + 30%|███ | 3703/12188 [7:54:11<17:04:19, 7.24s/it] 30%|███ | 3704/12188 [7:54:18<17:13:30, 7.31s/it] {'loss': 0.3722, 'grad_norm': 0.8296519444821848, 'learning_rate': 8.159560707427931e-06, 'epoch': 0.3} + 30%|███ | 3704/12188 [7:54:18<17:13:30, 7.31s/it] 30%|███ | 3705/12188 [7:54:25<16:44:15, 7.10s/it] {'loss': 0.3618, 'grad_norm': 0.65312055420101, 'learning_rate': 8.158530795481318e-06, 'epoch': 0.3} + 30%|███ | 3705/12188 [7:54:25<16:44:15, 7.10s/it] 30%|███ | 3706/12188 [7:54:32<17:02:26, 7.23s/it] {'loss': 0.389, 'grad_norm': 0.6396627037384319, 'learning_rate': 8.157500660484338e-06, 'epoch': 0.3} + 30%|███ | 3706/12188 [7:54:32<17:02:26, 7.23s/it] 30%|███ | 3707/12188 [7:54:39<16:48:11, 7.13s/it] {'loss': 0.363, 'grad_norm': 0.8488563043330178, 'learning_rate': 8.156470302509732e-06, 'epoch': 0.3} + 30%|███ | 3707/12188 [7:54:39<16:48:11, 7.13s/it] 30%|███ | 3708/12188 [7:54:46<16:30:26, 7.01s/it] {'loss': 0.3911, 'grad_norm': 0.8949899371459598, 'learning_rate': 8.155439721630265e-06, 'epoch': 0.3} + 30%|███ | 3708/12188 [7:54:46<16:30:26, 7.01s/it] 30%|███ | 3709/12188 [7:54:54<17:34:47, 7.46s/it] {'loss': 0.3508, 'grad_norm': 0.6586773593839367, 'learning_rate': 8.154408917918715e-06, 'epoch': 0.3} + 30%|███ | 3709/12188 [7:54:54<17:34:47, 7.46s/it] 30%|███ | 3710/12188 [7:55:03<18:10:53, 7.72s/it] {'loss': 0.3872, 'grad_norm': 0.6683063922685982, 'learning_rate': 8.153377891447873e-06, 'epoch': 0.3} + 30%|███ | 3710/12188 [7:55:03<18:10:53, 7.72s/it] 30%|███ | 3711/12188 [7:55:10<18:05:53, 7.69s/it] {'loss': 0.3415, 'grad_norm': 0.625530565985483, 'learning_rate': 8.152346642290553e-06, 'epoch': 0.3} + 30%|███ | 3711/12188 [7:55:10<18:05:53, 7.69s/it] 30%|███ | 3712/12188 [7:55:18<18:03:50, 7.67s/it] {'loss': 0.3516, 'grad_norm': 0.6196339556766683, 'learning_rate': 8.151315170519576e-06, 'epoch': 0.3} + 30%|███ | 3712/12188 [7:55:18<18:03:50, 7.67s/it] 30%|███ | 3713/12188 [7:55:25<17:38:31, 7.49s/it] {'loss': 0.3592, 'grad_norm': 0.6582202616014271, 'learning_rate': 8.150283476207785e-06, 'epoch': 0.3} + 30%|███ | 3713/12188 [7:55:25<17:38:31, 7.49s/it] 30%|███ | 3714/12188 [7:55:32<17:16:24, 7.34s/it] {'loss': 0.3764, 'grad_norm': 0.6471003772103989, 'learning_rate': 8.149251559428037e-06, 'epoch': 0.3} + 30%|███ | 3714/12188 [7:55:32<17:16:24, 7.34s/it] 30%|███ | 3715/12188 [7:55:39<16:54:06, 7.18s/it] {'loss': 0.3443, 'grad_norm': 0.6000829175740287, 'learning_rate': 8.148219420253204e-06, 'epoch': 0.3} + 30%|███ | 3715/12188 [7:55:39<16:54:06, 7.18s/it] 30%|███ | 3716/12188 [7:55:47<17:34:18, 7.47s/it] {'loss': 0.3725, 'grad_norm': 0.6820618672812747, 'learning_rate': 8.147187058756173e-06, 'epoch': 0.3} + 30%|███ | 3716/12188 [7:55:47<17:34:18, 7.47s/it] 30%|███ | 3717/12188 [7:55:54<17:35:36, 7.48s/it] {'loss': 0.4129, 'grad_norm': 0.6464593079106565, 'learning_rate': 8.146154475009849e-06, 'epoch': 0.3} + 30%|███ | 3717/12188 [7:55:54<17:35:36, 7.48s/it] 31%|███ | 3718/12188 [7:56:02<17:23:59, 7.40s/it] {'loss': 0.3981, 'grad_norm': 0.5990658193462356, 'learning_rate': 8.145121669087148e-06, 'epoch': 0.31} + 31%|███ | 3718/12188 [7:56:02<17:23:59, 7.40s/it] 31%|███ | 3719/12188 [7:56:08<17:02:33, 7.24s/it] {'loss': 0.3669, 'grad_norm': 0.7368837391827804, 'learning_rate': 8.14408864106101e-06, 'epoch': 0.31} + 31%|███ | 3719/12188 [7:56:08<17:02:33, 7.24s/it] 31%|███ | 3720/12188 [7:56:16<17:06:12, 7.27s/it] {'loss': 0.3596, 'grad_norm': 0.6330419826420343, 'learning_rate': 8.143055391004383e-06, 'epoch': 0.31} + 31%|███ | 3720/12188 [7:56:16<17:06:12, 7.27s/it] 31%|███ | 3721/12188 [7:56:23<16:54:18, 7.19s/it] {'loss': 0.3457, 'grad_norm': 0.6796482450217874, 'learning_rate': 8.142021918990234e-06, 'epoch': 0.31} + 31%|███ | 3721/12188 [7:56:23<16:54:18, 7.19s/it] 31%|███ | 3722/12188 [7:56:32<18:09:13, 7.72s/it] {'loss': 0.3774, 'grad_norm': 0.6527959583661851, 'learning_rate': 8.140988225091544e-06, 'epoch': 0.31} + 31%|███ | 3722/12188 [7:56:32<18:09:13, 7.72s/it] 31%|███ | 3723/12188 [7:56:39<17:52:39, 7.60s/it] {'loss': 0.3872, 'grad_norm': 0.7193023778973627, 'learning_rate': 8.139954309381314e-06, 'epoch': 0.31} + 31%|███ | 3723/12188 [7:56:39<17:52:39, 7.60s/it] 31%|███ | 3724/12188 [7:56:47<18:01:38, 7.67s/it] {'loss': 0.3431, 'grad_norm': 0.633028113186812, 'learning_rate': 8.138920171932554e-06, 'epoch': 0.31} + 31%|███ | 3724/12188 [7:56:47<18:01:38, 7.67s/it] 31%|███ | 3725/12188 [7:56:54<17:16:32, 7.35s/it] {'loss': 0.3861, 'grad_norm': 0.7903542669249172, 'learning_rate': 8.137885812818296e-06, 'epoch': 0.31} + 31%|███ | 3725/12188 [7:56:54<17:16:32, 7.35s/it] 31%|███ | 3726/12188 [7:57:01<17:11:14, 7.31s/it] {'loss': 0.3152, 'grad_norm': 0.6718901445630933, 'learning_rate': 8.136851232111582e-06, 'epoch': 0.31} + 31%|███ | 3726/12188 [7:57:01<17:11:14, 7.31s/it] 31%|███ | 3727/12188 [7:57:08<17:24:10, 7.40s/it] {'loss': 0.3538, 'grad_norm': 0.6319407351078121, 'learning_rate': 8.135816429885473e-06, 'epoch': 0.31} + 31%|███ | 3727/12188 [7:57:08<17:24:10, 7.40s/it] 31%|███ | 3728/12188 [7:57:16<17:28:06, 7.43s/it] {'loss': 0.4108, 'grad_norm': 0.7186039003497519, 'learning_rate': 8.134781406213048e-06, 'epoch': 0.31} + 31%|███ | 3728/12188 [7:57:16<17:28:06, 7.43s/it] 31%|███ | 3729/12188 [7:57:23<17:04:40, 7.27s/it] {'loss': 0.3745, 'grad_norm': 0.697040414967708, 'learning_rate': 8.133746161167396e-06, 'epoch': 0.31} + 31%|███ | 3729/12188 [7:57:23<17:04:40, 7.27s/it] 31%|███ | 3730/12188 [7:57:31<17:26:04, 7.42s/it] {'loss': 0.355, 'grad_norm': 0.5967516965760348, 'learning_rate': 8.132710694821625e-06, 'epoch': 0.31} + 31%|███ | 3730/12188 [7:57:31<17:26:04, 7.42s/it] 31%|███ | 3731/12188 [7:57:38<17:09:34, 7.30s/it] {'loss': 0.3742, 'grad_norm': 0.641143424217867, 'learning_rate': 8.131675007248854e-06, 'epoch': 0.31} + 31%|███ | 3731/12188 [7:57:38<17:09:34, 7.30s/it] 31%|███ | 3732/12188 [7:57:45<17:32:41, 7.47s/it] {'loss': 0.3438, 'grad_norm': 0.7042387759902232, 'learning_rate': 8.130639098522229e-06, 'epoch': 0.31} + 31%|███ | 3732/12188 [7:57:45<17:32:41, 7.47s/it] 31%|███ | 3733/12188 [7:57:53<17:29:04, 7.44s/it] {'loss': 0.352, 'grad_norm': 0.6760606517123672, 'learning_rate': 8.1296029687149e-06, 'epoch': 0.31} + 31%|███ | 3733/12188 [7:57:53<17:29:04, 7.44s/it] 31%|███ | 3734/12188 [7:58:00<16:57:30, 7.22s/it] {'loss': 0.3445, 'grad_norm': 0.6904951491455673, 'learning_rate': 8.128566617900038e-06, 'epoch': 0.31} + 31%|███ | 3734/12188 [7:58:00<16:57:30, 7.22s/it] 31%|███ | 3735/12188 [7:58:07<17:12:29, 7.33s/it] {'loss': 0.3246, 'grad_norm': 0.7478448105713729, 'learning_rate': 8.127530046150827e-06, 'epoch': 0.31} + 31%|███ | 3735/12188 [7:58:07<17:12:29, 7.33s/it] 31%|███ | 3736/12188 [7:58:15<17:51:15, 7.60s/it] {'loss': 0.3942, 'grad_norm': 0.699408515137096, 'learning_rate': 8.126493253540468e-06, 'epoch': 0.31} + 31%|███ | 3736/12188 [7:58:15<17:51:15, 7.60s/it] 31%|███ | 3737/12188 [7:58:24<18:55:24, 8.06s/it] {'loss': 0.38, 'grad_norm': 0.6245814278527828, 'learning_rate': 8.12545624014218e-06, 'epoch': 0.31} + 31%|███ | 3737/12188 [7:58:24<18:55:24, 8.06s/it] 31%|███ | 3738/12188 [7:58:31<18:05:40, 7.71s/it] {'loss': 0.3814, 'grad_norm': 0.6399073847357779, 'learning_rate': 8.124419006029194e-06, 'epoch': 0.31} + 31%|███ | 3738/12188 [7:58:31<18:05:40, 7.71s/it] 31%|███ | 3739/12188 [7:58:39<17:45:23, 7.57s/it] {'loss': 0.3326, 'grad_norm': 0.6484955172867246, 'learning_rate': 8.123381551274757e-06, 'epoch': 0.31} + 31%|███ | 3739/12188 [7:58:39<17:45:23, 7.57s/it] 31%|███ | 3740/12188 [7:58:46<17:45:35, 7.57s/it] {'loss': 0.3491, 'grad_norm': 0.6563902836181652, 'learning_rate': 8.122343875952132e-06, 'epoch': 0.31} + 31%|███ | 3740/12188 [7:58:46<17:45:35, 7.57s/it] 31%|███ | 3741/12188 [7:58:54<17:56:05, 7.64s/it] {'loss': 0.3461, 'grad_norm': 0.6947986187680709, 'learning_rate': 8.1213059801346e-06, 'epoch': 0.31} + 31%|███ | 3741/12188 [7:58:54<17:56:05, 7.64s/it] 31%|███ | 3742/12188 [7:59:01<17:27:37, 7.44s/it] {'loss': 0.3455, 'grad_norm': 0.6270537823736456, 'learning_rate': 8.120267863895454e-06, 'epoch': 0.31} + 31%|███ | 3742/12188 [7:59:01<17:27:37, 7.44s/it] 31%|███ | 3743/12188 [7:59:08<17:04:42, 7.28s/it] {'loss': 0.3619, 'grad_norm': 0.7256888674252525, 'learning_rate': 8.119229527308006e-06, 'epoch': 0.31} + 31%|███ | 3743/12188 [7:59:08<17:04:42, 7.28s/it] 31%|███ | 3744/12188 [7:59:15<17:12:12, 7.33s/it] {'loss': 0.3207, 'grad_norm': 0.5854846940049835, 'learning_rate': 8.11819097044558e-06, 'epoch': 0.31} + 31%|███ | 3744/12188 [7:59:15<17:12:12, 7.33s/it] 31%|███ | 3745/12188 [7:59:23<17:16:18, 7.36s/it] {'loss': 0.3376, 'grad_norm': 0.6820854094251013, 'learning_rate': 8.117152193381514e-06, 'epoch': 0.31} + 31%|███ | 3745/12188 [7:59:23<17:16:18, 7.36s/it] 31%|███ | 3746/12188 [7:59:29<16:46:21, 7.15s/it] {'loss': 0.3452, 'grad_norm': 0.6077357388188029, 'learning_rate': 8.116113196189172e-06, 'epoch': 0.31} + 31%|███ | 3746/12188 [7:59:29<16:46:21, 7.15s/it] 31%|███ | 3747/12188 [7:59:37<16:58:24, 7.24s/it] {'loss': 0.3638, 'grad_norm': 0.6191818946951178, 'learning_rate': 8.11507397894192e-06, 'epoch': 0.31} + 31%|███ | 3747/12188 [7:59:37<16:58:24, 7.24s/it] 31%|███ | 3748/12188 [7:59:45<17:32:33, 7.48s/it] {'loss': 0.3527, 'grad_norm': 0.6307666598701795, 'learning_rate': 8.114034541713152e-06, 'epoch': 0.31} + 31%|███ | 3748/12188 [7:59:45<17:32:33, 7.48s/it] 31%|███ | 3749/12188 [7:59:52<17:18:08, 7.38s/it] {'loss': 0.4309, 'grad_norm': 0.6897836657645112, 'learning_rate': 8.112994884576263e-06, 'epoch': 0.31} + 31%|███ | 3749/12188 [7:59:52<17:18:08, 7.38s/it] 31%|███ | 3750/12188 [8:00:00<17:31:34, 7.48s/it] {'loss': 0.3502, 'grad_norm': 0.764316386195445, 'learning_rate': 8.111955007604679e-06, 'epoch': 0.31} + 31%|███ | 3750/12188 [8:00:00<17:31:34, 7.48s/it] 31%|███ | 3751/12188 [8:00:07<17:12:30, 7.34s/it] {'loss': 0.337, 'grad_norm': 0.6944829968747057, 'learning_rate': 8.110914910871834e-06, 'epoch': 0.31} + 31%|███ | 3751/12188 [8:00:07<17:12:30, 7.34s/it] 31%|███ | 3752/12188 [8:00:15<17:35:06, 7.50s/it] {'loss': 0.3402, 'grad_norm': 0.6152311676733283, 'learning_rate': 8.109874594451172e-06, 'epoch': 0.31} + 31%|███ | 3752/12188 [8:00:15<17:35:06, 7.50s/it] 31%|███ | 3753/12188 [8:00:22<17:25:33, 7.44s/it] {'loss': 0.3577, 'grad_norm': 0.6325560970700014, 'learning_rate': 8.108834058416167e-06, 'epoch': 0.31} + 31%|███ | 3753/12188 [8:00:22<17:25:33, 7.44s/it] 31%|███ | 3754/12188 [8:00:29<17:14:49, 7.36s/it] {'loss': 0.3716, 'grad_norm': 0.6649117388536481, 'learning_rate': 8.107793302840292e-06, 'epoch': 0.31} + 31%|███ | 3754/12188 [8:00:29<17:14:49, 7.36s/it] 31%|███ | 3755/12188 [8:00:36<17:14:55, 7.36s/it] {'loss': 0.3472, 'grad_norm': 0.7025638345168482, 'learning_rate': 8.106752327797047e-06, 'epoch': 0.31} + 31%|███ | 3755/12188 [8:00:37<17:14:55, 7.36s/it] 31%|███ | 3756/12188 [8:00:44<17:35:48, 7.51s/it] {'loss': 0.3924, 'grad_norm': 0.690686430472669, 'learning_rate': 8.105711133359946e-06, 'epoch': 0.31} + 31%|███ | 3756/12188 [8:00:44<17:35:48, 7.51s/it] 31%|███ | 3757/12188 [8:00:51<17:15:45, 7.37s/it] {'loss': 0.3874, 'grad_norm': 0.7638037162775908, 'learning_rate': 8.104669719602514e-06, 'epoch': 0.31} + 31%|███ | 3757/12188 [8:00:51<17:15:45, 7.37s/it] 31%|███ | 3758/12188 [8:00:58<17:00:35, 7.26s/it] {'loss': 0.3263, 'grad_norm': 0.7063535891478551, 'learning_rate': 8.103628086598293e-06, 'epoch': 0.31} + 31%|███ | 3758/12188 [8:00:58<17:00:35, 7.26s/it] 31%|███ | 3759/12188 [8:01:05<16:44:00, 7.15s/it] {'loss': 0.3306, 'grad_norm': 0.6930317178207955, 'learning_rate': 8.102586234420843e-06, 'epoch': 0.31} + 31%|███ | 3759/12188 [8:01:05<16:44:00, 7.15s/it] 31%|███ | 3760/12188 [8:01:13<16:57:15, 7.24s/it] {'loss': 0.3927, 'grad_norm': 0.6589628128630135, 'learning_rate': 8.10154416314374e-06, 'epoch': 0.31} + 31%|███ | 3760/12188 [8:01:13<16:57:15, 7.24s/it] 31%|███ | 3761/12188 [8:01:20<16:39:47, 7.12s/it] {'loss': 0.3527, 'grad_norm': 0.8679995197507775, 'learning_rate': 8.10050187284057e-06, 'epoch': 0.31} + 31%|███ | 3761/12188 [8:01:20<16:39:47, 7.12s/it] 31%|███ | 3762/12188 [8:01:27<16:51:21, 7.20s/it] {'loss': 0.3506, 'grad_norm': 0.6500420558104189, 'learning_rate': 8.099459363584939e-06, 'epoch': 0.31} + 31%|███ | 3762/12188 [8:01:27<16:51:21, 7.20s/it] 31%|███ | 3763/12188 [8:01:34<17:01:01, 7.27s/it] {'loss': 0.323, 'grad_norm': 0.6820795028662977, 'learning_rate': 8.098416635450467e-06, 'epoch': 0.31} + 31%|███ | 3763/12188 [8:01:34<17:01:01, 7.27s/it] 31%|███ | 3764/12188 [8:01:41<16:33:59, 7.08s/it] {'loss': 0.3589, 'grad_norm': 0.8172453296134877, 'learning_rate': 8.097373688510791e-06, 'epoch': 0.31} + 31%|███ | 3764/12188 [8:01:41<16:33:59, 7.08s/it] 31%|███ | 3765/12188 [8:01:48<16:41:41, 7.14s/it] {'loss': 0.3796, 'grad_norm': 0.690002560494531, 'learning_rate': 8.096330522839562e-06, 'epoch': 0.31} + 31%|███ | 3765/12188 [8:01:48<16:41:41, 7.14s/it] 31%|███ | 3766/12188 [8:01:55<16:28:36, 7.04s/it] {'loss': 0.3449, 'grad_norm': 0.6437657353090358, 'learning_rate': 8.095287138510444e-06, 'epoch': 0.31} + 31%|███ | 3766/12188 [8:01:55<16:28:36, 7.04s/it] 31%|███ | 3767/12188 [8:02:03<16:53:55, 7.22s/it] {'loss': 0.332, 'grad_norm': 0.631329022331102, 'learning_rate': 8.094243535597122e-06, 'epoch': 0.31} + 31%|███ | 3767/12188 [8:02:03<16:53:55, 7.22s/it] 31%|███ | 3768/12188 [8:02:10<16:36:39, 7.10s/it] {'loss': 0.3609, 'grad_norm': 0.6632217754130587, 'learning_rate': 8.093199714173295e-06, 'epoch': 0.31} + 31%|███ | 3768/12188 [8:02:10<16:36:39, 7.10s/it] 31%|███ | 3769/12188 [8:02:17<17:00:30, 7.27s/it] {'loss': 0.3401, 'grad_norm': 0.6190182755280317, 'learning_rate': 8.092155674312672e-06, 'epoch': 0.31} + 31%|███ | 3769/12188 [8:02:17<17:00:30, 7.27s/it] 31%|███ | 3770/12188 [8:02:24<16:56:59, 7.25s/it] {'loss': 0.3343, 'grad_norm': 0.6700915707663538, 'learning_rate': 8.091111416088982e-06, 'epoch': 0.31} + 31%|███ | 3770/12188 [8:02:24<16:56:59, 7.25s/it] 31%|███ | 3771/12188 [8:02:31<16:25:44, 7.03s/it] {'loss': 0.3691, 'grad_norm': 0.7041549628336048, 'learning_rate': 8.090066939575972e-06, 'epoch': 0.31} + 31%|███ | 3771/12188 [8:02:31<16:25:44, 7.03s/it] 31%|███ | 3772/12188 [8:02:39<16:49:37, 7.20s/it] {'loss': 0.3459, 'grad_norm': 0.6827893084338273, 'learning_rate': 8.089022244847398e-06, 'epoch': 0.31} + 31%|███ | 3772/12188 [8:02:39<16:49:37, 7.20s/it] 31%|███ | 3773/12188 [8:02:46<16:50:30, 7.21s/it] {'loss': 0.3187, 'grad_norm': 0.6050722508117824, 'learning_rate': 8.087977331977037e-06, 'epoch': 0.31} + 31%|███ | 3773/12188 [8:02:46<16:50:30, 7.21s/it] 31%|███ | 3774/12188 [8:02:53<16:40:45, 7.14s/it] {'loss': 0.3493, 'grad_norm': 0.6622323771789739, 'learning_rate': 8.086932201038677e-06, 'epoch': 0.31} + 31%|███ | 3774/12188 [8:02:53<16:40:45, 7.14s/it] 31%|███ | 3775/12188 [8:02:59<16:10:18, 6.92s/it] {'loss': 0.3159, 'grad_norm': 0.7444730415871988, 'learning_rate': 8.085886852106126e-06, 'epoch': 0.31} + 31%|███ | 3775/12188 [8:02:59<16:10:18, 6.92s/it] 31%|███ | 3776/12188 [8:03:06<15:53:53, 6.80s/it] {'loss': 0.3635, 'grad_norm': 0.7822435732768103, 'learning_rate': 8.084841285253202e-06, 'epoch': 0.31} + 31%|███ | 3776/12188 [8:03:06<15:53:53, 6.80s/it] 31%|███ | 3777/12188 [8:03:14<16:42:29, 7.15s/it] {'loss': 0.3542, 'grad_norm': 0.6193612202493416, 'learning_rate': 8.08379550055374e-06, 'epoch': 0.31} + 31%|███ | 3777/12188 [8:03:14<16:42:29, 7.15s/it] 31%|███ | 3778/12188 [8:03:21<16:40:02, 7.13s/it] {'loss': 0.3353, 'grad_norm': 0.6409272255557549, 'learning_rate': 8.0827494980816e-06, 'epoch': 0.31} + 31%|███ | 3778/12188 [8:03:21<16:40:02, 7.13s/it] 31%|███ | 3779/12188 [8:03:27<16:23:14, 7.02s/it] {'loss': 0.3792, 'grad_norm': 0.7003568573702437, 'learning_rate': 8.08170327791064e-06, 'epoch': 0.31} + 31%|███ | 3779/12188 [8:03:28<16:23:14, 7.02s/it] 31%|███ | 3780/12188 [8:03:39<19:33:53, 8.38s/it] {'loss': 0.3601, 'grad_norm': 0.7305430668852296, 'learning_rate': 8.080656840114746e-06, 'epoch': 0.31} + 31%|███ | 3780/12188 [8:03:39<19:33:53, 8.38s/it] 31%|███ | 3781/12188 [8:03:46<18:23:18, 7.87s/it] {'loss': 0.3502, 'grad_norm': 0.6506541728102117, 'learning_rate': 8.079610184767817e-06, 'epoch': 0.31} + 31%|███ | 3781/12188 [8:03:46<18:23:18, 7.87s/it] 31%|███ | 3782/12188 [8:03:53<17:55:47, 7.68s/it] {'loss': 0.3728, 'grad_norm': 0.663606491894116, 'learning_rate': 8.078563311943762e-06, 'epoch': 0.31} + 31%|███ | 3782/12188 [8:03:53<17:55:47, 7.68s/it] 31%|███ | 3783/12188 [8:04:02<18:33:42, 7.95s/it] {'loss': 0.3428, 'grad_norm': 0.8156997176488361, 'learning_rate': 8.077516221716512e-06, 'epoch': 0.31} + 31%|███ | 3783/12188 [8:04:02<18:33:42, 7.95s/it] 31%|███ | 3784/12188 [8:04:08<17:47:55, 7.62s/it] {'loss': 0.3577, 'grad_norm': 0.6055363967819456, 'learning_rate': 8.076468914160012e-06, 'epoch': 0.31} + 31%|███ | 3784/12188 [8:04:08<17:47:55, 7.62s/it] 31%|███ | 3785/12188 [8:04:16<17:28:20, 7.49s/it] {'loss': 0.4011, 'grad_norm': 0.7158928624816983, 'learning_rate': 8.07542138934822e-06, 'epoch': 0.31} + 31%|███ | 3785/12188 [8:04:16<17:28:20, 7.49s/it] 31%|███ | 3786/12188 [8:04:23<17:09:32, 7.35s/it] {'loss': 0.3922, 'grad_norm': 0.6401908618143217, 'learning_rate': 8.074373647355111e-06, 'epoch': 0.31} + 31%|███ | 3786/12188 [8:04:23<17:09:32, 7.35s/it] 31%|███ | 3787/12188 [8:04:30<17:10:57, 7.36s/it] {'loss': 0.4126, 'grad_norm': 0.7037931875162666, 'learning_rate': 8.073325688254674e-06, 'epoch': 0.31} + 31%|███ | 3787/12188 [8:04:30<17:10:57, 7.36s/it] 31%|███ | 3788/12188 [8:04:38<17:29:28, 7.50s/it] {'loss': 0.3507, 'grad_norm': 0.6478800700869541, 'learning_rate': 8.072277512120913e-06, 'epoch': 0.31} + 31%|███ | 3788/12188 [8:04:38<17:29:28, 7.50s/it] 31%|███ | 3789/12188 [8:04:45<17:05:38, 7.33s/it] {'loss': 0.3482, 'grad_norm': 1.0708590596761325, 'learning_rate': 8.071229119027854e-06, 'epoch': 0.31} + 31%|███ | 3789/12188 [8:04:45<17:05:38, 7.33s/it] 31%|███ | 3790/12188 [8:04:52<17:03:47, 7.31s/it] {'loss': 0.369, 'grad_norm': 0.6377954153801891, 'learning_rate': 8.070180509049525e-06, 'epoch': 0.31} + 31%|███ | 3790/12188 [8:04:52<17:03:47, 7.31s/it] 31%|███ | 3791/12188 [8:05:01<18:07:15, 7.77s/it] {'loss': 0.3742, 'grad_norm': 0.6242527972454538, 'learning_rate': 8.069131682259982e-06, 'epoch': 0.31} + 31%|███ | 3791/12188 [8:05:01<18:07:15, 7.77s/it] 31%|███ | 3792/12188 [8:05:08<17:25:01, 7.47s/it] {'loss': 0.3454, 'grad_norm': 0.7138263359445549, 'learning_rate': 8.068082638733292e-06, 'epoch': 0.31} + 31%|███ | 3792/12188 [8:05:08<17:25:01, 7.47s/it] 31%|███ | 3793/12188 [8:05:15<17:20:21, 7.44s/it] {'loss': 0.4027, 'grad_norm': 0.6598161329568597, 'learning_rate': 8.067033378543533e-06, 'epoch': 0.31} + 31%|███ | 3793/12188 [8:05:15<17:20:21, 7.44s/it] 31%|███ | 3794/12188 [8:05:22<16:58:06, 7.28s/it] {'loss': 0.3512, 'grad_norm': 0.6560553662045475, 'learning_rate': 8.065983901764807e-06, 'epoch': 0.31} + 31%|███ | 3794/12188 [8:05:22<16:58:06, 7.28s/it] 31%|███ | 3795/12188 [8:05:29<16:51:52, 7.23s/it] {'loss': 0.3316, 'grad_norm': 0.6808179655597337, 'learning_rate': 8.064934208471223e-06, 'epoch': 0.31} + 31%|███ | 3795/12188 [8:05:29<16:51:52, 7.23s/it] 31%|███ | 3796/12188 [8:05:36<16:27:05, 7.06s/it] {'loss': 0.3901, 'grad_norm': 0.6281325668961318, 'learning_rate': 8.063884298736908e-06, 'epoch': 0.31} + 31%|███ | 3796/12188 [8:05:36<16:27:05, 7.06s/it] 31%|███ | 3797/12188 [8:05:43<16:45:45, 7.19s/it] {'loss': 0.3572, 'grad_norm': 0.6122081147300408, 'learning_rate': 8.062834172636007e-06, 'epoch': 0.31} + 31%|███ | 3797/12188 [8:05:43<16:45:45, 7.19s/it] 31%|███ | 3798/12188 [8:05:50<16:27:03, 7.06s/it] {'loss': 0.3482, 'grad_norm': 0.6220295723603877, 'learning_rate': 8.061783830242677e-06, 'epoch': 0.31} + 31%|███ | 3798/12188 [8:05:50<16:27:03, 7.06s/it] 31%|███ | 3799/12188 [8:05:57<16:26:32, 7.06s/it] {'loss': 0.349, 'grad_norm': 0.7981226439039777, 'learning_rate': 8.060733271631094e-06, 'epoch': 0.31} + 31%|███ | 3799/12188 [8:05:57<16:26:32, 7.06s/it] 31%|███ | 3800/12188 [8:06:04<16:12:25, 6.96s/it] {'loss': 0.3519, 'grad_norm': 0.6820320767824207, 'learning_rate': 8.059682496875443e-06, 'epoch': 0.31} + 31%|███ | 3800/12188 [8:06:04<16:12:25, 6.96s/it] 31%|███ | 3801/12188 [8:06:11<16:42:32, 7.17s/it] {'loss': 0.3247, 'grad_norm': 0.617233903042285, 'learning_rate': 8.05863150604993e-06, 'epoch': 0.31} + 31%|███ | 3801/12188 [8:06:11<16:42:32, 7.17s/it] 31%|███ | 3802/12188 [8:06:18<16:19:38, 7.01s/it] {'loss': 0.3706, 'grad_norm': 0.6494984493154473, 'learning_rate': 8.057580299228775e-06, 'epoch': 0.31} + 31%|███ | 3802/12188 [8:06:18<16:19:38, 7.01s/it] 31%|███ | 3803/12188 [8:06:25<16:07:38, 6.92s/it] {'loss': 0.3651, 'grad_norm': 0.6241086261413347, 'learning_rate': 8.05652887648621e-06, 'epoch': 0.31} + 31%|███ | 3803/12188 [8:06:25<16:07:38, 6.92s/it] 31%|███ | 3804/12188 [8:06:32<16:02:49, 6.89s/it] {'loss': 0.3227, 'grad_norm': 0.5974246493782609, 'learning_rate': 8.055477237896488e-06, 'epoch': 0.31} + 31%|███ | 3804/12188 [8:06:32<16:02:49, 6.89s/it] 31%|███ | 3805/12188 [8:06:38<16:03:38, 6.90s/it] {'loss': 0.3577, 'grad_norm': 0.6988472281649473, 'learning_rate': 8.054425383533873e-06, 'epoch': 0.31} + 31%|███ | 3805/12188 [8:06:38<16:03:38, 6.90s/it] 31%|███ | 3806/12188 [8:06:48<18:13:28, 7.83s/it] {'loss': 0.381, 'grad_norm': 0.6419930526703052, 'learning_rate': 8.053373313472643e-06, 'epoch': 0.31} + 31%|███ | 3806/12188 [8:06:48<18:13:28, 7.83s/it] 31%|███ | 3807/12188 [8:06:56<17:59:05, 7.73s/it] {'loss': 0.3447, 'grad_norm': 0.6063727361863772, 'learning_rate': 8.052321027787095e-06, 'epoch': 0.31} + 31%|███ | 3807/12188 [8:06:56<17:59:05, 7.73s/it] 31%|███ | 3808/12188 [8:07:03<17:44:25, 7.62s/it] {'loss': 0.3519, 'grad_norm': 0.637572784861728, 'learning_rate': 8.051268526551541e-06, 'epoch': 0.31} + 31%|███ | 3808/12188 [8:07:03<17:44:25, 7.62s/it] 31%|███▏ | 3809/12188 [8:07:10<17:01:41, 7.32s/it] {'loss': 0.3295, 'grad_norm': 0.6499384224182844, 'learning_rate': 8.050215809840307e-06, 'epoch': 0.31} + 31%|███▏ | 3809/12188 [8:07:10<17:01:41, 7.32s/it] 31%|███▏ | 3810/12188 [8:07:18<17:21:36, 7.46s/it] {'loss': 0.3717, 'grad_norm': 0.6854629663562677, 'learning_rate': 8.049162877727732e-06, 'epoch': 0.31} + 31%|███▏ | 3810/12188 [8:07:18<17:21:36, 7.46s/it] 31%|███▏ | 3811/12188 [8:07:24<16:50:43, 7.24s/it] {'loss': 0.3662, 'grad_norm': 0.6281481369074717, 'learning_rate': 8.048109730288173e-06, 'epoch': 0.31} + 31%|███▏ | 3811/12188 [8:07:24<16:50:43, 7.24s/it] 31%|███▏ | 3812/12188 [8:07:32<16:43:20, 7.19s/it] {'loss': 0.3404, 'grad_norm': 0.6438323364392188, 'learning_rate': 8.047056367596003e-06, 'epoch': 0.31} + 31%|███▏ | 3812/12188 [8:07:32<16:43:20, 7.19s/it] 31%|███▏ | 3813/12188 [8:07:38<16:32:26, 7.11s/it] {'loss': 0.3546, 'grad_norm': 0.624793658813486, 'learning_rate': 8.046002789725606e-06, 'epoch': 0.31} + 31%|███▏ | 3813/12188 [8:07:38<16:32:26, 7.11s/it] 31%|███▏ | 3814/12188 [8:07:46<16:44:08, 7.19s/it] {'loss': 0.3255, 'grad_norm': 0.6209641221180479, 'learning_rate': 8.044948996751387e-06, 'epoch': 0.31} + 31%|███▏ | 3814/12188 [8:07:46<16:44:08, 7.19s/it] 31%|███▏ | 3815/12188 [8:07:53<16:55:17, 7.28s/it] {'loss': 0.3807, 'grad_norm': 0.6678031841070188, 'learning_rate': 8.043894988747761e-06, 'epoch': 0.31} + 31%|███▏ | 3815/12188 [8:07:53<16:55:17, 7.28s/it] 31%|███▏ | 3816/12188 [8:08:00<16:44:39, 7.20s/it] {'loss': 0.3638, 'grad_norm': 0.662185711859975, 'learning_rate': 8.042840765789164e-06, 'epoch': 0.31} + 31%|███▏ | 3816/12188 [8:08:00<16:44:39, 7.20s/it] 31%|███▏ | 3817/12188 [8:08:09<17:47:53, 7.65s/it] {'loss': 0.3848, 'grad_norm': 0.6921928054538047, 'learning_rate': 8.041786327950037e-06, 'epoch': 0.31} + 31%|███▏ | 3817/12188 [8:08:09<17:47:53, 7.65s/it] 31%|███▏ | 3818/12188 [8:08:16<17:29:03, 7.52s/it] {'loss': 0.369, 'grad_norm': 0.7540856690383578, 'learning_rate': 8.04073167530485e-06, 'epoch': 0.31} + 31%|███▏ | 3818/12188 [8:08:16<17:29:03, 7.52s/it] 31%|███▏ | 3819/12188 [8:08:23<17:08:55, 7.38s/it] {'loss': 0.3467, 'grad_norm': 0.6574172757735359, 'learning_rate': 8.039676807928073e-06, 'epoch': 0.31} + 31%|███▏ | 3819/12188 [8:08:23<17:08:55, 7.38s/it] 31%|███▏ | 3820/12188 [8:08:31<17:03:45, 7.34s/it] {'loss': 0.3587, 'grad_norm': 0.5956322600090069, 'learning_rate': 8.038621725894209e-06, 'epoch': 0.31} + 31%|███▏ | 3820/12188 [8:08:31<17:03:45, 7.34s/it] 31%|███▏ | 3821/12188 [8:08:37<16:37:12, 7.15s/it] {'loss': 0.3415, 'grad_norm': 0.6224785116937196, 'learning_rate': 8.037566429277757e-06, 'epoch': 0.31} + 31%|███▏ | 3821/12188 [8:08:37<16:37:12, 7.15s/it] 31%|███▏ | 3822/12188 [8:08:45<16:48:28, 7.23s/it] {'loss': 0.3622, 'grad_norm': 0.6359340468732328, 'learning_rate': 8.036510918153244e-06, 'epoch': 0.31} + 31%|███▏ | 3822/12188 [8:08:45<16:48:28, 7.23s/it] 31%|███▏ | 3823/12188 [8:08:52<16:41:05, 7.18s/it] {'loss': 0.3157, 'grad_norm': 0.6230590231680748, 'learning_rate': 8.035455192595209e-06, 'epoch': 0.31} + 31%|███▏ | 3823/12188 [8:08:52<16:41:05, 7.18s/it] 31%|███▏ | 3824/12188 [8:08:59<16:36:50, 7.15s/it] {'loss': 0.3419, 'grad_norm': 0.6290988356913858, 'learning_rate': 8.034399252678207e-06, 'epoch': 0.31} + 31%|███▏ | 3824/12188 [8:08:59<16:36:50, 7.15s/it] 31%|███▏ | 3825/12188 [8:09:09<18:58:25, 8.17s/it] {'loss': 0.337, 'grad_norm': 0.6246612551505695, 'learning_rate': 8.033343098476802e-06, 'epoch': 0.31} + 31%|███▏ | 3825/12188 [8:09:09<18:58:25, 8.17s/it] 31%|███▏ | 3826/12188 [8:09:16<18:01:22, 7.76s/it] {'loss': 0.3738, 'grad_norm': 0.6795281513406113, 'learning_rate': 8.032286730065582e-06, 'epoch': 0.31} + 31%|███▏ | 3826/12188 [8:09:16<18:01:22, 7.76s/it] 31%|███▏ | 3827/12188 [8:09:23<17:23:38, 7.49s/it] {'loss': 0.3629, 'grad_norm': 0.6818992681653913, 'learning_rate': 8.031230147519145e-06, 'epoch': 0.31} + 31%|███▏ | 3827/12188 [8:09:23<17:23:38, 7.49s/it] 31%|███▏ | 3828/12188 [8:09:31<17:30:03, 7.54s/it] {'loss': 0.3467, 'grad_norm': 0.6546878633104358, 'learning_rate': 8.030173350912105e-06, 'epoch': 0.31} + 31%|███▏ | 3828/12188 [8:09:31<17:30:03, 7.54s/it] 31%|███▏ | 3829/12188 [8:09:38<17:31:26, 7.55s/it] {'loss': 0.3624, 'grad_norm': 0.5899564776507595, 'learning_rate': 8.029116340319091e-06, 'epoch': 0.31} + 31%|███▏ | 3829/12188 [8:09:38<17:31:26, 7.55s/it] 31%|███▏ | 3830/12188 [8:09:45<16:58:45, 7.31s/it] {'loss': 0.3347, 'grad_norm': 0.5992658169904254, 'learning_rate': 8.028059115814749e-06, 'epoch': 0.31} + 31%|███▏ | 3830/12188 [8:09:45<16:58:45, 7.31s/it] 31%|███▏ | 3831/12188 [8:09:53<17:13:06, 7.42s/it] {'loss': 0.4064, 'grad_norm': 0.6803001263093066, 'learning_rate': 8.027001677473737e-06, 'epoch': 0.31} + 31%|███▏ | 3831/12188 [8:09:53<17:13:06, 7.42s/it] 31%|███▏ | 3832/12188 [8:10:00<17:09:52, 7.40s/it] {'loss': 0.3475, 'grad_norm': 0.6681520034845131, 'learning_rate': 8.025944025370728e-06, 'epoch': 0.31} + 31%|███▏ | 3832/12188 [8:10:00<17:09:52, 7.40s/it] 31%|███▏ | 3833/12188 [8:10:08<17:16:25, 7.44s/it] {'loss': 0.3547, 'grad_norm': 0.600463022313911, 'learning_rate': 8.024886159580414e-06, 'epoch': 0.31} + 31%|███▏ | 3833/12188 [8:10:08<17:16:25, 7.44s/it] 31%|███▏ | 3834/12188 [8:10:15<17:03:44, 7.35s/it] {'loss': 0.3577, 'grad_norm': 0.687896949499154, 'learning_rate': 8.023828080177502e-06, 'epoch': 0.31} + 31%|███▏ | 3834/12188 [8:10:15<17:03:44, 7.35s/it] 31%|███▏ | 3835/12188 [8:10:22<16:53:56, 7.28s/it] {'loss': 0.42, 'grad_norm': 0.6882198647869333, 'learning_rate': 8.022769787236706e-06, 'epoch': 0.31} + 31%|███▏ | 3835/12188 [8:10:22<16:53:56, 7.28s/it] 31%|███▏ | 3836/12188 [8:10:29<16:30:59, 7.12s/it] {'loss': 0.3649, 'grad_norm': 0.7998958883850691, 'learning_rate': 8.021711280832764e-06, 'epoch': 0.31} + 31%|███▏ | 3836/12188 [8:10:29<16:30:59, 7.12s/it] 31%|███▏ | 3837/12188 [8:10:35<16:14:44, 7.00s/it] {'loss': 0.3397, 'grad_norm': 2.232759765918411, 'learning_rate': 8.02065256104043e-06, 'epoch': 0.31} + 31%|███▏ | 3837/12188 [8:10:35<16:14:44, 7.00s/it] 31%|███▏ | 3838/12188 [8:10:43<16:25:34, 7.08s/it] {'loss': 0.3586, 'grad_norm': 0.6549891573351758, 'learning_rate': 8.019593627934461e-06, 'epoch': 0.31} + 31%|███▏ | 3838/12188 [8:10:43<16:25:34, 7.08s/it] 31%|███▏ | 3839/12188 [8:10:49<16:03:41, 6.93s/it] {'loss': 0.3984, 'grad_norm': 0.7309199502236531, 'learning_rate': 8.018534481589641e-06, 'epoch': 0.31} + 31%|███▏ | 3839/12188 [8:10:49<16:03:41, 6.93s/it] 32%|███▏ | 3840/12188 [8:10:56<16:06:49, 6.95s/it] {'loss': 0.3519, 'grad_norm': 0.6345951053777079, 'learning_rate': 8.017475122080767e-06, 'epoch': 0.32} + 32%|███▏ | 3840/12188 [8:10:56<16:06:49, 6.95s/it] 32%|███▏ | 3841/12188 [8:11:05<17:14:02, 7.43s/it] {'loss': 0.3293, 'grad_norm': 0.6259662674338453, 'learning_rate': 8.016415549482649e-06, 'epoch': 0.32} + 32%|███▏ | 3841/12188 [8:11:05<17:14:02, 7.43s/it] 32%|███▏ | 3842/12188 [8:11:11<16:40:38, 7.19s/it] {'loss': 0.371, 'grad_norm': 0.675270323793165, 'learning_rate': 8.015355763870109e-06, 'epoch': 0.32} + 32%|███▏ | 3842/12188 [8:11:11<16:40:38, 7.19s/it]Invalidate trace cache @ step 2: expected module 1, but got module 364 + 32%|███▏ | 3843/12188 [8:11:17<15:31:36, 6.70s/it] {'loss': 0.6751, 'grad_norm': 0.7594717595707051, 'learning_rate': 8.014295765317992e-06, 'epoch': 0.32} + 32%|███▏ | 3843/12188 [8:11:17<15:31:36, 6.70s/it] 32%|███▏ | 3844/12188 [8:11:24<16:03:49, 6.93s/it] {'loss': 0.3725, 'grad_norm': 0.6816959317020318, 'learning_rate': 8.013235553901149e-06, 'epoch': 0.32} + 32%|███▏ | 3844/12188 [8:11:24<16:03:49, 6.93s/it] 32%|███▏ | 3845/12188 [8:11:32<16:19:50, 7.05s/it] {'loss': 0.3706, 'grad_norm': 0.6515114770327648, 'learning_rate': 8.012175129694454e-06, 'epoch': 0.32} + 32%|███▏ | 3845/12188 [8:11:32<16:19:50, 7.05s/it] 32%|███▏ | 3846/12188 [8:11:39<16:34:21, 7.15s/it] {'loss': 0.3641, 'grad_norm': 0.6635054714850697, 'learning_rate': 8.01111449277279e-06, 'epoch': 0.32} + 32%|███▏ | 3846/12188 [8:11:39<16:34:21, 7.15s/it] 32%|███▏ | 3847/12188 [8:11:46<16:24:33, 7.08s/it] {'loss': 0.3571, 'grad_norm': 0.6145524251649618, 'learning_rate': 8.010053643211056e-06, 'epoch': 0.32} + 32%|███▏ | 3847/12188 [8:11:46<16:24:33, 7.08s/it] 32%|███▏ | 3848/12188 [8:11:52<15:59:04, 6.90s/it] {'loss': 0.405, 'grad_norm': 0.7002745133826439, 'learning_rate': 8.008992581084172e-06, 'epoch': 0.32} + 32%|███▏ | 3848/12188 [8:11:52<15:59:04, 6.90s/it] 32%|███▏ | 3849/12188 [8:11:59<15:49:37, 6.83s/it] {'loss': 0.397, 'grad_norm': 0.7038948568120698, 'learning_rate': 8.007931306467066e-06, 'epoch': 0.32} + 32%|███▏ | 3849/12188 [8:11:59<15:49:37, 6.83s/it] 32%|███▏ | 3850/12188 [8:12:06<15:39:56, 6.76s/it] {'loss': 0.3299, 'grad_norm': 0.5908020036463014, 'learning_rate': 8.006869819434684e-06, 'epoch': 0.32} + 32%|███▏ | 3850/12188 [8:12:06<15:39:56, 6.76s/it] 32%|███▏ | 3851/12188 [8:12:12<15:37:37, 6.75s/it] {'loss': 0.3188, 'grad_norm': 0.6706173909670387, 'learning_rate': 8.005808120061986e-06, 'epoch': 0.32} + 32%|███▏ | 3851/12188 [8:12:12<15:37:37, 6.75s/it] 32%|███▏ | 3852/12188 [8:12:20<15:59:24, 6.91s/it] {'loss': 0.3235, 'grad_norm': 0.6428900123328335, 'learning_rate': 8.004746208423948e-06, 'epoch': 0.32} + 32%|███▏ | 3852/12188 [8:12:20<15:59:24, 6.91s/it] 32%|███▏ | 3853/12188 [8:12:27<16:07:34, 6.97s/it] {'loss': 0.3747, 'grad_norm': 0.6461992661214845, 'learning_rate': 8.003684084595558e-06, 'epoch': 0.32} + 32%|███▏ | 3853/12188 [8:12:27<16:07:34, 6.97s/it] 32%|███▏ | 3854/12188 [8:12:34<16:16:49, 7.03s/it] {'loss': 0.3651, 'grad_norm': 0.6195166271713635, 'learning_rate': 8.002621748651827e-06, 'epoch': 0.32} + 32%|███▏ | 3854/12188 [8:12:34<16:16:49, 7.03s/it] 32%|███▏ | 3855/12188 [8:12:41<16:33:25, 7.15s/it] {'loss': 0.3224, 'grad_norm': 0.6760714440685509, 'learning_rate': 8.00155920066777e-06, 'epoch': 0.32} + 32%|███▏ | 3855/12188 [8:12:41<16:33:25, 7.15s/it] 32%|███▏ | 3856/12188 [8:12:49<16:32:50, 7.15s/it] {'loss': 0.3621, 'grad_norm': 0.6483215539218088, 'learning_rate': 8.000496440718427e-06, 'epoch': 0.32} + 32%|███▏ | 3856/12188 [8:12:49<16:32:50, 7.15s/it] 32%|███▏ | 3857/12188 [8:12:55<16:18:07, 7.04s/it] {'loss': 0.3465, 'grad_norm': 0.6982664926721688, 'learning_rate': 7.999433468878844e-06, 'epoch': 0.32} + 32%|███▏ | 3857/12188 [8:12:55<16:18:07, 7.04s/it] 32%|███▏ | 3858/12188 [8:13:04<17:05:17, 7.39s/it] {'loss': 0.3568, 'grad_norm': 0.6114354151398582, 'learning_rate': 7.998370285224087e-06, 'epoch': 0.32} + 32%|███▏ | 3858/12188 [8:13:04<17:05:17, 7.39s/it] 32%|███▏ | 3859/12188 [8:13:10<16:25:05, 7.10s/it] {'loss': 0.4011, 'grad_norm': 0.6678153007574715, 'learning_rate': 7.99730688982924e-06, 'epoch': 0.32} + 32%|███▏ | 3859/12188 [8:13:10<16:25:05, 7.10s/it] 32%|███▏ | 3860/12188 [8:13:18<16:47:27, 7.26s/it] {'loss': 0.3659, 'grad_norm': 0.6299205262030573, 'learning_rate': 7.996243282769395e-06, 'epoch': 0.32} + 32%|███▏ | 3860/12188 [8:13:18<16:47:27, 7.26s/it] 32%|███▏ | 3861/12188 [8:13:25<16:40:02, 7.21s/it] {'loss': 0.3681, 'grad_norm': 0.6304206408387993, 'learning_rate': 7.995179464119664e-06, 'epoch': 0.32} + 32%|███▏ | 3861/12188 [8:13:25<16:40:02, 7.21s/it] 32%|███▏ | 3862/12188 [8:13:32<16:35:36, 7.17s/it] {'loss': 0.3705, 'grad_norm': 0.6683101221777121, 'learning_rate': 7.994115433955172e-06, 'epoch': 0.32} + 32%|███▏ | 3862/12188 [8:13:32<16:35:36, 7.17s/it] 32%|███▏ | 3863/12188 [8:13:39<16:31:07, 7.14s/it] {'loss': 0.354, 'grad_norm': 0.7072235990484781, 'learning_rate': 7.993051192351056e-06, 'epoch': 0.32} + 32%|███▏ | 3863/12188 [8:13:39<16:31:07, 7.14s/it] 32%|███▏ | 3864/12188 [8:13:47<17:23:09, 7.52s/it] {'loss': 0.3551, 'grad_norm': 0.6030812241140354, 'learning_rate': 7.991986739382475e-06, 'epoch': 0.32} + 32%|███▏ | 3864/12188 [8:13:47<17:23:09, 7.52s/it] 32%|███▏ | 3865/12188 [8:13:55<17:46:50, 7.69s/it] {'loss': 0.3355, 'grad_norm': 0.5870045189766122, 'learning_rate': 7.990922075124599e-06, 'epoch': 0.32} + 32%|███▏ | 3865/12188 [8:13:55<17:46:50, 7.69s/it] 32%|███▏ | 3866/12188 [8:14:02<17:14:55, 7.46s/it] {'loss': 0.3762, 'grad_norm': 0.7266934047669696, 'learning_rate': 7.989857199652608e-06, 'epoch': 0.32} + 32%|███▏ | 3866/12188 [8:14:02<17:14:55, 7.46s/it] 32%|███▏ | 3867/12188 [8:14:09<16:54:53, 7.32s/it] {'loss': 0.3386, 'grad_norm': 0.6124220172321695, 'learning_rate': 7.988792113041707e-06, 'epoch': 0.32} + 32%|███▏ | 3867/12188 [8:14:09<16:54:53, 7.32s/it] 32%|███▏ | 3868/12188 [8:14:17<17:13:52, 7.46s/it] {'loss': 0.3061, 'grad_norm': 0.6461141768756277, 'learning_rate': 7.98772681536711e-06, 'epoch': 0.32} + 32%|███▏ | 3868/12188 [8:14:17<17:13:52, 7.46s/it] 32%|███▏ | 3869/12188 [8:14:25<17:24:03, 7.53s/it] {'loss': 0.3657, 'grad_norm': 0.6401013453169033, 'learning_rate': 7.986661306704045e-06, 'epoch': 0.32} + 32%|███▏ | 3869/12188 [8:14:25<17:24:03, 7.53s/it] 32%|███▏ | 3870/12188 [8:14:32<16:59:23, 7.35s/it] {'loss': 0.4089, 'grad_norm': 0.68054047891886, 'learning_rate': 7.985595587127755e-06, 'epoch': 0.32} + 32%|███▏ | 3870/12188 [8:14:32<16:59:23, 7.35s/it] 32%|███▏ | 3871/12188 [8:14:38<16:36:19, 7.19s/it] {'loss': 0.3899, 'grad_norm': 0.6576787243461274, 'learning_rate': 7.984529656713503e-06, 'epoch': 0.32} + 32%|███▏ | 3871/12188 [8:14:39<16:36:19, 7.19s/it] 32%|███▏ | 3872/12188 [8:14:45<16:08:51, 6.99s/it] {'loss': 0.3569, 'grad_norm': 0.6481058653469716, 'learning_rate': 7.983463515536562e-06, 'epoch': 0.32} + 32%|███▏ | 3872/12188 [8:14:45<16:08:51, 6.99s/it] 32%|███▏ | 3873/12188 [8:14:53<16:43:02, 7.24s/it] {'loss': 0.3876, 'grad_norm': 0.720920466891122, 'learning_rate': 7.982397163672219e-06, 'epoch': 0.32} + 32%|███▏ | 3873/12188 [8:14:53<16:43:02, 7.24s/it] 32%|███▏ | 3874/12188 [8:15:00<16:55:30, 7.33s/it] {'loss': 0.353, 'grad_norm': 0.6564787598634939, 'learning_rate': 7.981330601195781e-06, 'epoch': 0.32} + 32%|███▏ | 3874/12188 [8:15:00<16:55:30, 7.33s/it] 32%|███▏ | 3875/12188 [8:15:07<16:36:50, 7.19s/it] {'loss': 0.32, 'grad_norm': 0.5570633336826369, 'learning_rate': 7.980263828182565e-06, 'epoch': 0.32} + 32%|███▏ | 3875/12188 [8:15:07<16:36:50, 7.19s/it] 32%|███▏ | 3876/12188 [8:15:15<16:57:11, 7.34s/it] {'loss': 0.4008, 'grad_norm': 0.5964074854975797, 'learning_rate': 7.979196844707907e-06, 'epoch': 0.32} + 32%|███▏ | 3876/12188 [8:15:15<16:57:11, 7.34s/it] 32%|███▏ | 3877/12188 [8:15:22<16:45:50, 7.26s/it] {'loss': 0.3723, 'grad_norm': 0.621998611967159, 'learning_rate': 7.978129650847154e-06, 'epoch': 0.32} + 32%|███▏ | 3877/12188 [8:15:22<16:45:50, 7.26s/it] 32%|███▏ | 3878/12188 [8:15:29<16:27:15, 7.13s/it] {'loss': 0.2993, 'grad_norm': 0.6144461491455195, 'learning_rate': 7.977062246675669e-06, 'epoch': 0.32} + 32%|███▏ | 3878/12188 [8:15:29<16:27:15, 7.13s/it] 32%|███▏ | 3879/12188 [8:15:38<18:01:19, 7.81s/it] {'loss': 0.3477, 'grad_norm': 0.6643811657342329, 'learning_rate': 7.975994632268833e-06, 'epoch': 0.32} + 32%|███▏ | 3879/12188 [8:15:38<18:01:19, 7.81s/it] 32%|███▏ | 3880/12188 [8:15:46<18:14:31, 7.90s/it] {'loss': 0.3331, 'grad_norm': 0.6082270234265359, 'learning_rate': 7.974926807702034e-06, 'epoch': 0.32} + 32%|███▏ | 3880/12188 [8:15:46<18:14:31, 7.90s/it] 32%|███▏ | 3881/12188 [8:15:53<17:24:48, 7.55s/it] {'loss': 0.334, 'grad_norm': 0.6153515968846742, 'learning_rate': 7.973858773050686e-06, 'epoch': 0.32} + 32%|███▏ | 3881/12188 [8:15:53<17:24:48, 7.55s/it] 32%|███▏ | 3882/12188 [8:16:00<17:03:32, 7.39s/it] {'loss': 0.3638, 'grad_norm': 0.6608487269506883, 'learning_rate': 7.972790528390208e-06, 'epoch': 0.32} + 32%|███▏ | 3882/12188 [8:16:00<17:03:32, 7.39s/it] 32%|███▏ | 3883/12188 [8:16:07<17:00:17, 7.37s/it] {'loss': 0.3578, 'grad_norm': 0.666448065729024, 'learning_rate': 7.971722073796042e-06, 'epoch': 0.32} + 32%|███▏ | 3883/12188 [8:16:07<17:00:17, 7.37s/it] 32%|███▏ | 3884/12188 [8:16:14<16:37:13, 7.21s/it] {'loss': 0.361, 'grad_norm': 0.6552916247319747, 'learning_rate': 7.970653409343633e-06, 'epoch': 0.32} + 32%|███▏ | 3884/12188 [8:16:14<16:37:13, 7.21s/it] 32%|███▏ | 3885/12188 [8:16:23<17:30:50, 7.59s/it] {'loss': 0.3348, 'grad_norm': 0.6335559448652389, 'learning_rate': 7.969584535108457e-06, 'epoch': 0.32} + 32%|███▏ | 3885/12188 [8:16:23<17:30:50, 7.59s/it] 32%|███▏ | 3886/12188 [8:16:30<17:00:57, 7.38s/it] {'loss': 0.3577, 'grad_norm': 0.6502294675694414, 'learning_rate': 7.96851545116599e-06, 'epoch': 0.32} + 32%|███▏ | 3886/12188 [8:16:30<17:00:57, 7.38s/it] 32%|███▏ | 3887/12188 [8:16:38<17:58:53, 7.80s/it] {'loss': 0.395, 'grad_norm': 0.6053667198179379, 'learning_rate': 7.967446157591732e-06, 'epoch': 0.32} + 32%|███▏ | 3887/12188 [8:16:38<17:58:53, 7.80s/it] 32%|███▏ | 3888/12188 [8:16:46<17:40:32, 7.67s/it] {'loss': 0.3986, 'grad_norm': 0.6506269232638298, 'learning_rate': 7.966376654461192e-06, 'epoch': 0.32} + 32%|███▏ | 3888/12188 [8:16:46<17:40:32, 7.67s/it] 32%|███▏ | 3889/12188 [8:16:52<17:00:54, 7.38s/it] {'loss': 0.358, 'grad_norm': 0.6162088358801006, 'learning_rate': 7.965306941849901e-06, 'epoch': 0.32} + 32%|███▏ | 3889/12188 [8:16:52<17:00:54, 7.38s/it] 32%|███▏ | 3890/12188 [8:16:59<16:37:47, 7.21s/it] {'loss': 0.3686, 'grad_norm': 0.630536779916773, 'learning_rate': 7.964237019833397e-06, 'epoch': 0.32} + 32%|███▏ | 3890/12188 [8:16:59<16:37:47, 7.21s/it] 32%|███▏ | 3891/12188 [8:17:06<16:07:43, 7.00s/it] {'loss': 0.3995, 'grad_norm': 0.6663980459815173, 'learning_rate': 7.963166888487237e-06, 'epoch': 0.32} + 32%|███▏ | 3891/12188 [8:17:06<16:07:43, 7.00s/it] 32%|███▏ | 3892/12188 [8:17:13<16:23:54, 7.12s/it] {'loss': 0.3262, 'grad_norm': 0.6279850895461475, 'learning_rate': 7.962096547886992e-06, 'epoch': 0.32} + 32%|███▏ | 3892/12188 [8:17:13<16:23:54, 7.12s/it] 32%|███▏ | 3893/12188 [8:17:20<16:26:40, 7.14s/it] {'loss': 0.383, 'grad_norm': 0.6623162330401523, 'learning_rate': 7.961025998108247e-06, 'epoch': 0.32} + 32%|███▏ | 3893/12188 [8:17:20<16:26:40, 7.14s/it] 32%|███▏ | 3894/12188 [8:17:27<16:09:35, 7.01s/it] {'loss': 0.4196, 'grad_norm': 0.6641405726539766, 'learning_rate': 7.959955239226604e-06, 'epoch': 0.32} + 32%|███▏ | 3894/12188 [8:17:27<16:09:35, 7.01s/it] 32%|███▏ | 3895/12188 [8:17:34<16:03:14, 6.97s/it] {'loss': 0.3412, 'grad_norm': 0.6153395383701022, 'learning_rate': 7.958884271317678e-06, 'epoch': 0.32} + 32%|███▏ | 3895/12188 [8:17:34<16:03:14, 6.97s/it] 32%|███▏ | 3896/12188 [8:17:42<16:27:30, 7.15s/it] {'loss': 0.3681, 'grad_norm': 0.641901559617204, 'learning_rate': 7.957813094457098e-06, 'epoch': 0.32} + 32%|███▏ | 3896/12188 [8:17:42<16:27:30, 7.15s/it] 32%|███▏ | 3897/12188 [8:17:48<16:19:37, 7.09s/it] {'loss': 0.3541, 'grad_norm': 0.5966580130589388, 'learning_rate': 7.956741708720507e-06, 'epoch': 0.32} + 32%|███▏ | 3897/12188 [8:17:48<16:19:37, 7.09s/it] 32%|███▏ | 3898/12188 [8:17:56<16:38:45, 7.23s/it] {'loss': 0.3703, 'grad_norm': 0.60311918378115, 'learning_rate': 7.955670114183572e-06, 'epoch': 0.32} + 32%|███▏ | 3898/12188 [8:17:56<16:38:45, 7.23s/it] 32%|███▏ | 3899/12188 [8:18:03<16:38:59, 7.23s/it] {'loss': 0.3696, 'grad_norm': 0.6177892301247501, 'learning_rate': 7.954598310921959e-06, 'epoch': 0.32} + 32%|███▏ | 3899/12188 [8:18:03<16:38:59, 7.23s/it] 32%|███▏ | 3900/12188 [8:18:10<16:21:43, 7.11s/it] {'loss': 0.3844, 'grad_norm': 0.6829771448698114, 'learning_rate': 7.95352629901136e-06, 'epoch': 0.32} + 32%|███▏ | 3900/12188 [8:18:10<16:21:43, 7.11s/it] 32%|███▏ | 3901/12188 [8:18:18<16:58:15, 7.37s/it] {'loss': 0.3862, 'grad_norm': 0.6275109024662275, 'learning_rate': 7.95245407852748e-06, 'epoch': 0.32} + 32%|███▏ | 3901/12188 [8:18:18<16:58:15, 7.37s/it] 32%|███▏ | 3902/12188 [8:18:26<17:36:31, 7.65s/it] {'loss': 0.2989, 'grad_norm': 0.6787389459321248, 'learning_rate': 7.951381649546036e-06, 'epoch': 0.32} + 32%|███▏ | 3902/12188 [8:18:26<17:36:31, 7.65s/it] 32%|███▏ | 3903/12188 [8:18:33<16:52:33, 7.33s/it] {'loss': 0.3774, 'grad_norm': 0.678526754782796, 'learning_rate': 7.950309012142763e-06, 'epoch': 0.32} + 32%|███▏ | 3903/12188 [8:18:33<16:52:33, 7.33s/it] 32%|███▏ | 3904/12188 [8:18:40<16:47:03, 7.29s/it] {'loss': 0.3743, 'grad_norm': 0.6307138439151928, 'learning_rate': 7.949236166393405e-06, 'epoch': 0.32} + 32%|███▏ | 3904/12188 [8:18:40<16:47:03, 7.29s/it] 32%|███▏ | 3905/12188 [8:18:47<16:20:49, 7.10s/it] {'loss': 0.3558, 'grad_norm': 0.6175280852850934, 'learning_rate': 7.948163112373728e-06, 'epoch': 0.32} + 32%|███▏ | 3905/12188 [8:18:47<16:20:49, 7.10s/it] 32%|███▏ | 3906/12188 [8:18:54<16:05:19, 6.99s/it] {'loss': 0.3645, 'grad_norm': 0.6316745226954891, 'learning_rate': 7.94708985015951e-06, 'epoch': 0.32} + 32%|███▏ | 3906/12188 [8:18:54<16:05:19, 6.99s/it] 32%|███▏ | 3907/12188 [8:19:00<15:50:23, 6.89s/it] {'loss': 0.4113, 'grad_norm': 0.6482204479996003, 'learning_rate': 7.946016379826542e-06, 'epoch': 0.32} + 32%|███▏ | 3907/12188 [8:19:00<15:50:23, 6.89s/it] 32%|███▏ | 3908/12188 [8:19:07<15:55:41, 6.93s/it] {'loss': 0.3799, 'grad_norm': 0.6192578593041019, 'learning_rate': 7.944942701450629e-06, 'epoch': 0.32} + 32%|███▏ | 3908/12188 [8:19:07<15:55:41, 6.93s/it] 32%|███▏ | 3909/12188 [8:19:15<16:25:31, 7.14s/it] {'loss': 0.311, 'grad_norm': 0.5493903776218557, 'learning_rate': 7.943868815107594e-06, 'epoch': 0.32} + 32%|███▏ | 3909/12188 [8:19:15<16:25:31, 7.14s/it] 32%|███▏ | 3910/12188 [8:19:24<17:30:33, 7.61s/it] {'loss': 0.3777, 'grad_norm': 0.6638494056051444, 'learning_rate': 7.942794720873273e-06, 'epoch': 0.32} + 32%|███▏ | 3910/12188 [8:19:24<17:30:33, 7.61s/it] 32%|███▏ | 3911/12188 [8:19:31<17:10:35, 7.47s/it] {'loss': 0.3832, 'grad_norm': 0.657238398184785, 'learning_rate': 7.941720418823517e-06, 'epoch': 0.32} + 32%|███▏ | 3911/12188 [8:19:31<17:10:35, 7.47s/it] 32%|███▏ | 3912/12188 [8:19:38<16:43:28, 7.28s/it] {'loss': 0.3209, 'grad_norm': 0.613085975305512, 'learning_rate': 7.94064590903419e-06, 'epoch': 0.32} + 32%|███▏ | 3912/12188 [8:19:38<16:43:28, 7.28s/it] 32%|███▏ | 3913/12188 [8:19:45<16:49:13, 7.32s/it] {'loss': 0.3504, 'grad_norm': 0.6096749809994939, 'learning_rate': 7.939571191581175e-06, 'epoch': 0.32} + 32%|███▏ | 3913/12188 [8:19:45<16:49:13, 7.32s/it] 32%|███▏ | 3914/12188 [8:19:52<16:22:16, 7.12s/it] {'loss': 0.3715, 'grad_norm': 0.6712996160730604, 'learning_rate': 7.938496266540365e-06, 'epoch': 0.32} + 32%|███▏ | 3914/12188 [8:19:52<16:22:16, 7.12s/it] 32%|███▏ | 3915/12188 [8:19:59<16:13:21, 7.06s/it] {'loss': 0.3424, 'grad_norm': 0.6355582959568326, 'learning_rate': 7.937421133987668e-06, 'epoch': 0.32} + 32%|███▏ | 3915/12188 [8:19:59<16:13:21, 7.06s/it] 32%|███▏ | 3916/12188 [8:20:05<16:02:32, 6.98s/it] {'loss': 0.3256, 'grad_norm': 0.5959060926435421, 'learning_rate': 7.936345793999012e-06, 'epoch': 0.32} + 32%|███▏ | 3916/12188 [8:20:05<16:02:32, 6.98s/it] 32%|███▏ | 3917/12188 [8:20:12<16:09:21, 7.03s/it] {'loss': 0.3531, 'grad_norm': 0.6116077434544214, 'learning_rate': 7.935270246650332e-06, 'epoch': 0.32} + 32%|███▏ | 3917/12188 [8:20:12<16:09:21, 7.03s/it] 32%|███▏ | 3918/12188 [8:20:23<18:18:26, 7.97s/it] {'loss': 0.3492, 'grad_norm': 0.5896952478749731, 'learning_rate': 7.934194492017586e-06, 'epoch': 0.32} + 32%|███▏ | 3918/12188 [8:20:23<18:18:26, 7.97s/it] 32%|███▏ | 3919/12188 [8:20:30<18:03:18, 7.86s/it] {'loss': 0.3496, 'grad_norm': 0.5928023536210993, 'learning_rate': 7.933118530176737e-06, 'epoch': 0.32} + 32%|███▏ | 3919/12188 [8:20:30<18:03:18, 7.86s/it] 32%|███▏ | 3920/12188 [8:20:37<17:32:03, 7.63s/it] {'loss': 0.3371, 'grad_norm': 0.6142095992498606, 'learning_rate': 7.93204236120377e-06, 'epoch': 0.32} + 32%|███▏ | 3920/12188 [8:20:37<17:32:03, 7.63s/it] 32%|███▏ | 3921/12188 [8:20:44<16:56:07, 7.37s/it] {'loss': 0.3442, 'grad_norm': 0.5970911669404029, 'learning_rate': 7.930965985174682e-06, 'epoch': 0.32} + 32%|███▏ | 3921/12188 [8:20:44<16:56:07, 7.37s/it] 32%|███▏ | 3922/12188 [8:20:51<16:34:44, 7.22s/it] {'loss': 0.3437, 'grad_norm': 0.6333611954280999, 'learning_rate': 7.929889402165485e-06, 'epoch': 0.32} + 32%|███▏ | 3922/12188 [8:20:51<16:34:44, 7.22s/it] 32%|███▏ | 3923/12188 [8:21:04<20:46:43, 9.05s/it] {'loss': 0.3413, 'grad_norm': 0.668037308259036, 'learning_rate': 7.928812612252206e-06, 'epoch': 0.32} + 32%|███▏ | 3923/12188 [8:21:04<20:46:43, 9.05s/it] 32%|███▏ | 3924/12188 [8:21:11<19:23:52, 8.45s/it] {'loss': 0.3825, 'grad_norm': 0.6198391666417926, 'learning_rate': 7.927735615510885e-06, 'epoch': 0.32} + 32%|███▏ | 3924/12188 [8:21:11<19:23:52, 8.45s/it] 32%|███▏ | 3925/12188 [8:21:19<18:45:18, 8.17s/it] {'loss': 0.3455, 'grad_norm': 0.5677706508354115, 'learning_rate': 7.926658412017578e-06, 'epoch': 0.32} + 32%|███▏ | 3925/12188 [8:21:19<18:45:18, 8.17s/it] 32%|███▏ | 3926/12188 [8:21:26<17:51:15, 7.78s/it] {'loss': 0.3438, 'grad_norm': 0.6068829409841491, 'learning_rate': 7.925581001848359e-06, 'epoch': 0.32} + 32%|███▏ | 3926/12188 [8:21:26<17:51:15, 7.78s/it] 32%|███▏ | 3927/12188 [8:21:34<18:15:58, 7.96s/it] {'loss': 0.3759, 'grad_norm': 0.5849193731907085, 'learning_rate': 7.924503385079308e-06, 'epoch': 0.32} + 32%|███▏ | 3927/12188 [8:21:34<18:15:58, 7.96s/it] 32%|███▏ | 3928/12188 [8:21:41<17:39:56, 7.70s/it] {'loss': 0.3561, 'grad_norm': 0.6251760323296239, 'learning_rate': 7.923425561786526e-06, 'epoch': 0.32} + 32%|███▏ | 3928/12188 [8:21:41<17:39:56, 7.70s/it] 32%|███▏ | 3929/12188 [8:21:48<16:50:39, 7.34s/it] {'loss': 0.3453, 'grad_norm': 0.6302712915761736, 'learning_rate': 7.922347532046128e-06, 'epoch': 0.32} + 32%|███▏ | 3929/12188 [8:21:48<16:50:39, 7.34s/it] 32%|███▏ | 3930/12188 [8:21:56<17:18:34, 7.55s/it] {'loss': 0.35, 'grad_norm': 0.615151293597076, 'learning_rate': 7.921269295934243e-06, 'epoch': 0.32} + 32%|███▏ | 3930/12188 [8:21:56<17:18:34, 7.55s/it] 32%|███▏ | 3931/12188 [8:22:04<17:33:53, 7.66s/it] {'loss': 0.3537, 'grad_norm': 0.6171327380729658, 'learning_rate': 7.920190853527015e-06, 'epoch': 0.32} + 32%|███▏ | 3931/12188 [8:22:04<17:33:53, 7.66s/it] 32%|███▏ | 3932/12188 [8:22:11<17:21:16, 7.57s/it] {'loss': 0.365, 'grad_norm': 0.6034391738772455, 'learning_rate': 7.919112204900597e-06, 'epoch': 0.32} + 32%|███▏ | 3932/12188 [8:22:11<17:21:16, 7.57s/it] 32%|███▏ | 3933/12188 [8:22:18<16:43:22, 7.29s/it] {'loss': 0.3327, 'grad_norm': 0.6325137612226737, 'learning_rate': 7.918033350131167e-06, 'epoch': 0.32} + 32%|███▏ | 3933/12188 [8:22:18<16:43:22, 7.29s/it] 32%|███▏ | 3934/12188 [8:22:27<17:51:54, 7.79s/it] {'loss': 0.3503, 'grad_norm': 0.6159699568866887, 'learning_rate': 7.916954289294911e-06, 'epoch': 0.32} + 32%|███▏ | 3934/12188 [8:22:27<17:51:54, 7.79s/it] 32%|███▏ | 3935/12188 [8:22:34<17:14:20, 7.52s/it] {'loss': 0.3287, 'grad_norm': 0.6142462815445192, 'learning_rate': 7.915875022468028e-06, 'epoch': 0.32} + 32%|███▏ | 3935/12188 [8:22:34<17:14:20, 7.52s/it] 32%|███▏ | 3936/12188 [8:22:40<16:43:26, 7.30s/it] {'loss': 0.322, 'grad_norm': 0.6512363324862457, 'learning_rate': 7.914795549726736e-06, 'epoch': 0.32} + 32%|███▏ | 3936/12188 [8:22:40<16:43:26, 7.30s/it] 32%|███▏ | 3937/12188 [8:22:48<17:20:56, 7.57s/it] {'loss': 0.3492, 'grad_norm': 0.6393467987430286, 'learning_rate': 7.913715871147265e-06, 'epoch': 0.32} + 32%|███▏ | 3937/12188 [8:22:49<17:20:56, 7.57s/it] 32%|███▏ | 3938/12188 [8:22:56<16:57:50, 7.40s/it] {'loss': 0.3404, 'grad_norm': 0.6283074626994584, 'learning_rate': 7.91263598680586e-06, 'epoch': 0.32} + 32%|███▏ | 3938/12188 [8:22:56<16:57:50, 7.40s/it] 32%|███▏ | 3939/12188 [8:23:03<16:43:14, 7.30s/it] {'loss': 0.3643, 'grad_norm': 0.6387035459125884, 'learning_rate': 7.911555896778783e-06, 'epoch': 0.32} + 32%|███▏ | 3939/12188 [8:23:03<16:43:14, 7.30s/it] 32%|███▏ | 3940/12188 [8:23:10<16:33:49, 7.23s/it] {'loss': 0.337, 'grad_norm': 0.6354296279015863, 'learning_rate': 7.910475601142305e-06, 'epoch': 0.32} + 32%|███▏ | 3940/12188 [8:23:10<16:33:49, 7.23s/it] 32%|███▏ | 3941/12188 [8:23:17<16:47:51, 7.33s/it] {'loss': 0.3715, 'grad_norm': 0.6532515328017992, 'learning_rate': 7.909395099972716e-06, 'epoch': 0.32} + 32%|███▏ | 3941/12188 [8:23:17<16:47:51, 7.33s/it] 32%|███▏ | 3942/12188 [8:23:25<17:14:42, 7.53s/it] {'loss': 0.4139, 'grad_norm': 0.622138297593448, 'learning_rate': 7.90831439334632e-06, 'epoch': 0.32} + 32%|███▏ | 3942/12188 [8:23:25<17:14:42, 7.53s/it] 32%|███▏ | 3943/12188 [8:23:33<17:14:09, 7.53s/it] {'loss': 0.3879, 'grad_norm': 0.630292571790943, 'learning_rate': 7.907233481339433e-06, 'epoch': 0.32} + 32%|███▏ | 3943/12188 [8:23:33<17:14:09, 7.53s/it] 32%|███▏ | 3944/12188 [8:23:44<20:06:36, 8.78s/it] {'loss': 0.3535, 'grad_norm': 0.5981378479419311, 'learning_rate': 7.90615236402839e-06, 'epoch': 0.32} + 32%|███▏ | 3944/12188 [8:23:44<20:06:36, 8.78s/it] 32%|███▏ | 3945/12188 [8:23:51<18:40:29, 8.16s/it] {'loss': 0.3324, 'grad_norm': 0.6188383266273982, 'learning_rate': 7.905071041489536e-06, 'epoch': 0.32} + 32%|███▏ | 3945/12188 [8:23:51<18:40:29, 8.16s/it] 32%|███▏ | 3946/12188 [8:23:58<18:00:23, 7.87s/it] {'loss': 0.3721, 'grad_norm': 0.67888223330206, 'learning_rate': 7.903989513799231e-06, 'epoch': 0.32} + 32%|███▏ | 3946/12188 [8:23:58<18:00:23, 7.87s/it] 32%|███▏ | 3947/12188 [8:24:05<17:17:34, 7.55s/it] {'loss': 0.3288, 'grad_norm': 0.6591709735704394, 'learning_rate': 7.902907781033852e-06, 'epoch': 0.32} + 32%|███▏ | 3947/12188 [8:24:05<17:17:34, 7.55s/it] 32%|███▏ | 3948/12188 [8:24:13<17:11:01, 7.51s/it] {'loss': 0.3832, 'grad_norm': 0.7320782829009737, 'learning_rate': 7.901825843269791e-06, 'epoch': 0.32} + 32%|███▏ | 3948/12188 [8:24:13<17:11:01, 7.51s/it] 32%|███▏ | 3949/12188 [8:24:19<16:43:43, 7.31s/it] {'loss': 0.3644, 'grad_norm': 0.6374901607662895, 'learning_rate': 7.90074370058345e-06, 'epoch': 0.32} + 32%|███▏ | 3949/12188 [8:24:19<16:43:43, 7.31s/it] 32%|███▏ | 3950/12188 [8:24:27<16:37:08, 7.26s/it] {'loss': 0.3264, 'grad_norm': 0.6755055834030602, 'learning_rate': 7.89966135305125e-06, 'epoch': 0.32} + 32%|███▏ | 3950/12188 [8:24:27<16:37:08, 7.26s/it] 32%|███▏ | 3951/12188 [8:24:34<17:03:23, 7.45s/it] {'loss': 0.3506, 'grad_norm': 0.6265540477858386, 'learning_rate': 7.898578800749623e-06, 'epoch': 0.32} + 32%|███▏ | 3951/12188 [8:24:34<17:03:23, 7.45s/it] 32%|███▏ | 3952/12188 [8:24:42<16:49:55, 7.36s/it] {'loss': 0.3591, 'grad_norm': 0.6594968579770331, 'learning_rate': 7.89749604375502e-06, 'epoch': 0.32} + 32%|███▏ | 3952/12188 [8:24:42<16:49:55, 7.36s/it] 32%|███▏ | 3953/12188 [8:24:48<16:12:44, 7.09s/it] {'loss': 0.3501, 'grad_norm': 0.7697289669851164, 'learning_rate': 7.896413082143898e-06, 'epoch': 0.32} + 32%|███▏ | 3953/12188 [8:24:48<16:12:44, 7.09s/it] 32%|███▏ | 3954/12188 [8:24:55<16:12:00, 7.08s/it] {'loss': 0.351, 'grad_norm': 0.6623318197396063, 'learning_rate': 7.89532991599274e-06, 'epoch': 0.32} + 32%|███▏ | 3954/12188 [8:24:55<16:12:00, 7.08s/it] 32%|███▏ | 3955/12188 [8:25:03<16:43:31, 7.31s/it] {'loss': 0.3473, 'grad_norm': 0.5904079543831577, 'learning_rate': 7.894246545378037e-06, 'epoch': 0.32} + 32%|███▏ | 3955/12188 [8:25:03<16:43:31, 7.31s/it] 32%|███▏ | 3956/12188 [8:25:10<16:34:00, 7.24s/it] {'loss': 0.3656, 'grad_norm': 0.6185702569174376, 'learning_rate': 7.89316297037629e-06, 'epoch': 0.32} + 32%|███▏ | 3956/12188 [8:25:10<16:34:00, 7.24s/it] 32%|███▏ | 3957/12188 [8:25:17<16:18:46, 7.13s/it] {'loss': 0.3511, 'grad_norm': 0.6074512031440279, 'learning_rate': 7.892079191064022e-06, 'epoch': 0.32} + 32%|███▏ | 3957/12188 [8:25:17<16:18:46, 7.13s/it] 32%|███▏ | 3958/12188 [8:25:24<16:07:31, 7.05s/it] {'loss': 0.3837, 'grad_norm': 0.6520430268789338, 'learning_rate': 7.89099520751777e-06, 'epoch': 0.32} + 32%|███▏ | 3958/12188 [8:25:24<16:07:31, 7.05s/it] 32%|███▏ | 3959/12188 [8:25:31<15:54:39, 6.96s/it] {'loss': 0.3539, 'grad_norm': 0.5914351613926238, 'learning_rate': 7.88991101981408e-06, 'epoch': 0.32} + 32%|███▏ | 3959/12188 [8:25:31<15:54:39, 6.96s/it] 32%|███▏ | 3960/12188 [8:25:37<15:41:00, 6.86s/it] {'loss': 0.3488, 'grad_norm': 0.6579085554748094, 'learning_rate': 7.888826628029515e-06, 'epoch': 0.32} + 32%|███▏ | 3960/12188 [8:25:37<15:41:00, 6.86s/it] 32%|███▏ | 3961/12188 [8:25:44<15:48:38, 6.92s/it] {'loss': 0.3692, 'grad_norm': 0.6413105503159083, 'learning_rate': 7.887742032240658e-06, 'epoch': 0.32} + 32%|███▏ | 3961/12188 [8:25:44<15:48:38, 6.92s/it] 33%|███▎ | 3962/12188 [8:25:52<16:34:53, 7.26s/it] {'loss': 0.3224, 'grad_norm': 0.5844974551054252, 'learning_rate': 7.886657232524097e-06, 'epoch': 0.33} + 33%|███▎ | 3962/12188 [8:25:52<16:34:53, 7.26s/it] 33%|███▎ | 3963/12188 [8:25:59<16:31:32, 7.23s/it] {'loss': 0.3554, 'grad_norm': 0.6154527558402214, 'learning_rate': 7.885572228956439e-06, 'epoch': 0.33} + 33%|███▎ | 3963/12188 [8:25:59<16:31:32, 7.23s/it] 33%|███▎ | 3964/12188 [8:26:07<16:57:44, 7.43s/it] {'loss': 0.3457, 'grad_norm': 0.5936541588595473, 'learning_rate': 7.884487021614307e-06, 'epoch': 0.33} + 33%|███▎ | 3964/12188 [8:26:07<16:57:44, 7.43s/it] 33%|███▎ | 3965/12188 [8:26:14<16:36:56, 7.27s/it] {'loss': 0.3615, 'grad_norm': 0.6693585047527416, 'learning_rate': 7.883401610574338e-06, 'epoch': 0.33} + 33%|███▎ | 3965/12188 [8:26:14<16:36:56, 7.27s/it] 33%|███▎ | 3966/12188 [8:26:22<16:40:00, 7.30s/it] {'loss': 0.3536, 'grad_norm': 0.6224291688012054, 'learning_rate': 7.882315995913177e-06, 'epoch': 0.33} + 33%|███▎ | 3966/12188 [8:26:22<16:40:00, 7.30s/it] 33%|███▎ | 3967/12188 [8:26:29<16:26:44, 7.20s/it] {'loss': 0.324, 'grad_norm': 0.6607569437566597, 'learning_rate': 7.88123017770749e-06, 'epoch': 0.33} + 33%|███▎ | 3967/12188 [8:26:29<16:26:44, 7.20s/it] 33%|███▎ | 3968/12188 [8:26:35<16:15:42, 7.12s/it] {'loss': 0.3587, 'grad_norm': 0.6005491087793292, 'learning_rate': 7.88014415603396e-06, 'epoch': 0.33} + 33%|███▎ | 3968/12188 [8:26:35<16:15:42, 7.12s/it] 33%|███▎ | 3969/12188 [8:26:42<15:48:03, 6.92s/it] {'loss': 0.3933, 'grad_norm': 0.675947071963376, 'learning_rate': 7.879057930969275e-06, 'epoch': 0.33} + 33%|███▎ | 3969/12188 [8:26:42<15:48:03, 6.92s/it] 33%|███▎ | 3970/12188 [8:26:49<15:46:31, 6.91s/it] {'loss': 0.3611, 'grad_norm': 0.7883656661439067, 'learning_rate': 7.877971502590146e-06, 'epoch': 0.33} + 33%|███▎ | 3970/12188 [8:26:49<15:46:31, 6.91s/it] 33%|███▎ | 3971/12188 [8:26:56<15:59:16, 7.00s/it] {'loss': 0.3327, 'grad_norm': 0.6727351513975756, 'learning_rate': 7.876884870973292e-06, 'epoch': 0.33} + 33%|███▎ | 3971/12188 [8:26:56<15:59:16, 7.00s/it] 33%|███▎ | 3972/12188 [8:27:03<15:57:28, 6.99s/it] {'loss': 0.3465, 'grad_norm': 0.6800786748191792, 'learning_rate': 7.875798036195452e-06, 'epoch': 0.33} + 33%|███▎ | 3972/12188 [8:27:03<15:57:28, 6.99s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7fcea39d1350> +[Try #0] Failed to fetch sample 4492653 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7fcea39d1350> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Frequently visited'"}, {'from': 'gpt', 'value': '\nclick(x=0.6635, y=0.185)\n'}]} + 33%|███▎ | 3973/12188 [8:27:10<15:54:49, 6.97s/it] {'loss': 0.3507, 'grad_norm': 0.5973085298349345, 'learning_rate': 7.874710998333374e-06, 'epoch': 0.33} + 33%|███▎ | 3973/12188 [8:27:10<15:54:49, 6.97s/it] 33%|███▎ | 3974/12188 [8:27:17<15:50:08, 6.94s/it] {'loss': 0.3184, 'grad_norm': 0.5858483849414303, 'learning_rate': 7.873623757463823e-06, 'epoch': 0.33} + 33%|███▎ | 3974/12188 [8:27:17<15:50:08, 6.94s/it] 33%|███▎ | 3975/12188 [8:27:25<16:58:38, 7.44s/it] {'loss': 0.3642, 'grad_norm': 0.6881163131693194, 'learning_rate': 7.87253631366358e-06, 'epoch': 0.33} + 33%|███▎ | 3975/12188 [8:27:25<16:58:38, 7.44s/it] 33%|███▎ | 3976/12188 [8:27:33<16:56:49, 7.43s/it] {'loss': 0.3553, 'grad_norm': 0.6488852563146476, 'learning_rate': 7.871448667009439e-06, 'epoch': 0.33} + 33%|███▎ | 3976/12188 [8:27:33<16:56:49, 7.43s/it] 33%|███▎ | 3977/12188 [8:27:40<16:30:51, 7.24s/it] {'loss': 0.3338, 'grad_norm': 0.6228542435813113, 'learning_rate': 7.870360817578204e-06, 'epoch': 0.33} + 33%|███▎ | 3977/12188 [8:27:40<16:30:51, 7.24s/it] 33%|███▎ | 3978/12188 [8:27:46<16:08:27, 7.08s/it] {'loss': 0.3881, 'grad_norm': 0.6572146851318225, 'learning_rate': 7.869272765446701e-06, 'epoch': 0.33} + 33%|███▎ | 3978/12188 [8:27:46<16:08:27, 7.08s/it] 33%|███▎ | 3979/12188 [8:27:53<16:08:25, 7.08s/it] {'loss': 0.3479, 'grad_norm': 0.6616244550715745, 'learning_rate': 7.868184510691764e-06, 'epoch': 0.33} + 33%|███▎ | 3979/12188 [8:27:53<16:08:25, 7.08s/it] 33%|███▎ | 3980/12188 [8:28:01<16:46:52, 7.36s/it] {'loss': 0.3682, 'grad_norm': 0.6349049333045897, 'learning_rate': 7.867096053390245e-06, 'epoch': 0.33} + 33%|███▎ | 3980/12188 [8:28:01<16:46:52, 7.36s/it] 33%|███▎ | 3981/12188 [8:28:08<16:22:58, 7.19s/it] {'loss': 0.4019, 'grad_norm': 0.6403071683367509, 'learning_rate': 7.86600739361901e-06, 'epoch': 0.33} + 33%|███▎ | 3981/12188 [8:28:08<16:22:58, 7.19s/it] 33%|███▎ | 3982/12188 [8:28:17<17:14:56, 7.57s/it] {'loss': 0.3528, 'grad_norm': 0.6607855779424673, 'learning_rate': 7.864918531454938e-06, 'epoch': 0.33} + 33%|███▎ | 3982/12188 [8:28:17<17:14:56, 7.57s/it] 33%|███▎ | 3983/12188 [8:28:24<17:01:48, 7.47s/it] {'loss': 0.3394, 'grad_norm': 0.6601597929430983, 'learning_rate': 7.863829466974922e-06, 'epoch': 0.33} + 33%|███▎ | 3983/12188 [8:28:24<17:01:48, 7.47s/it] 33%|███▎ | 3984/12188 [8:28:31<16:57:45, 7.44s/it] {'loss': 0.3424, 'grad_norm': 0.6296103185973102, 'learning_rate': 7.86274020025587e-06, 'epoch': 0.33} + 33%|███▎ | 3984/12188 [8:28:31<16:57:45, 7.44s/it] 33%|███▎ | 3985/12188 [8:28:39<16:54:57, 7.42s/it] {'loss': 0.3425, 'grad_norm': 0.7159922709197738, 'learning_rate': 7.861650731374704e-06, 'epoch': 0.33} + 33%|███▎ | 3985/12188 [8:28:39<16:54:57, 7.42s/it] 33%|███▎ | 3986/12188 [8:28:45<16:23:14, 7.19s/it] {'loss': 0.3573, 'grad_norm': 0.6561541873508885, 'learning_rate': 7.860561060408361e-06, 'epoch': 0.33} + 33%|███▎ | 3986/12188 [8:28:45<16:23:14, 7.19s/it] 33%|███▎ | 3987/12188 [8:28:52<16:12:19, 7.11s/it] {'loss': 0.3192, 'grad_norm': 0.5897368470674484, 'learning_rate': 7.85947118743379e-06, 'epoch': 0.33} + 33%|███▎ | 3987/12188 [8:28:52<16:12:19, 7.11s/it] 33%|███▎ | 3988/12188 [8:28:59<16:04:49, 7.06s/it] {'loss': 0.3378, 'grad_norm': 0.6279699200658293, 'learning_rate': 7.85838111252796e-06, 'epoch': 0.33} + 33%|███▎ | 3988/12188 [8:28:59<16:04:49, 7.06s/it] 33%|███▎ | 3989/12188 [8:29:06<16:10:20, 7.10s/it] {'loss': 0.3511, 'grad_norm': 0.6824885757754671, 'learning_rate': 7.857290835767847e-06, 'epoch': 0.33} + 33%|███▎ | 3989/12188 [8:29:06<16:10:20, 7.10s/it] 33%|███▎ | 3990/12188 [8:29:13<15:47:53, 6.94s/it] {'loss': 0.331, 'grad_norm': 0.671245271637452, 'learning_rate': 7.856200357230448e-06, 'epoch': 0.33} + 33%|███▎ | 3990/12188 [8:29:13<15:47:53, 6.94s/it] 33%|███▎ | 3991/12188 [8:29:20<16:06:10, 7.07s/it] {'loss': 0.39, 'grad_norm': 0.6078551324477071, 'learning_rate': 7.855109676992767e-06, 'epoch': 0.33} + 33%|███▎ | 3991/12188 [8:29:20<16:06:10, 7.07s/it] 33%|███▎ | 3992/12188 [8:29:30<17:36:28, 7.73s/it] {'loss': 0.3096, 'grad_norm': 0.5885925827695659, 'learning_rate': 7.854018795131827e-06, 'epoch': 0.33} + 33%|███▎ | 3992/12188 [8:29:30<17:36:28, 7.73s/it] 33%|███▎ | 3993/12188 [8:29:36<16:55:33, 7.44s/it] {'loss': 0.3459, 'grad_norm': 0.7341562081010585, 'learning_rate': 7.852927711724665e-06, 'epoch': 0.33} + 33%|███▎ | 3993/12188 [8:29:36<16:55:33, 7.44s/it] 33%|███▎ | 3994/12188 [8:29:44<16:54:59, 7.43s/it] {'loss': 0.3648, 'grad_norm': 0.63300688310773, 'learning_rate': 7.851836426848334e-06, 'epoch': 0.33} + 33%|███▎ | 3994/12188 [8:29:44<16:54:59, 7.43s/it] 33%|███▎ | 3995/12188 [8:29:51<16:29:07, 7.24s/it] {'loss': 0.3589, 'grad_norm': 0.632139299535845, 'learning_rate': 7.850744940579894e-06, 'epoch': 0.33} + 33%|███▎ | 3995/12188 [8:29:51<16:29:07, 7.24s/it] 33%|███▎ | 3996/12188 [8:29:58<16:31:02, 7.26s/it] {'loss': 0.3908, 'grad_norm': 0.6821673304043518, 'learning_rate': 7.849653252996426e-06, 'epoch': 0.33} + 33%|███▎ | 3996/12188 [8:29:58<16:31:02, 7.26s/it] 33%|███▎ | 3997/12188 [8:30:05<16:35:36, 7.29s/it] {'loss': 0.3577, 'grad_norm': 0.6075171025690768, 'learning_rate': 7.848561364175024e-06, 'epoch': 0.33} + 33%|███▎ | 3997/12188 [8:30:05<16:35:36, 7.29s/it] 33%|███▎ | 3998/12188 [8:30:13<16:41:01, 7.33s/it] {'loss': 0.3702, 'grad_norm': 0.6932375966761548, 'learning_rate': 7.847469274192797e-06, 'epoch': 0.33} + 33%|███▎ | 3998/12188 [8:30:13<16:41:01, 7.33s/it] 33%|███▎ | 3999/12188 [8:30:20<16:27:57, 7.24s/it] {'loss': 0.3884, 'grad_norm': 0.653687992792476, 'learning_rate': 7.846376983126864e-06, 'epoch': 0.33} + 33%|███▎ | 3999/12188 [8:30:20<16:27:57, 7.24s/it] 33%|███▎ | 4000/12188 [8:30:27<16:15:19, 7.15s/it] {'loss': 0.3588, 'grad_norm': 0.6518183150477391, 'learning_rate': 7.845284491054362e-06, 'epoch': 0.33} + 33%|███▎ | 4000/12188 [8:30:27<16:15:19, 7.15s/it]/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/utils/checkpoint.py:87: UserWarning: None of the inputs have requires_grad=True. Gradients will be None + warnings.warn( + 33%|███▎ | 4001/12188 [8:30:49<26:47:15, 11.78s/it] {'loss': 0.3371, 'grad_norm': 0.6375284604467827, 'learning_rate': 7.844191798052438e-06, 'epoch': 0.33} + 33%|███▎ | 4001/12188 [8:30:49<26:47:15, 11.78s/it] 33%|███▎ | 4002/12188 [8:30:56<23:12:27, 10.21s/it] {'loss': 0.3363, 'grad_norm': 0.6265765863855154, 'learning_rate': 7.84309890419826e-06, 'epoch': 0.33} + 33%|███▎ | 4002/12188 [8:30:56<23:12:27, 10.21s/it] 33%|███▎ | 4003/12188 [8:31:04<21:35:53, 9.50s/it] {'loss': 0.3869, 'grad_norm': 0.6280739753315693, 'learning_rate': 7.842005809569006e-06, 'epoch': 0.33} + 33%|███▎ | 4003/12188 [8:31:04<21:35:53, 9.50s/it] 33%|███▎ | 4004/12188 [8:31:10<19:35:52, 8.62s/it] {'loss': 0.3304, 'grad_norm': 0.6228477258996614, 'learning_rate': 7.840912514241867e-06, 'epoch': 0.33} + 33%|███▎ | 4004/12188 [8:31:10<19:35:52, 8.62s/it] 33%|███▎ | 4005/12188 [8:31:21<20:53:08, 9.19s/it] {'loss': 0.3623, 'grad_norm': 0.6045811158920567, 'learning_rate': 7.83981901829405e-06, 'epoch': 0.33} + 33%|███▎ | 4005/12188 [8:31:21<20:53:08, 9.19s/it] 33%|███▎ | 4006/12188 [8:31:28<19:30:14, 8.58s/it] {'loss': 0.3749, 'grad_norm': 0.602334073140482, 'learning_rate': 7.838725321802777e-06, 'epoch': 0.33} + 33%|███▎ | 4006/12188 [8:31:28<19:30:14, 8.58s/it] 33%|███▎ | 4007/12188 [8:31:35<18:35:22, 8.18s/it] {'loss': 0.3639, 'grad_norm': 0.6356108172209286, 'learning_rate': 7.837631424845283e-06, 'epoch': 0.33} + 33%|███▎ | 4007/12188 [8:31:35<18:35:22, 8.18s/it] 33%|███▎ | 4008/12188 [8:31:42<17:58:36, 7.91s/it] {'loss': 0.3501, 'grad_norm': 0.586167161747632, 'learning_rate': 7.836537327498816e-06, 'epoch': 0.33} + 33%|███▎ | 4008/12188 [8:31:42<17:58:36, 7.91s/it] 33%|███▎ | 4009/12188 [8:31:50<17:29:42, 7.70s/it] {'loss': 0.3478, 'grad_norm': 0.5955764241018714, 'learning_rate': 7.835443029840641e-06, 'epoch': 0.33} + 33%|██���▎ | 4009/12188 [8:31:50<17:29:42, 7.70s/it] 33%|███▎ | 4010/12188 [8:31:56<16:48:54, 7.40s/it] {'loss': 0.3623, 'grad_norm': 0.6310876489378392, 'learning_rate': 7.834348531948033e-06, 'epoch': 0.33} + 33%|███▎ | 4010/12188 [8:31:56<16:48:54, 7.40s/it] 33%|███▎ | 4011/12188 [8:32:04<16:57:37, 7.47s/it] {'loss': 0.335, 'grad_norm': 0.6013697302010771, 'learning_rate': 7.833253833898287e-06, 'epoch': 0.33} + 33%|███▎ | 4011/12188 [8:32:04<16:57:37, 7.47s/it] 33%|███▎ | 4012/12188 [8:32:12<17:29:15, 7.70s/it] {'loss': 0.3763, 'grad_norm': 0.6137811050045451, 'learning_rate': 7.832158935768708e-06, 'epoch': 0.33} + 33%|███▎ | 4012/12188 [8:32:12<17:29:15, 7.70s/it] 33%|███▎ | 4013/12188 [8:32:20<17:52:36, 7.87s/it] {'loss': 0.3547, 'grad_norm': 0.5985477812643535, 'learning_rate': 7.831063837636613e-06, 'epoch': 0.33} + 33%|███▎ | 4013/12188 [8:32:20<17:52:36, 7.87s/it] 33%|███▎ | 4014/12188 [8:32:27<17:12:39, 7.58s/it] {'loss': 0.3409, 'grad_norm': 0.7077727514563682, 'learning_rate': 7.82996853957934e-06, 'epoch': 0.33} + 33%|███▎ | 4014/12188 [8:32:27<17:12:39, 7.58s/it] 33%|███▎ | 4015/12188 [8:32:35<17:25:12, 7.67s/it] {'loss': 0.341, 'grad_norm': 0.6619767304476943, 'learning_rate': 7.828873041674234e-06, 'epoch': 0.33} + 33%|███▎ | 4015/12188 [8:32:35<17:25:12, 7.67s/it] 33%|███▎ | 4016/12188 [8:32:42<16:56:46, 7.47s/it] {'loss': 0.3627, 'grad_norm': 0.7033161918899659, 'learning_rate': 7.827777343998659e-06, 'epoch': 0.33} + 33%|███▎ | 4016/12188 [8:32:42<16:56:46, 7.47s/it] 33%|███▎ | 4017/12188 [8:32:49<16:27:00, 7.25s/it] {'loss': 0.3629, 'grad_norm': 0.6036607667061773, 'learning_rate': 7.826681446629994e-06, 'epoch': 0.33} + 33%|███▎ | 4017/12188 [8:32:49<16:27:00, 7.25s/it] 33%|███▎ | 4018/12188 [8:32:56<16:11:54, 7.14s/it] {'loss': 0.3247, 'grad_norm': 0.612501694517382, 'learning_rate': 7.825585349645623e-06, 'epoch': 0.33} + 33%|███▎ | 4018/12188 [8:32:56<16:11:54, 7.14s/it] 33%|███▎ | 4019/12188 [8:33:03<16:16:27, 7.17s/it] {'loss': 0.3841, 'grad_norm': 0.5988654229207765, 'learning_rate': 7.824489053122955e-06, 'epoch': 0.33} + 33%|███▎ | 4019/12188 [8:33:03<16:16:27, 7.17s/it] 33%|███▎ | 4020/12188 [8:33:11<16:30:08, 7.27s/it] {'loss': 0.3651, 'grad_norm': 0.6305634578819034, 'learning_rate': 7.82339255713941e-06, 'epoch': 0.33} + 33%|███▎ | 4020/12188 [8:33:11<16:30:08, 7.27s/it] 33%|███▎ | 4021/12188 [8:33:18<16:22:10, 7.22s/it] {'loss': 0.3804, 'grad_norm': 0.6637110888019846, 'learning_rate': 7.822295861772416e-06, 'epoch': 0.33} + 33%|███▎ | 4021/12188 [8:33:18<16:22:10, 7.22s/it] 33%|███▎ | 4022/12188 [8:33:25<16:20:36, 7.21s/it] {'loss': 0.3629, 'grad_norm': 0.6562247816091767, 'learning_rate': 7.821198967099424e-06, 'epoch': 0.33} + 33%|███▎ | 4022/12188 [8:33:25<16:20:36, 7.21s/it] 33%|███▎ | 4023/12188 [8:33:32<16:31:50, 7.29s/it] {'loss': 0.3293, 'grad_norm': 0.6088352522734316, 'learning_rate': 7.820101873197893e-06, 'epoch': 0.33} + 33%|███▎ | 4023/12188 [8:33:32<16:31:50, 7.29s/it] 33%|███▎ | 4024/12188 [8:33:40<17:07:03, 7.55s/it] {'loss': 0.3341, 'grad_norm': 0.6499286980283207, 'learning_rate': 7.819004580145298e-06, 'epoch': 0.33} + 33%|███▎ | 4024/12188 [8:33:40<17:07:03, 7.55s/it] 33%|███▎ | 4025/12188 [8:33:48<17:09:05, 7.56s/it] {'loss': 0.3614, 'grad_norm': 0.7052735316645613, 'learning_rate': 7.81790708801913e-06, 'epoch': 0.33} + 33%|███▎ | 4025/12188 [8:33:48<17:09:05, 7.56s/it] 33%|███▎ | 4026/12188 [8:33:56<17:32:50, 7.74s/it] {'loss': 0.3569, 'grad_norm': 0.6467460278317205, 'learning_rate': 7.81680939689689e-06, 'epoch': 0.33} + 33%|███▎ | 4026/12188 [8:33:56<17:32:50, 7.74s/it] 33%|███▎ | 4027/12188 [8:34:04<17:24:32, 7.68s/it] {'loss': 0.3707, 'grad_norm': 0.6204054104661068, 'learning_rate': 7.815711506856097e-06, 'epoch': 0.33} + 33%|███▎ | 4027/12188 [8:34:04<17:24:32, 7.68s/it] 33%|███▎ | 4028/12188 [8:34:12<18:08:06, 8.00s/it] {'loss': 0.344, 'grad_norm': 0.6090689313770575, 'learning_rate': 7.81461341797428e-06, 'epoch': 0.33} + 33%|███▎ | 4028/12188 [8:34:13<18:08:06, 8.00s/it] 33%|███▎ | 4029/12188 [8:34:19<17:11:27, 7.59s/it] {'loss': 0.3664, 'grad_norm': 0.7342942312196887, 'learning_rate': 7.813515130328985e-06, 'epoch': 0.33} + 33%|███▎ | 4029/12188 [8:34:19<17:11:27, 7.59s/it] 33%|███▎ | 4030/12188 [8:34:27<17:07:19, 7.56s/it] {'loss': 0.3671, 'grad_norm': 0.657810736799398, 'learning_rate': 7.812416643997773e-06, 'epoch': 0.33} + 33%|███▎ | 4030/12188 [8:34:27<17:07:19, 7.56s/it] 33%|███▎ | 4031/12188 [8:34:34<17:09:53, 7.58s/it] {'loss': 0.3305, 'grad_norm': 0.639405861422662, 'learning_rate': 7.811317959058217e-06, 'epoch': 0.33} + 33%|███▎ | 4031/12188 [8:34:34<17:09:53, 7.58s/it] 33%|███▎ | 4032/12188 [8:34:41<16:34:48, 7.32s/it] {'loss': 0.3788, 'grad_norm': 0.6006220226280254, 'learning_rate': 7.810219075587901e-06, 'epoch': 0.33} + 33%|███▎ | 4032/12188 [8:34:41<16:34:48, 7.32s/it] 33%|███▎ | 4033/12188 [8:34:49<16:50:51, 7.44s/it] {'loss': 0.332, 'grad_norm': 0.6099278227892865, 'learning_rate': 7.80911999366443e-06, 'epoch': 0.33} + 33%|███▎ | 4033/12188 [8:34:49<16:50:51, 7.44s/it] 33%|███▎ | 4034/12188 [8:34:56<16:47:59, 7.42s/it] {'loss': 0.3415, 'grad_norm': 0.6408729358525758, 'learning_rate': 7.808020713365419e-06, 'epoch': 0.33} + 33%|███▎ | 4034/12188 [8:34:56<16:47:59, 7.42s/it] 33%|███▎ | 4035/12188 [8:35:04<17:09:30, 7.58s/it] {'loss': 0.3057, 'grad_norm': 0.6436575764418772, 'learning_rate': 7.806921234768497e-06, 'epoch': 0.33} + 33%|███▎ | 4035/12188 [8:35:04<17:09:30, 7.58s/it] 33%|███▎ | 4036/12188 [8:35:11<16:41:19, 7.37s/it] {'loss': 0.3736, 'grad_norm': 0.6303219447073176, 'learning_rate': 7.805821557951305e-06, 'epoch': 0.33} + 33%|███▎ | 4036/12188 [8:35:11<16:41:19, 7.37s/it] 33%|███▎ | 4037/12188 [8:35:18<16:47:36, 7.42s/it] {'loss': 0.3513, 'grad_norm': 0.6990575678022085, 'learning_rate': 7.804721682991503e-06, 'epoch': 0.33} + 33%|███▎ | 4037/12188 [8:35:18<16:47:36, 7.42s/it] 33%|███▎ | 4038/12188 [8:35:25<16:34:56, 7.32s/it] {'loss': 0.3205, 'grad_norm': 0.6149638125793881, 'learning_rate': 7.803621609966762e-06, 'epoch': 0.33} + 33%|███▎ | 4038/12188 [8:35:26<16:34:56, 7.32s/it] 33%|███▎ | 4039/12188 [8:35:33<16:50:40, 7.44s/it] {'loss': 0.3986, 'grad_norm': 0.6345414947727697, 'learning_rate': 7.802521338954767e-06, 'epoch': 0.33} + 33%|███▎ | 4039/12188 [8:35:33<16:50:40, 7.44s/it] 33%|███▎ | 4040/12188 [8:35:40<16:33:46, 7.32s/it] {'loss': 0.3485, 'grad_norm': 0.6178439259006434, 'learning_rate': 7.801420870033218e-06, 'epoch': 0.33} + 33%|███▎ | 4040/12188 [8:35:40<16:33:46, 7.32s/it] 33%|███▎ | 4041/12188 [8:35:47<16:26:19, 7.26s/it] {'loss': 0.3457, 'grad_norm': 0.6558184768911091, 'learning_rate': 7.800320203279829e-06, 'epoch': 0.33} + 33%|███▎ | 4041/12188 [8:35:47<16:26:19, 7.26s/it] 33%|███▎ | 4042/12188 [8:35:55<16:44:07, 7.40s/it] {'loss': 0.3359, 'grad_norm': 0.6082820914053809, 'learning_rate': 7.799219338772327e-06, 'epoch': 0.33} + 33%|███▎ | 4042/12188 [8:35:55<16:44:07, 7.40s/it] 33%|███▎ | 4043/12188 [8:36:02<16:33:30, 7.32s/it] {'loss': 0.3854, 'grad_norm': 0.6546210030911372, 'learning_rate': 7.79811827658845e-06, 'epoch': 0.33} + 33%|███▎ | 4043/12188 [8:36:02<16:33:30, 7.32s/it] 33%|███▎ | 4044/12188 [8:36:10<16:57:18, 7.49s/it] {'loss': 0.3901, 'grad_norm': 0.616362556344265, 'learning_rate': 7.797017016805957e-06, 'epoch': 0.33} + 33%|███▎ | 4044/12188 [8:36:10<16:57:18, 7.49s/it] 33%|███▎ | 4045/12188 [8:36:18<17:12:40, 7.61s/it] {'loss': 0.3417, 'grad_norm': 0.6166494035329391, 'learning_rate': 7.795915559502616e-06, 'epoch': 0.33} + 33%|███▎ | 4045/12188 [8:36:18<17:12:40, 7.61s/it] 33%|███▎ | 4046/12188 [8:36:25<16:43:27, 7.39s/it] {'loss': 0.3257, 'grad_norm': 0.6185169507440403, 'learning_rate': 7.794813904756209e-06, 'epoch': 0.33} + 33%|███▎ | 4046/12188 [8:36:25<16:43:27, 7.39s/it] 33%|███▎ | 4047/12188 [8:36:32<16:21:54, 7.24s/it] {'loss': 0.3504, 'grad_norm': 0.599565054802429, 'learning_rate': 7.793712052644535e-06, 'epoch': 0.33} + 33%|███▎ | 4047/12188 [8:36:32<16:21:54, 7.24s/it] 33%|███▎ | 4048/12188 [8:36:39<16:22:17, 7.24s/it] {'loss': 0.359, 'grad_norm': 0.6340181327639354, 'learning_rate': 7.792610003245406e-06, 'epoch': 0.33} + 33%|███▎ | 4048/12188 [8:36:39<16:22:17, 7.24s/it] 33%|███▎ | 4049/12188 [8:36:46<15:58:33, 7.07s/it] {'loss': 0.3843, 'grad_norm': 0.6888545278701614, 'learning_rate': 7.791507756636643e-06, 'epoch': 0.33} + 33%|███▎ | 4049/12188 [8:36:46<15:58:33, 7.07s/it] 33%|███▎ | 4050/12188 [8:36:53<16:28:34, 7.29s/it] {'loss': 0.4168, 'grad_norm': 0.6355573283236872, 'learning_rate': 7.790405312896088e-06, 'epoch': 0.33} + 33%|███▎ | 4050/12188 [8:36:53<16:28:34, 7.29s/it] 33%|███▎ | 4051/12188 [8:37:01<16:57:36, 7.50s/it] {'loss': 0.374, 'grad_norm': 0.6484295051338416, 'learning_rate': 7.789302672101593e-06, 'epoch': 0.33} + 33%|███▎ | 4051/12188 [8:37:01<16:57:36, 7.50s/it] 33%|███▎ | 4052/12188 [8:37:08<16:35:35, 7.34s/it] {'loss': 0.3776, 'grad_norm': 0.6278443509416608, 'learning_rate': 7.788199834331026e-06, 'epoch': 0.33} + 33%|███▎ | 4052/12188 [8:37:08<16:35:35, 7.34s/it] 33%|███▎ | 4053/12188 [8:37:16<16:25:15, 7.27s/it] {'loss': 0.3498, 'grad_norm': 0.5815208244978146, 'learning_rate': 7.787096799662263e-06, 'epoch': 0.33} + 33%|███▎ | 4053/12188 [8:37:16<16:25:15, 7.27s/it] 33%|███▎ | 4054/12188 [8:37:22<16:08:16, 7.14s/it] {'loss': 0.393, 'grad_norm': 0.6377875177696042, 'learning_rate': 7.785993568173205e-06, 'epoch': 0.33} + 33%|███▎ | 4054/12188 [8:37:22<16:08:16, 7.14s/it] 33%|███▎ | 4055/12188 [8:37:31<17:05:25, 7.56s/it] {'loss': 0.3443, 'grad_norm': 0.5828083675328058, 'learning_rate': 7.784890139941755e-06, 'epoch': 0.33} + 33%|███▎ | 4055/12188 [8:37:31<17:05:25, 7.56s/it] 33%|███▎ | 4056/12188 [8:37:38<16:52:04, 7.47s/it] {'loss': 0.3836, 'grad_norm': 0.6476602545527373, 'learning_rate': 7.783786515045839e-06, 'epoch': 0.33} + 33%|███▎ | 4056/12188 [8:37:38<16:52:04, 7.47s/it] 33%|███▎ | 4057/12188 [8:37:45<16:35:32, 7.35s/it] {'loss': 0.3328, 'grad_norm': 0.6024946614834383, 'learning_rate': 7.782682693563391e-06, 'epoch': 0.33} + 33%|███▎ | 4057/12188 [8:37:45<16:35:32, 7.35s/it] 33%|███▎ | 4058/12188 [8:37:52<16:15:29, 7.20s/it] {'loss': 0.3641, 'grad_norm': 0.6640445670082776, 'learning_rate': 7.781578675572364e-06, 'epoch': 0.33} + 33%|███▎ | 4058/12188 [8:37:52<16:15:29, 7.20s/it] 33%|███▎ | 4059/12188 [8:37:59<15:58:54, 7.08s/it] {'loss': 0.368, 'grad_norm': 0.6438062692831585, 'learning_rate': 7.780474461150715e-06, 'epoch': 0.33} + 33%|███▎ | 4059/12188 [8:37:59<15:58:54, 7.08s/it] 33%|███▎ | 4060/12188 [8:38:06<15:46:25, 6.99s/it] {'loss': 0.3574, 'grad_norm': 0.6562827502869143, 'learning_rate': 7.77937005037643e-06, 'epoch': 0.33} + 33%|███▎ | 4060/12188 [8:38:06<15:46:25, 6.99s/it] 33%|███▎ | 4061/12188 [8:38:14<16:43:56, 7.41s/it] {'loss': 0.3301, 'grad_norm': 0.6530491319729351, 'learning_rate': 7.7782654433275e-06, 'epoch': 0.33} + 33%|███▎ | 4061/12188 [8:38:14<16:43:56, 7.41s/it] 33%|███▎ | 4062/12188 [8:38:21<16:17:47, 7.22s/it] {'loss': 0.3093, 'grad_norm': 0.6147073991186335, 'learning_rate': 7.777160640081925e-06, 'epoch': 0.33} + 33%|███▎ | 4062/12188 [8:38:21<16:17:47, 7.22s/it] 33%|███▎ | 4063/12188 [8:38:28<16:02:10, 7.11s/it] {'loss': 0.3753, 'grad_norm': 0.6432917382446715, 'learning_rate': 7.776055640717727e-06, 'epoch': 0.33} + 33%|███▎ | 4063/12188 [8:38:28<16:02:10, 7.11s/it] 33%|███▎ | 4064/12188 [8:38:35<16:07:50, 7.15s/it] {'loss': 0.3178, 'grad_norm': 0.613698436362954, 'learning_rate': 7.77495044531294e-06, 'epoch': 0.33} + 33%|███▎ | 4064/12188 [8:38:35<16:07:50, 7.15s/it] 33%|███▎ | 4065/12188 [8:38:42<15:52:44, 7.04s/it] {'loss': 0.3436, 'grad_norm': 0.645327953473634, 'learning_rate': 7.773845053945612e-06, 'epoch': 0.33} + 33%|███▎ | 4065/12188 [8:38:42<15:52:44, 7.04s/it] 33%|███▎ | 4066/12188 [8:38:49<15:57:04, 7.07s/it] {'loss': 0.3525, 'grad_norm': 0.6568562900111395, 'learning_rate': 7.772739466693801e-06, 'epoch': 0.33} + 33%|███▎ | 4066/12188 [8:38:49<15:57:04, 7.07s/it] 33%|███▎ | 4067/12188 [8:38:56<15:51:56, 7.03s/it] {'loss': 0.3735, 'grad_norm': 0.663047764817123, 'learning_rate': 7.771633683635586e-06, 'epoch': 0.33} + 33%|███▎ | 4067/12188 [8:38:56<15:51:56, 7.03s/it] 33%|███▎ | 4068/12188 [8:39:02<15:30:16, 6.87s/it] {'loss': 0.3498, 'grad_norm': 0.6614313133898073, 'learning_rate': 7.770527704849052e-06, 'epoch': 0.33} + 33%|███▎ | 4068/12188 [8:39:02<15:30:16, 6.87s/it] 33%|███▎ | 4069/12188 [8:39:09<15:18:30, 6.79s/it] {'loss': 0.3798, 'grad_norm': 0.7726066030644256, 'learning_rate': 7.769421530412302e-06, 'epoch': 0.33} + 33%|███▎ | 4069/12188 [8:39:09<15:18:30, 6.79s/it] 33%|███▎ | 4070/12188 [8:39:16<15:35:19, 6.91s/it] {'loss': 0.3466, 'grad_norm': 0.6200755293614089, 'learning_rate': 7.768315160403453e-06, 'epoch': 0.33} + 33%|███▎ | 4070/12188 [8:39:16<15:35:19, 6.91s/it] 33%|███▎ | 4071/12188 [8:39:23<15:51:13, 7.03s/it] {'loss': 0.3509, 'grad_norm': 0.6208324990411024, 'learning_rate': 7.767208594900635e-06, 'epoch': 0.33} + 33%|███▎ | 4071/12188 [8:39:23<15:51:13, 7.03s/it] 33%|███▎ | 4072/12188 [8:39:30<15:27:08, 6.85s/it] {'loss': 0.3378, 'grad_norm': 0.6549414557634783, 'learning_rate': 7.766101833981994e-06, 'epoch': 0.33} + 33%|███▎ | 4072/12188 [8:39:30<15:27:08, 6.85s/it] 33%|███▎ | 4073/12188 [8:39:36<15:17:16, 6.78s/it] {'loss': 0.3714, 'grad_norm': 0.6385141172821575, 'learning_rate': 7.764994877725684e-06, 'epoch': 0.33} + 33%|███▎ | 4073/12188 [8:39:36<15:17:16, 6.78s/it] 33%|███▎ | 4074/12188 [8:39:45<16:13:31, 7.20s/it] {'loss': 0.3369, 'grad_norm': 0.6358442985048072, 'learning_rate': 7.763887726209877e-06, 'epoch': 0.33} + 33%|███▎ | 4074/12188 [8:39:45<16:13:31, 7.20s/it] 33%|███▎ | 4075/12188 [8:39:52<16:00:47, 7.11s/it] {'loss': 0.37, 'grad_norm': 0.6683018011152858, 'learning_rate': 7.762780379512761e-06, 'epoch': 0.33} + 33%|███▎ | 4075/12188 [8:39:52<16:00:47, 7.11s/it] 33%|███▎ | 4076/12188 [8:39:59<16:00:25, 7.10s/it] {'loss': 0.3279, 'grad_norm': 0.6034171788126358, 'learning_rate': 7.761672837712533e-06, 'epoch': 0.33} + 33%|███▎ | 4076/12188 [8:39:59<16:00:25, 7.10s/it] 33%|███▎ | 4077/12188 [8:40:06<16:00:53, 7.11s/it] {'loss': 0.3432, 'grad_norm': 0.6232956628194204, 'learning_rate': 7.760565100887405e-06, 'epoch': 0.33} + 33%|███▎ | 4077/12188 [8:40:06<16:00:53, 7.11s/it] 33%|███▎ | 4078/12188 [8:40:13<16:12:48, 7.20s/it] {'loss': 0.3623, 'grad_norm': 0.6731138592028492, 'learning_rate': 7.759457169115606e-06, 'epoch': 0.33} + 33%|███▎ | 4078/12188 [8:40:13<16:12:48, 7.20s/it] 33%|███▎ | 4079/12188 [8:40:20<16:16:43, 7.23s/it] {'loss': 0.341, 'grad_norm': 0.6312498985376971, 'learning_rate': 7.758349042475374e-06, 'epoch': 0.33} + 33%|███▎ | 4079/12188 [8:40:20<16:16:43, 7.23s/it] 33%|███▎ | 4080/12188 [8:40:28<16:22:47, 7.27s/it] {'loss': 0.3406, 'grad_norm': 0.630887739453927, 'learning_rate': 7.757240721044967e-06, 'epoch': 0.33} + 33%|███▎ | 4080/12188 [8:40:28<16:22:47, 7.27s/it] 33%|███▎ | 4081/12188 [8:40:34<15:56:36, 7.08s/it] {'loss': 0.3717, 'grad_norm': 0.6639877117524499, 'learning_rate': 7.756132204902648e-06, 'epoch': 0.33} + 33%|███▎ | 4081/12188 [8:40:34<15:56:36, 7.08s/it] 33%|███▎ | 4082/12188 [8:40:42<16:26:50, 7.30s/it] {'loss': 0.4127, 'grad_norm': 0.6267533022672894, 'learning_rate': 7.7550234941267e-06, 'epoch': 0.33} + 33%|███▎ | 4082/12188 [8:40:42<16:26:50, 7.30s/it] 34%|███▎ | 4083/12188 [8:40:51<17:10:45, 7.63s/it] {'loss': 0.3944, 'grad_norm': 0.6539414151688071, 'learning_rate': 7.753914588795421e-06, 'epoch': 0.33} + 34%|███▎ | 4083/12188 [8:40:51<17:10:45, 7.63s/it] 34%|███▎ | 4084/12188 [8:40:58<16:39:42, 7.40s/it] {'loss': 0.3831, 'grad_norm': 0.6668430120363532, 'learning_rate': 7.752805488987116e-06, 'epoch': 0.34} + 34%|███▎ | 4084/12188 [8:40:58<16:39:42, 7.40s/it] 34%|███▎ | 4085/12188 [8:41:06<17:24:08, 7.73s/it] {'loss': 0.364, 'grad_norm': 0.6295090830388266, 'learning_rate': 7.751696194780112e-06, 'epoch': 0.34} + 34%|███▎ | 4085/12188 [8:41:06<17:24:08, 7.73s/it] 34%|███▎ | 4086/12188 [8:41:15<18:05:47, 8.04s/it] {'loss': 0.3553, 'grad_norm': 0.589225661483559, 'learning_rate': 7.750586706252743e-06, 'epoch': 0.34} + 34%|███▎ | 4086/12188 [8:41:15<18:05:47, 8.04s/it] 34%|███▎ | 4087/12188 [8:41:22<17:42:50, 7.87s/it] {'loss': 0.3762, 'grad_norm': 0.6464329866054863, 'learning_rate': 7.749477023483361e-06, 'epoch': 0.34} + 34%|███▎ | 4087/12188 [8:41:22<17:42:50, 7.87s/it] 34%|███▎ | 4088/12188 [8:41:29<17:05:14, 7.59s/it] {'loss': 0.3589, 'grad_norm': 0.6765773449165166, 'learning_rate': 7.748367146550327e-06, 'epoch': 0.34} + 34%|███▎ | 4088/12188 [8:41:29<17:05:14, 7.59s/it] 34%|███▎ | 4089/12188 [8:41:36<16:23:30, 7.29s/it] {'loss': 0.3731, 'grad_norm': 0.6593691413743069, 'learning_rate': 7.747257075532022e-06, 'epoch': 0.34} + 34%|███▎ | 4089/12188 [8:41:36<16:23:30, 7.29s/it] 34%|███▎ | 4090/12188 [8:41:44<16:45:29, 7.45s/it] {'loss': 0.3604, 'grad_norm': 0.6520867097679971, 'learning_rate': 7.746146810506834e-06, 'epoch': 0.34} + 34%|███▎ | 4090/12188 [8:41:44<16:45:29, 7.45s/it] 34%|███▎ | 4091/12188 [8:41:50<16:20:34, 7.27s/it] {'loss': 0.3081, 'grad_norm': 0.5790589471127424, 'learning_rate': 7.745036351553172e-06, 'epoch': 0.34} + 34%|███▎ | 4091/12188 [8:41:50<16:20:34, 7.27s/it] 34%|███▎ | 4092/12188 [8:41:59<16:56:14, 7.53s/it] {'loss': 0.345, 'grad_norm': 0.6418662798914643, 'learning_rate': 7.743925698749454e-06, 'epoch': 0.34} + 34%|███▎ | 4092/12188 [8:41:59<16:56:14, 7.53s/it] 34%|███▎ | 4093/12188 [8:42:06<16:34:38, 7.37s/it] {'loss': 0.3472, 'grad_norm': 0.6261020494174188, 'learning_rate': 7.742814852174112e-06, 'epoch': 0.34} + 34%|███▎ | 4093/12188 [8:42:06<16:34:38, 7.37s/it] 34%|███▎ | 4094/12188 [8:42:13<16:37:23, 7.39s/it] {'loss': 0.3448, 'grad_norm': 0.6709153771679117, 'learning_rate': 7.74170381190559e-06, 'epoch': 0.34} + 34%|███▎ | 4094/12188 [8:42:13<16:37:23, 7.39s/it] 34%|███▎ | 4095/12188 [8:42:20<16:10:01, 7.19s/it] {'loss': 0.3752, 'grad_norm': 0.6936349398173108, 'learning_rate': 7.74059257802235e-06, 'epoch': 0.34} + 34%|███▎ | 4095/12188 [8:42:20<16:10:01, 7.19s/it] 34%|███▎ | 4096/12188 [8:42:27<16:22:20, 7.28s/it] {'loss': 0.328, 'grad_norm': 0.5967813711468822, 'learning_rate': 7.739481150602865e-06, 'epoch': 0.34} + 34%|███▎ | 4096/12188 [8:42:27<16:22:20, 7.28s/it] 34%|███▎ | 4097/12188 [8:42:34<16:12:03, 7.21s/it] {'loss': 0.391, 'grad_norm': 0.648522402259355, 'learning_rate': 7.738369529725624e-06, 'epoch': 0.34} + 34%|███▎ | 4097/12188 [8:42:34<16:12:03, 7.21s/it] 34%|███▎ | 4098/12188 [8:42:42<16:20:12, 7.27s/it] {'loss': 0.3164, 'grad_norm': 0.6156427963440132, 'learning_rate': 7.737257715469124e-06, 'epoch': 0.34} + 34%|███▎ | 4098/12188 [8:42:42<16:20:12, 7.27s/it] 34%|███▎ | 4099/12188 [8:42:49<16:05:52, 7.16s/it] {'loss': 0.3758, 'grad_norm': 0.6527680347219319, 'learning_rate': 7.736145707911884e-06, 'epoch': 0.34} + 34%|███▎ | 4099/12188 [8:42:49<16:05:52, 7.16s/it] 34%|███▎ | 4100/12188 [8:42:56<16:02:38, 7.14s/it] {'loss': 0.3881, 'grad_norm': 0.6290622617897808, 'learning_rate': 7.735033507132429e-06, 'epoch': 0.34} + 34%|███▎ | 4100/12188 [8:42:56<16:02:38, 7.14s/it] 34%|███▎ | 4101/12188 [8:43:04<16:54:20, 7.53s/it] {'loss': 0.3054, 'grad_norm': 0.6294631981361496, 'learning_rate': 7.733921113209302e-06, 'epoch': 0.34} + 34%|███▎ | 4101/12188 [8:43:04<16:54:20, 7.53s/it] 34%|███▎ | 4102/12188 [8:43:11<16:37:33, 7.40s/it] {'loss': 0.3554, 'grad_norm': 0.6418479001775654, 'learning_rate': 7.732808526221058e-06, 'epoch': 0.34} + 34%|███▎ | 4102/12188 [8:43:11<16:37:33, 7.40s/it] 34%|███▎ | 4103/12188 [8:43:18<16:08:45, 7.19s/it] {'loss': 0.3461, 'grad_norm': 0.5891680660121622, 'learning_rate': 7.731695746246268e-06, 'epoch': 0.34} + 34%|███▎ | 4103/12188 [8:43:18<16:08:45, 7.19s/it] 34%|███▎ | 4104/12188 [8:43:25<16:14:40, 7.23s/it] {'loss': 0.3717, 'grad_norm': 0.6694743769865772, 'learning_rate': 7.730582773363511e-06, 'epoch': 0.34} + 34%|███▎ | 4104/12188 [8:43:25<16:14:40, 7.23s/it] 34%|███▎ | 4105/12188 [8:43:33<16:24:27, 7.31s/it] {'loss': 0.3498, 'grad_norm': 0.6848407776811216, 'learning_rate': 7.729469607651387e-06, 'epoch': 0.34} + 34%|███▎ | 4105/12188 [8:43:33<16:24:27, 7.31s/it] 34%|███▎ | 4106/12188 [8:43:42<17:57:21, 8.00s/it] {'loss': 0.3621, 'grad_norm': 0.7085383850063207, 'learning_rate': 7.728356249188504e-06, 'epoch': 0.34} + 34%|███▎ | 4106/12188 [8:43:42<17:57:21, 8.00s/it] 34%|███▎ | 4107/12188 [8:43:49<17:03:54, 7.60s/it] {'loss': 0.3564, 'grad_norm': 0.61248451804519, 'learning_rate': 7.727242698053488e-06, 'epoch': 0.34} + 34%|███▎ | 4107/12188 [8:43:49<17:03:54, 7.60s/it] 34%|███▎ | 4108/12188 [8:43:56<16:21:07, 7.29s/it] {'loss': 0.3324, 'grad_norm': 0.6298196608028203, 'learning_rate': 7.726128954324971e-06, 'epoch': 0.34} + 34%|███▎ | 4108/12188 [8:43:56<16:21:07, 7.29s/it] 34%|███▎ | 4109/12188 [8:44:03<16:15:24, 7.24s/it] {'loss': 0.32, 'grad_norm': 0.5492919697206993, 'learning_rate': 7.72501501808161e-06, 'epoch': 0.34} + 34%|███▎ | 4109/12188 [8:44:03<16:15:24, 7.24s/it] 34%|███▎ | 4110/12188 [8:44:10<15:56:14, 7.10s/it] {'loss': 0.366, 'grad_norm': 0.6137360893697085, 'learning_rate': 7.723900889402065e-06, 'epoch': 0.34} + 34%|███▎ | 4110/12188 [8:44:10<15:56:14, 7.10s/it] 34%|███▎ | 4111/12188 [8:44:17<16:23:08, 7.30s/it] {'loss': 0.3422, 'grad_norm': 0.6873326873730783, 'learning_rate': 7.722786568365015e-06, 'epoch': 0.34} + 34%|███▎ | 4111/12188 [8:44:17<16:23:08, 7.30s/it] 34%|███▎ | 4112/12188 [8:44:24<15:56:30, 7.11s/it] {'loss': 0.3658, 'grad_norm': 0.6930406786073827, 'learning_rate': 7.721672055049153e-06, 'epoch': 0.34} + 34%|███▎ | 4112/12188 [8:44:24<15:56:30, 7.11s/it] 34%|███▎ | 4113/12188 [8:44:31<15:42:38, 7.00s/it] {'loss': 0.3838, 'grad_norm': 0.6476472333231771, 'learning_rate': 7.720557349533183e-06, 'epoch': 0.34} + 34%|███▎ | 4113/12188 [8:44:31<15:42:38, 7.00s/it] 34%|███▍ | 4114/12188 [8:44:38<15:44:12, 7.02s/it] {'loss': 0.3708, 'grad_norm': 0.6709359484316791, 'learning_rate': 7.719442451895824e-06, 'epoch': 0.34} + 34%|███▍ | 4114/12188 [8:44:38<15:44:12, 7.02s/it] 34%|███▍ | 4115/12188 [8:44:47<17:07:39, 7.64s/it] {'loss': 0.3335, 'grad_norm': 0.6476366832798613, 'learning_rate': 7.718327362215809e-06, 'epoch': 0.34} + 34%|███▍ | 4115/12188 [8:44:47<17:07:39, 7.64s/it] 34%|███▍ | 4116/12188 [8:44:55<17:22:02, 7.75s/it] {'loss': 0.3734, 'grad_norm': 0.6338101260424004, 'learning_rate': 7.71721208057188e-06, 'epoch': 0.34} + 34%|███▍ | 4116/12188 [8:44:55<17:22:02, 7.75s/it] 34%|███▍ | 4117/12188 [8:45:02<16:43:02, 7.46s/it] {'loss': 0.3609, 'grad_norm': 0.6085437108899204, 'learning_rate': 7.716096607042802e-06, 'epoch': 0.34} + 34%|███▍ | 4117/12188 [8:45:02<16:43:02, 7.46s/it] 34%|███▍ | 4118/12188 [8:45:10<17:38:08, 7.87s/it] {'loss': 0.3793, 'grad_norm': 0.6470512411841841, 'learning_rate': 7.714980941707345e-06, 'epoch': 0.34} + 34%|███▍ | 4118/12188 [8:45:10<17:38:08, 7.87s/it] 34%|███▍ | 4119/12188 [8:45:18<17:28:27, 7.80s/it] {'loss': 0.3576, 'grad_norm': 0.6156501974438027, 'learning_rate': 7.713865084644297e-06, 'epoch': 0.34} + 34%|███▍ | 4119/12188 [8:45:18<17:28:27, 7.80s/it] 34%|███▍ | 4120/12188 [8:45:25<16:38:39, 7.43s/it] {'loss': 0.3717, 'grad_norm': 0.6189923049066208, 'learning_rate': 7.712749035932454e-06, 'epoch': 0.34} + 34%|███▍ | 4120/12188 [8:45:25<16:38:39, 7.43s/it] 34%|███▍ | 4121/12188 [8:45:32<16:31:36, 7.38s/it] {'loss': 0.3518, 'grad_norm': 0.6072185223982953, 'learning_rate': 7.711632795650634e-06, 'epoch': 0.34} + 34%|███▍ | 4121/12188 [8:45:32<16:31:36, 7.38s/it] 34%|███▍ | 4122/12188 [8:45:40<16:43:38, 7.47s/it] {'loss': 0.3452, 'grad_norm': 0.6605101238515693, 'learning_rate': 7.710516363877662e-06, 'epoch': 0.34} + 34%|███▍ | 4122/12188 [8:45:40<16:43:38, 7.47s/it] 34%|███▍ | 4123/12188 [8:45:47<16:39:23, 7.43s/it] {'loss': 0.3757, 'grad_norm': 0.5995008222933018, 'learning_rate': 7.70939974069238e-06, 'epoch': 0.34} + 34%|███▍ | 4123/12188 [8:45:47<16:39:23, 7.43s/it] 34%|███▍ | 4124/12188 [8:45:54<16:26:10, 7.34s/it] {'loss': 0.3451, 'grad_norm': 0.6325787971708328, 'learning_rate': 7.70828292617364e-06, 'epoch': 0.34} + 34%|███▍ | 4124/12188 [8:45:54<16:26:10, 7.34s/it] 34%|███▍ | 4125/12188 [8:46:01<16:23:56, 7.32s/it] {'loss': 0.3514, 'grad_norm': 0.6303034516816596, 'learning_rate': 7.707165920400314e-06, 'epoch': 0.34} + 34%|███▍ | 4125/12188 [8:46:01<16:23:56, 7.32s/it] 34%|███▍ | 4126/12188 [8:46:09<16:26:19, 7.34s/it] {'loss': 0.3717, 'grad_norm': 0.6497728565504431, 'learning_rate': 7.706048723451276e-06, 'epoch': 0.34} + 34%|███▍ | 4126/12188 [8:46:09<16:26:19, 7.34s/it] 34%|███▍ | 4127/12188 [8:46:15<15:47:57, 7.06s/it] {'loss': 0.3204, 'grad_norm': 0.622618222911655, 'learning_rate': 7.704931335405428e-06, 'epoch': 0.34} + 34%|███▍ | 4127/12188 [8:46:15<15:47:57, 7.06s/it] 34%|███▍ | 4128/12188 [8:46:22<15:41:58, 7.01s/it] {'loss': 0.3722, 'grad_norm': 0.6401061890837538, 'learning_rate': 7.703813756341673e-06, 'epoch': 0.34} + 34%|███▍ | 4128/12188 [8:46:22<15:41:58, 7.01s/it] 34%|███▍ | 4129/12188 [8:46:29<15:36:03, 6.97s/it] {'loss': 0.3515, 'grad_norm': 0.6710295571931958, 'learning_rate': 7.702695986338935e-06, 'epoch': 0.34} + 34%|███▍ | 4129/12188 [8:46:29<15:36:03, 6.97s/it] 34%|███▍ | 4130/12188 [8:46:36<15:35:22, 6.96s/it] {'loss': 0.3586, 'grad_norm': 0.6752807833759652, 'learning_rate': 7.70157802547615e-06, 'epoch': 0.34} + 34%|███▍ | 4130/12188 [8:46:36<15:35:22, 6.96s/it] 34%|███▍ | 4131/12188 [8:46:43<16:02:48, 7.17s/it] {'loss': 0.3639, 'grad_norm': 0.5822108647807972, 'learning_rate': 7.700459873832263e-06, 'epoch': 0.34} + 34%|███▍ | 4131/12188 [8:46:44<16:02:48, 7.17s/it] 34%|███▍ | 4132/12188 [8:46:51<16:00:38, 7.15s/it] {'loss': 0.3152, 'grad_norm': 0.6149556060115998, 'learning_rate': 7.69934153148624e-06, 'epoch': 0.34} + 34%|███▍ | 4132/12188 [8:46:51<16:00:38, 7.15s/it] 34%|███▍ | 4133/12188 [8:46:58<16:23:10, 7.32s/it] {'loss': 0.375, 'grad_norm': 0.6012387432917506, 'learning_rate': 7.698222998517054e-06, 'epoch': 0.34} + 34%|███▍ | 4133/12188 [8:46:58<16:23:10, 7.32s/it] 34%|███▍ | 4134/12188 [8:47:05<16:08:18, 7.21s/it] {'loss': 0.3522, 'grad_norm': 0.6794910920560392, 'learning_rate': 7.697104275003696e-06, 'epoch': 0.34} + 34%|███▍ | 4134/12188 [8:47:05<16:08:18, 7.21s/it] 34%|███▍ | 4135/12188 [8:47:12<16:05:53, 7.20s/it] {'loss': 0.3406, 'grad_norm': 0.6533480119663367, 'learning_rate': 7.695985361025164e-06, 'epoch': 0.34} + 34%|███▍ | 4135/12188 [8:47:12<16:05:53, 7.20s/it] 34%|███▍ | 4136/12188 [8:47:20<16:36:30, 7.43s/it] {'loss': 0.3654, 'grad_norm': 0.6722957442307633, 'learning_rate': 7.694866256660482e-06, 'epoch': 0.34} + 34%|███▍ | 4136/12188 [8:47:20<16:36:30, 7.43s/it] 34%|███▍ | 4137/12188 [8:47:27<16:21:55, 7.32s/it] {'loss': 0.2935, 'grad_norm': 0.588981785937436, 'learning_rate': 7.693746961988672e-06, 'epoch': 0.34} + 34%|███▍ | 4137/12188 [8:47:27<16:21:55, 7.32s/it] 34%|███▍ | 4138/12188 [8:47:36<17:11:42, 7.69s/it] {'loss': 0.3421, 'grad_norm': 0.617751406931135, 'learning_rate': 7.69262747708878e-06, 'epoch': 0.34} + 34%|███▍ | 4138/12188 [8:47:36<17:11:42, 7.69s/it] 34%|███▍ | 4139/12188 [8:47:44<17:28:38, 7.82s/it] {'loss': 0.3209, 'grad_norm': 0.6383900746965064, 'learning_rate': 7.691507802039861e-06, 'epoch': 0.34} + 34%|███▍ | 4139/12188 [8:47:44<17:28:38, 7.82s/it] 34%|███▍ | 4140/12188 [8:47:54<19:07:36, 8.56s/it] {'loss': 0.3545, 'grad_norm': 0.6619485120387361, 'learning_rate': 7.690387936920984e-06, 'epoch': 0.34} + 34%|███▍ | 4140/12188 [8:47:54<19:07:36, 8.56s/it] 34%|███▍ | 4141/12188 [8:48:02<18:11:34, 8.14s/it] {'loss': 0.3588, 'grad_norm': 0.6438548151848984, 'learning_rate': 7.689267881811235e-06, 'epoch': 0.34} + 34%|███▍ | 4141/12188 [8:48:02<18:11:34, 8.14s/it] 34%|███▍ | 4142/12188 [8:48:10<18:10:06, 8.13s/it] {'loss': 0.4125, 'grad_norm': 0.6581337681313286, 'learning_rate': 7.688147636789709e-06, 'epoch': 0.34} + 34%|███▍ | 4142/12188 [8:48:10<18:10:06, 8.13s/it] 34%|███▍ | 4143/12188 [8:48:16<17:11:58, 7.70s/it] {'loss': 0.3367, 'grad_norm': 0.6564239670323103, 'learning_rate': 7.687027201935516e-06, 'epoch': 0.34} + 34%|███▍ | 4143/12188 [8:48:16<17:11:58, 7.70s/it] 34%|███▍ | 4144/12188 [8:48:23<16:36:43, 7.43s/it] {'loss': 0.3831, 'grad_norm': 0.6922833063232003, 'learning_rate': 7.685906577327779e-06, 'epoch': 0.34} + 34%|███▍ | 4144/12188 [8:48:23<16:36:43, 7.43s/it] 34%|███▍ | 4145/12188 [8:48:30<16:08:50, 7.23s/it] {'loss': 0.3485, 'grad_norm': 0.6170678755364921, 'learning_rate': 7.684785763045635e-06, 'epoch': 0.34} + 34%|███▍ | 4145/12188 [8:48:30<16:08:50, 7.23s/it] 34%|███▍ | 4146/12188 [8:48:37<15:53:02, 7.11s/it] {'loss': 0.3471, 'grad_norm': 0.6527918436732315, 'learning_rate': 7.683664759168233e-06, 'epoch': 0.34} + 34%|███▍ | 4146/12188 [8:48:37<15:53:02, 7.11s/it] 34%|███▍ | 4147/12188 [8:48:44<15:59:54, 7.16s/it] {'loss': 0.3523, 'grad_norm': 0.6218469196777309, 'learning_rate': 7.682543565774736e-06, 'epoch': 0.34} + 34%|███▍ | 4147/12188 [8:48:44<15:59:54, 7.16s/it] 34%|███▍ | 4148/12188 [8:48:51<15:54:27, 7.12s/it] {'loss': 0.3537, 'grad_norm': 0.6448057374013446, 'learning_rate': 7.681422182944325e-06, 'epoch': 0.34} + 34%|███▍ | 4148/12188 [8:48:51<15:54:27, 7.12s/it] 34%|███▍ | 4149/12188 [8:48:58<15:37:49, 7.00s/it] {'loss': 0.33, 'grad_norm': 0.5570291417623058, 'learning_rate': 7.680300610756185e-06, 'epoch': 0.34} + 34%|███▍ | 4149/12188 [8:48:58<15:37:49, 7.00s/it] 34%|███▍ | 4150/12188 [8:49:05<15:45:29, 7.06s/it] {'loss': 0.3506, 'grad_norm': 0.6301632843778358, 'learning_rate': 7.679178849289523e-06, 'epoch': 0.34} + 34%|███▍ | 4150/12188 [8:49:05<15:45:29, 7.06s/it] 34%|███▍ | 4151/12188 [8:49:12<15:39:23, 7.01s/it] {'loss': 0.3474, 'grad_norm': 0.6515931428324109, 'learning_rate': 7.678056898623556e-06, 'epoch': 0.34} + 34%|███▍ | 4151/12188 [8:49:12<15:39:23, 7.01s/it] 34%|███▍ | 4152/12188 [8:49:21<16:52:42, 7.56s/it] {'loss': 0.3388, 'grad_norm': 0.651364428947656, 'learning_rate': 7.676934758837513e-06, 'epoch': 0.34} + 34%|███▍ | 4152/12188 [8:49:21<16:52:42, 7.56s/it] 34%|███▍ | 4153/12188 [8:49:28<16:55:54, 7.59s/it] {'loss': 0.3152, 'grad_norm': 0.5890293957823008, 'learning_rate': 7.675812430010636e-06, 'epoch': 0.34} + 34%|███▍ | 4153/12188 [8:49:28<16:55:54, 7.59s/it] 34%|███▍ | 4154/12188 [8:49:36<17:16:16, 7.74s/it] {'loss': 0.4014, 'grad_norm': 0.6886443607233144, 'learning_rate': 7.674689912222186e-06, 'epoch': 0.34} + 34%|███▍ | 4154/12188 [8:49:37<17:16:16, 7.74s/it] 34%|███▍ | 4155/12188 [8:49:44<16:55:56, 7.59s/it] {'loss': 0.3663, 'grad_norm': 0.6471019604599654, 'learning_rate': 7.673567205551434e-06, 'epoch': 0.34} + 34%|███▍ | 4155/12188 [8:49:44<16:55:56, 7.59s/it] 34%|███▍ | 4156/12188 [8:49:51<16:56:10, 7.59s/it] {'loss': 0.3554, 'grad_norm': 0.6147573590327262, 'learning_rate': 7.672444310077657e-06, 'epoch': 0.34} + 34%|███▍ | 4156/12188 [8:49:51<16:56:10, 7.59s/it] 34%|███▍ | 4157/12188 [8:49:58<16:33:22, 7.42s/it] {'loss': 0.3357, 'grad_norm': 0.6371172640234267, 'learning_rate': 7.67132122588016e-06, 'epoch': 0.34} + 34%|███▍ | 4157/12188 [8:49:58<16:33:22, 7.42s/it] 34%|███▍ | 4158/12188 [8:50:06<16:30:04, 7.40s/it] {'loss': 0.341, 'grad_norm': 0.582293503074691, 'learning_rate': 7.670197953038248e-06, 'epoch': 0.34} + 34%|███▍ | 4158/12188 [8:50:06<16:30:04, 7.40s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f0aa710f6a0> +[Try #0] Failed to fetch sample 4758762 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f0aa710f6a0> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Logs'"}, {'from': 'gpt', 'value': '\nclick(x=0.8445, y=0.388)\n'}]} + 34%|███▍ | 4159/12188 [8:50:13<16:17:27, 7.30s/it] {'loss': 0.3403, 'grad_norm': 0.6394112499375746, 'learning_rate': 7.669074491631246e-06, 'epoch': 0.34} + 34%|███▍ | 4159/12188 [8:50:13<16:17:27, 7.30s/it] 34%|███▍ | 4160/12188 [8:50:20<16:24:14, 7.36s/it] {'loss': 0.3681, 'grad_norm': 0.5969793174572812, 'learning_rate': 7.667950841738493e-06, 'epoch': 0.34} + 34%|███▍ | 4160/12188 [8:50:20<16:24:14, 7.36s/it] 34%|███▍ | 4161/12188 [8:50:27<16:18:43, 7.32s/it] {'loss': 0.3344, 'grad_norm': 0.6218478816256013, 'learning_rate': 7.666827003439338e-06, 'epoch': 0.34} + 34%|███▍ | 4161/12188 [8:50:28<16:18:43, 7.32s/it] 34%|███▍ | 4162/12188 [8:50:34<15:43:56, 7.06s/it] {'loss': 0.3421, 'grad_norm': 0.5986552307935588, 'learning_rate': 7.665702976813142e-06, 'epoch': 0.34} + 34%|███▍ | 4162/12188 [8:50:34<15:43:56, 7.06s/it] 34%|███▍ | 4163/12188 [8:50:41<15:40:09, 7.03s/it] {'loss': 0.3358, 'grad_norm': 0.6265208808661847, 'learning_rate': 7.664578761939288e-06, 'epoch': 0.34} + 34%|███▍ | 4163/12188 [8:50:41<15:40:09, 7.03s/it] 34%|███▍ | 4164/12188 [8:50:48<15:44:03, 7.06s/it] {'loss': 0.3469, 'grad_norm': 0.636826290104893, 'learning_rate': 7.663454358897161e-06, 'epoch': 0.34} + 34%|███▍ | 4164/12188 [8:50:48<15:44:03, 7.06s/it] 34%|███▍ | 4165/12188 [8:50:55<15:24:52, 6.92s/it] {'loss': 0.3385, 'grad_norm': 0.6127432665563871, 'learning_rate': 7.662329767766168e-06, 'epoch': 0.34} + 34%|███▍ | 4165/12188 [8:50:55<15:24:52, 6.92s/it] 34%|███▍ | 4166/12188 [8:51:03<16:36:27, 7.45s/it] {'loss': 0.3345, 'grad_norm': 0.6296551919136815, 'learning_rate': 7.661204988625724e-06, 'epoch': 0.34} + 34%|███▍ | 4166/12188 [8:51:03<16:36:27, 7.45s/it] 34%|███▍ | 4167/12188 [8:51:11<16:41:19, 7.49s/it] {'loss': 0.3377, 'grad_norm': 0.6334706555844278, 'learning_rate': 7.660080021555255e-06, 'epoch': 0.34} + 34%|███▍ | 4167/12188 [8:51:11<16:41:19, 7.49s/it] 34%|███▍ | 4168/12188 [8:51:19<16:47:13, 7.54s/it] {'loss': 0.3077, 'grad_norm': 0.6872982067060177, 'learning_rate': 7.658954866634213e-06, 'epoch': 0.34} + 34%|███▍ | 4168/12188 [8:51:19<16:47:13, 7.54s/it] 34%|███▍ | 4169/12188 [8:51:26<16:32:16, 7.42s/it] {'loss': 0.3748, 'grad_norm': 0.6120626683400578, 'learning_rate': 7.657829523942049e-06, 'epoch': 0.34} + 34%|███▍ | 4169/12188 [8:51:26<16:32:16, 7.42s/it] 34%|███▍ | 4170/12188 [8:51:33<16:12:49, 7.28s/it] {'loss': 0.3135, 'grad_norm': 0.6209229222010534, 'learning_rate': 7.656703993558235e-06, 'epoch': 0.34} + 34%|███▍ | 4170/12188 [8:51:33<16:12:49, 7.28s/it] 34%|███▍ | 4171/12188 [8:51:40<15:58:09, 7.17s/it] {'loss': 0.3305, 'grad_norm': 0.6169271841310977, 'learning_rate': 7.655578275562252e-06, 'epoch': 0.34} + 34%|███▍ | 4171/12188 [8:51:40<15:58:09, 7.17s/it] 34%|███▍ | 4172/12188 [8:51:46<15:48:01, 7.10s/it] {'loss': 0.311, 'grad_norm': 0.6003132310032544, 'learning_rate': 7.654452370033598e-06, 'epoch': 0.34} + 34%|███▍ | 4172/12188 [8:51:46<15:48:01, 7.10s/it] 34%|███▍ | 4173/12188 [8:51:56<17:17:12, 7.76s/it] {'loss': 0.3602, 'grad_norm': 0.5966932265723529, 'learning_rate': 7.653326277051783e-06, 'epoch': 0.34} + 34%|███▍ | 4173/12188 [8:51:56<17:17:12, 7.76s/it] 34%|███▍ | 4174/12188 [8:52:03<16:44:22, 7.52s/it] {'loss': 0.3529, 'grad_norm': 0.6353966093247563, 'learning_rate': 7.652199996696329e-06, 'epoch': 0.34} + 34%|███▍ | 4174/12188 [8:52:03<16:44:22, 7.52s/it] 34%|███▍ | 4175/12188 [8:52:10<16:16:16, 7.31s/it] {'loss': 0.4009, 'grad_norm': 0.6131613583334949, 'learning_rate': 7.651073529046772e-06, 'epoch': 0.34} + 34%|███▍ | 4175/12188 [8:52:10<16:16:16, 7.31s/it] 34%|███▍ | 4176/12188 [8:52:17<16:02:35, 7.21s/it] {'loss': 0.3296, 'grad_norm': 0.6376003007183152, 'learning_rate': 7.64994687418266e-06, 'epoch': 0.34} + 34%|███▍ | 4176/12188 [8:52:17<16:02:35, 7.21s/it] 34%|███▍ | 4177/12188 [8:52:24<15:52:51, 7.14s/it] {'loss': 0.3408, 'grad_norm': 0.6772421087192332, 'learning_rate': 7.648820032183559e-06, 'epoch': 0.34} + 34%|███▍ | 4177/12188 [8:52:24<15:52:51, 7.14s/it] 34%|███▍ | 4178/12188 [8:52:31<15:52:32, 7.14s/it] {'loss': 0.3422, 'grad_norm': 0.673260777300277, 'learning_rate': 7.647693003129044e-06, 'epoch': 0.34} + 34%|███▍ | 4178/12188 [8:52:31<15:52:32, 7.14s/it] 34%|███▍ | 4179/12188 [8:52:38<16:20:16, 7.34s/it] {'loss': 0.3851, 'grad_norm': 0.6276107990810416, 'learning_rate': 7.646565787098702e-06, 'epoch': 0.34} + 34%|███▍ | 4179/12188 [8:52:38<16:20:16, 7.34s/it] 34%|███▍ | 4180/12188 [8:52:46<16:39:32, 7.49s/it] {'loss': 0.366, 'grad_norm': 0.6091362958104939, 'learning_rate': 7.645438384172134e-06, 'epoch': 0.34} + 34%|███▍ | 4180/12188 [8:52:46<16:39:32, 7.49s/it] 34%|███▍ | 4181/12188 [8:52:53<16:15:50, 7.31s/it] {'loss': 0.3538, 'grad_norm': 0.6840879137211848, 'learning_rate': 7.64431079442896e-06, 'epoch': 0.34} + 34%|███▍ | 4181/12188 [8:52:53<16:15:50, 7.31s/it] 34%|███▍ | 4182/12188 [8:53:00<15:51:26, 7.13s/it] {'loss': 0.3369, 'grad_norm': 0.615641688926424, 'learning_rate': 7.643183017948806e-06, 'epoch': 0.34} + 34%|███▍ | 4182/12188 [8:53:00<15:51:26, 7.13s/it] 34%|███▍ | 4183/12188 [8:53:07<15:46:49, 7.10s/it] {'loss': 0.3505, 'grad_norm': 0.6300762101745881, 'learning_rate': 7.642055054811315e-06, 'epoch': 0.34} + 34%|███▍ | 4183/12188 [8:53:07<15:46:49, 7.10s/it] 34%|███▍ | 4184/12188 [8:53:15<16:14:23, 7.30s/it] {'loss': 0.3981, 'grad_norm': 0.5953211574660715, 'learning_rate': 7.640926905096139e-06, 'epoch': 0.34} + 34%|███▍ | 4184/12188 [8:53:15<16:14:23, 7.30s/it] 34%|███▍ | 4185/12188 [8:53:22<16:06:23, 7.25s/it] {'loss': 0.3463, 'grad_norm': 0.6658403625323033, 'learning_rate': 7.639798568882947e-06, 'epoch': 0.34} + 34%|███▍ | 4185/12188 [8:53:22<16:06:23, 7.25s/it] 34%|███▍ | 4186/12188 [8:53:30<16:27:39, 7.41s/it] {'loss': 0.3937, 'grad_norm': 0.6727834958858018, 'learning_rate': 7.638670046251424e-06, 'epoch': 0.34} + 34%|███▍ | 4186/12188 [8:53:30<16:27:39, 7.41s/it] 34%|███▍ | 4187/12188 [8:53:37<16:21:45, 7.36s/it] {'loss': 0.3892, 'grad_norm': 0.6599046868929475, 'learning_rate': 7.63754133728126e-06, 'epoch': 0.34} + 34%|███▍ | 4187/12188 [8:53:37<16:21:45, 7.36s/it] 34%|███▍ | 4188/12188 [8:53:44<15:57:23, 7.18s/it] {'loss': 0.3281, 'grad_norm': 0.6569295012268115, 'learning_rate': 7.636412442052163e-06, 'epoch': 0.34} + 34%|███▍ | 4188/12188 [8:53:44<15:57:23, 7.18s/it] 34%|███▍ | 4189/12188 [8:53:52<16:27:35, 7.41s/it] {'loss': 0.3301, 'grad_norm': 0.7205105339357653, 'learning_rate': 7.635283360643857e-06, 'epoch': 0.34} + 34%|███▍ | 4189/12188 [8:53:52<16:27:35, 7.41s/it] 34%|███▍ | 4190/12188 [8:53:59<16:14:20, 7.31s/it] {'loss': 0.4032, 'grad_norm': 0.6436054931278368, 'learning_rate': 7.634154093136073e-06, 'epoch': 0.34} + 34%|███▍ | 4190/12188 [8:53:59<16:14:20, 7.31s/it] 34%|███▍ | 4191/12188 [8:54:07<16:45:04, 7.54s/it] {'loss': 0.3274, 'grad_norm': 0.6553668616523388, 'learning_rate': 7.63302463960856e-06, 'epoch': 0.34} + 34%|███▍ | 4191/12188 [8:54:07<16:45:04, 7.54s/it] 34%|███▍ | 4192/12188 [8:54:13<16:10:55, 7.29s/it] {'loss': 0.3352, 'grad_norm': 0.6285064547994039, 'learning_rate': 7.631895000141077e-06, 'epoch': 0.34} + 34%|███▍ | 4192/12188 [8:54:13<16:10:55, 7.29s/it] 34%|███▍ | 4193/12188 [8:54:21<16:11:20, 7.29s/it] {'loss': 0.3699, 'grad_norm': 0.629241353024002, 'learning_rate': 7.630765174813398e-06, 'epoch': 0.34} + 34%|███▍ | 4193/12188 [8:54:21<16:11:20, 7.29s/it] 34%|███▍ | 4194/12188 [8:54:28<16:27:24, 7.41s/it] {'loss': 0.3567, 'grad_norm': 0.6807690662923291, 'learning_rate': 7.629635163705307e-06, 'epoch': 0.34} + 34%|███▍ | 4194/12188 [8:54:28<16:27:24, 7.41s/it] 34%|███▍ | 4195/12188 [8:54:37<17:05:12, 7.70s/it] {'loss': 0.3614, 'grad_norm': 0.6306659626364883, 'learning_rate': 7.628504966896608e-06, 'epoch': 0.34} + 34%|███▍ | 4195/12188 [8:54:37<17:05:12, 7.70s/it] 34%|███▍ | 4196/12188 [8:54:44<16:33:25, 7.46s/it] {'loss': 0.3799, 'grad_norm': 0.6552450636938627, 'learning_rate': 7.62737458446711e-06, 'epoch': 0.34} + 34%|███▍ | 4196/12188 [8:54:44<16:33:25, 7.46s/it] 34%|███▍ | 4197/12188 [8:54:51<16:44:59, 7.55s/it] {'loss': 0.4039, 'grad_norm': 0.6453503104225492, 'learning_rate': 7.62624401649664e-06, 'epoch': 0.34} + 34%|███▍ | 4197/12188 [8:54:51<16:44:59, 7.55s/it] 34%|███▍ | 4198/12188 [8:55:11<24:25:53, 11.01s/it] {'loss': 0.3201, 'grad_norm': 0.6009334128294239, 'learning_rate': 7.625113263065037e-06, 'epoch': 0.34} + 34%|███▍ | 4198/12188 [8:55:11<24:25:53, 11.01s/it] 34%|███▍ | 4199/12188 [8:55:19<22:29:21, 10.13s/it] {'loss': 0.3475, 'grad_norm': 0.59830809119082, 'learning_rate': 7.623982324252152e-06, 'epoch': 0.34} + 34%|███▍ | 4199/12188 [8:55:19<22:29:21, 10.13s/it] 34%|███▍ | 4200/12188 [8:55:26<20:44:18, 9.35s/it] {'loss': 0.3448, 'grad_norm': 0.7291623311275677, 'learning_rate': 7.622851200137853e-06, 'epoch': 0.34} + 34%|███▍ | 4200/12188 [8:55:26<20:44:18, 9.35s/it] 34%|███▍ | 4201/12188 [8:55:33<19:13:51, 8.67s/it] {'loss': 0.3698, 'grad_norm': 0.6338732573196608, 'learning_rate': 7.621719890802015e-06, 'epoch': 0.34} + 34%|███▍ | 4201/12188 [8:55:33<19:13:51, 8.67s/it] 34%|███▍ | 4202/12188 [8:56:07<35:49:02, 16.15s/it] {'loss': 0.3837, 'grad_norm': 0.655434616838403, 'learning_rate': 7.620588396324531e-06, 'epoch': 0.34} + 34%|███▍ | 4202/12188 [8:56:07<35:49:02, 16.15s/it] 34%|███▍ | 4203/12188 [8:56:14<29:52:14, 13.47s/it] {'loss': 0.3702, 'grad_norm': 0.6686584962886666, 'learning_rate': 7.619456716785304e-06, 'epoch': 0.34} + 34%|███▍ | 4203/12188 [8:56:14<29:52:14, 13.47s/it] 34%|███▍ | 4204/12188 [8:56:21<25:45:06, 11.61s/it] {'loss': 0.3422, 'grad_norm': 0.6593125026740291, 'learning_rate': 7.618324852264252e-06, 'epoch': 0.34} + 34%|███▍ | 4204/12188 [8:56:21<25:45:06, 11.61s/it] 35%|███▍ | 4205/12188 [8:56:28<22:31:26, 10.16s/it] {'loss': 0.3741, 'grad_norm': 0.6511905035731084, 'learning_rate': 7.617192802841307e-06, 'epoch': 0.34} + 35%|███▍ | 4205/12188 [8:56:28<22:31:26, 10.16s/it] 35%|███▍ | 4206/12188 [8:56:35<20:24:16, 9.20s/it] {'loss': 0.343, 'grad_norm': 0.6315667385825471, 'learning_rate': 7.61606056859641e-06, 'epoch': 0.35} + 35%|███▍ | 4206/12188 [8:56:35<20:24:16, 9.20s/it] 35%|███▍ | 4207/12188 [8:56:43<19:15:35, 8.69s/it] {'loss': 0.3476, 'grad_norm': 0.5971311019704033, 'learning_rate': 7.61492814960952e-06, 'epoch': 0.35} + 35%|███▍ | 4207/12188 [8:56:43<19:15:35, 8.69s/it] 35%|███▍ | 4208/12188 [8:56:51<19:00:18, 8.57s/it] {'loss': 0.3678, 'grad_norm': 0.6594458193600418, 'learning_rate': 7.613795545960602e-06, 'epoch': 0.35} + 35%|███▍ | 4208/12188 [8:56:51<19:00:18, 8.57s/it] 35%|███▍ | 4209/12188 [8:57:12<27:15:02, 12.30s/it] {'loss': 0.3944, 'grad_norm': 0.6374718536019456, 'learning_rate': 7.6126627577296455e-06, 'epoch': 0.35} + 35%|███▍ | 4209/12188 [8:57:12<27:15:02, 12.30s/it] 35%|███▍ | 4210/12188 [8:57:38<36:48:35, 16.61s/it] {'loss': 0.3706, 'grad_norm': 0.646288074249693, 'learning_rate': 7.6115297849966405e-06, 'epoch': 0.35} + 35%|███▍ | 4210/12188 [8:57:38<36:48:35, 16.61s/it] 35%|███▍ | 4211/12188 [8:58:27<58:00:15, 26.18s/it] {'loss': 0.3935, 'grad_norm': 0.673964731650604, 'learning_rate': 7.610396627841599e-06, 'epoch': 0.35} + 35%|███▍ | 4211/12188 [8:58:27<58:00:15, 26.18s/it] 35%|███▍ | 4212/12188 [8:58:46<53:29:45, 24.15s/it] {'loss': 0.3414, 'grad_norm': 0.6316129950973488, 'learning_rate': 7.609263286344542e-06, 'epoch': 0.35} + 35%|███▍ | 4212/12188 [8:58:46<53:29:45, 24.15s/it] 35%|███▍ | 4213/12188 [8:59:06<50:17:21, 22.70s/it] {'loss': 0.3683, 'grad_norm': 0.6192726674243323, 'learning_rate': 7.6081297605855035e-06, 'epoch': 0.35} + 35%|███▍ | 4213/12188 [8:59:06<50:17:21, 22.70s/it] 35%|███▍ | 4214/12188 [8:59:46<62:12:04, 28.08s/it] {'loss': 0.3569, 'grad_norm': 0.6197332532087737, 'learning_rate': 7.60699605064453e-06, 'epoch': 0.35} + 35%|███▍ | 4214/12188 [8:59:46<62:12:04, 28.08s/it] 35%|███▍ | 4215/12188 [9:00:22<67:26:21, 30.45s/it] {'loss': 0.3019, 'grad_norm': 0.606347068074032, 'learning_rate': 7.605862156601685e-06, 'epoch': 0.35} + 35%|███▍ | 4215/12188 [9:00:22<67:26:21, 30.45s/it] 35%|███▍ | 4216/12188 [9:00:30<52:11:57, 23.57s/it] {'loss': 0.3779, 'grad_norm': 0.6653438710702703, 'learning_rate': 7.604728078537041e-06, 'epoch': 0.35} + 35%|███▍ | 4216/12188 [9:00:30<52:11:57, 23.57s/it] 35%|███▍ | 4217/12188 [9:00:51<50:20:07, 22.73s/it] {'loss': 0.343, 'grad_norm': 0.6223921772266564, 'learning_rate': 7.603593816530683e-06, 'epoch': 0.35} + 35%|███▍ | 4217/12188 [9:00:51<50:20:07, 22.73s/it] 35%|███▍ | 4218/12188 [9:00:58<39:53:21, 18.02s/it] {'loss': 0.3797, 'grad_norm': 0.6070428411815881, 'learning_rate': 7.602459370662716e-06, 'epoch': 0.35} + 35%|███▍ | 4218/12188 [9:00:58<39:53:21, 18.02s/it] 35%|███▍ | 4219/12188 [9:01:05<32:47:20, 14.81s/it] {'loss': 0.3526, 'grad_norm': 0.659610433892064, 'learning_rate': 7.601324741013247e-06, 'epoch': 0.35} + 35%|███▍ | 4219/12188 [9:01:05<32:47:20, 14.81s/it]Invalidate trace cache @ step 2: expected module 1, but got module 364 + 35%|███▍ | 4220/12188 [9:01:32<40:40:08, 18.37s/it] {'loss': 0.685, 'grad_norm': 0.683372606055429, 'learning_rate': 7.600189927662407e-06, 'epoch': 0.35} + 35%|███▍ | 4220/12188 [9:01:32<40:40:08, 18.37s/it] 35%|███▍ | 4221/12188 [9:02:27<65:09:44, 29.44s/it] {'loss': 0.3287, 'grad_norm': 0.6192853574908286, 'learning_rate': 7.599054930690328e-06, 'epoch': 0.35} + 35%|███▍ | 4221/12188 [9:02:27<65:09:44, 29.44s/it] 35%|███▍ | 4222/12188 [9:02:34<50:17:50, 22.73s/it] {'loss': 0.3488, 'grad_norm': 0.6189136393817631, 'learning_rate': 7.597919750177168e-06, 'epoch': 0.35} + 35%|███▍ | 4222/12188 [9:02:34<50:17:50, 22.73s/it] 35%|███▍ | 4223/12188 [9:02:43<40:54:49, 18.49s/it] {'loss': 0.3754, 'grad_norm': 0.6529379163612716, 'learning_rate': 7.596784386203089e-06, 'epoch': 0.35} + 35%|███▍ | 4223/12188 [9:02:43<40:54:49, 18.49s/it] 35%|███▍ | 4224/12188 [9:03:08<45:35:45, 20.61s/it] {'loss': 0.3712, 'grad_norm': 0.6145389381502726, 'learning_rate': 7.595648838848269e-06, 'epoch': 0.35} + 35%|███▍ | 4224/12188 [9:03:08<45:35:45, 20.61s/it] 35%|███▍ | 4225/12188 [9:03:15<36:23:28, 16.45s/it] {'loss': 0.3185, 'grad_norm': 0.6094683308529003, 'learning_rate': 7.594513108192896e-06, 'epoch': 0.35} + 35%|███▍ | 4225/12188 [9:03:15<36:23:28, 16.45s/it] 35%|███▍ | 4226/12188 [9:03:35<38:55:18, 17.60s/it] {'loss': 0.3275, 'grad_norm': 0.688650902428089, 'learning_rate': 7.593377194317178e-06, 'epoch': 0.35} + 35%|███▍ | 4226/12188 [9:03:35<38:55:18, 17.60s/it] 35%|███▍ | 4227/12188 [9:04:18<55:22:40, 25.04s/it] {'loss': 0.3672, 'grad_norm': 0.6061120998345038, 'learning_rate': 7.59224109730133e-06, 'epoch': 0.35} + 35%|███▍ | 4227/12188 [9:04:18<55:22:40, 25.04s/it] 35%|███▍ | 4228/12188 [9:04:57<65:07:38, 29.45s/it] {'loss': 0.3452, 'grad_norm': 0.6143612657691596, 'learning_rate': 7.5911048172255785e-06, 'epoch': 0.35} + 35%|███▍ | 4228/12188 [9:04:57<65:07:38, 29.45s/it] 35%|███▍ | 4229/12188 [9:05:34<69:55:51, 31.63s/it] {'loss': 0.357, 'grad_norm': 0.6615846798131626, 'learning_rate': 7.589968354170168e-06, 'epoch': 0.35} + 35%|███▍ | 4229/12188 [9:05:34<69:55:51, 31.63s/it] 35%|███▍ | 4230/12188 [9:05:54<62:15:59, 28.17s/it] {'loss': 0.3215, 'grad_norm': 0.6151145594145898, 'learning_rate': 7.588831708215352e-06, 'epoch': 0.35} + 35%|███▍ | 4230/12188 [9:05:54<62:15:59, 28.17s/it] 35%|███▍ | 4231/12188 [9:06:13<55:52:06, 25.28s/it] {'loss': 0.3497, 'grad_norm': 0.635932913103609, 'learning_rate': 7.5876948794414015e-06, 'epoch': 0.35} + 35%|███▍ | 4231/12188 [9:06:13<55:52:06, 25.28s/it] 35%|███▍ | 4232/12188 [9:06:20<43:39:57, 19.76s/it] {'loss': 0.3477, 'grad_norm': 0.6743044942594593, 'learning_rate': 7.586557867928596e-06, 'epoch': 0.35} + 35%|███▍ | 4232/12188 [9:06:20<43:39:57, 19.76s/it] 35%|███▍ | 4233/12188 [9:07:07<62:12:32, 28.15s/it] {'loss': 0.3275, 'grad_norm': 0.6361215922338673, 'learning_rate': 7.58542067375723e-06, 'epoch': 0.35} + 35%|███▍ | 4233/12188 [9:07:07<62:12:32, 28.15s/it] 35%|███▍ | 4234/12188 [9:07:17<50:01:33, 22.64s/it] {'loss': 0.3426, 'grad_norm': 0.6550805951389614, 'learning_rate': 7.584283297007608e-06, 'epoch': 0.35} + 35%|███▍ | 4234/12188 [9:07:17<50:01:33, 22.64s/it] 35%|███▍ | 4235/12188 [9:08:04<66:08:46, 29.94s/it] {'loss': 0.3532, 'grad_norm': 0.6977456053747091, 'learning_rate': 7.583145737760053e-06, 'epoch': 0.35} + 35%|███▍ | 4235/12188 [9:08:04<66:08:46, 29.94s/it] 35%|███▍ | 4236/12188 [9:08:23<58:52:47, 26.66s/it] {'loss': 0.3903, 'grad_norm': 0.6217961920617204, 'learning_rate': 7.5820079960948975e-06, 'epoch': 0.35} + 35%|███▍ | 4236/12188 [9:08:23<58:52:47, 26.66s/it] 35%|███▍ | 4237/12188 [9:08:30<45:54:59, 20.79s/it] {'loss': 0.3584, 'grad_norm': 0.6339588251558873, 'learning_rate': 7.5808700720924845e-06, 'epoch': 0.35} + 35%|███▍ | 4237/12188 [9:08:30<45:54:59, 20.79s/it] 35%|███▍ | 4238/12188 [9:08:52<46:19:06, 20.97s/it] {'loss': 0.3531, 'grad_norm': 0.6122623741229729, 'learning_rate': 7.579731965833172e-06, 'epoch': 0.35} + 35%|███▍ | 4238/12188 [9:08:52<46:19:06, 20.97s/it] 35%|███▍ | 4239/12188 [9:08:59<37:06:36, 16.81s/it] {'loss': 0.3446, 'grad_norm': 0.5825195083240912, 'learning_rate': 7.578593677397334e-06, 'epoch': 0.35} + 35%|███▍ | 4239/12188 [9:08:59<37:06:36, 16.81s/it] 35%|███▍ | 4240/12188 [9:09:06<30:40:15, 13.89s/it] {'loss': 0.3391, 'grad_norm': 0.6077396104394803, 'learning_rate': 7.577455206865354e-06, 'epoch': 0.35} + 35%|███▍ | 4240/12188 [9:09:06<30:40:15, 13.89s/it] 35%|███▍ | 4241/12188 [9:09:13<26:17:51, 11.91s/it] {'loss': 0.3142, 'grad_norm': 0.6763596497391418, 'learning_rate': 7.5763165543176275e-06, 'epoch': 0.35} + 35%|███▍ | 4241/12188 [9:09:13<26:17:51, 11.91s/it] 35%|███▍ | 4242/12188 [9:09:20<23:17:59, 10.56s/it] {'loss': 0.3494, 'grad_norm': 0.5871255350160196, 'learning_rate': 7.5751777198345656e-06, 'epoch': 0.35} + 35%|███▍ | 4242/12188 [9:09:20<23:17:59, 10.56s/it] 35%|███▍ | 4243/12188 [9:09:41<29:41:48, 13.46s/it] {'loss': 0.3236, 'grad_norm': 0.6341308724082138, 'learning_rate': 7.574038703496589e-06, 'epoch': 0.35} + 35%|███▍ | 4243/12188 [9:09:41<29:41:48, 13.46s/it] 35%|███▍ | 4244/12188 [9:10:01<34:17:43, 15.54s/it] {'loss': 0.3692, 'grad_norm': 0.5868923729965149, 'learning_rate': 7.5728995053841355e-06, 'epoch': 0.35} + 35%|███▍ | 4244/12188 [9:10:01<34:17:43, 15.54s/it] 35%|███▍ | 4245/12188 [9:10:09<29:01:37, 13.16s/it] {'loss': 0.3362, 'grad_norm': 0.663033824117739, 'learning_rate': 7.571760125577653e-06, 'epoch': 0.35} + 35%|███▍ | 4245/12188 [9:10:09<29:01:37, 13.16s/it] 35%|███▍ | 4246/12188 [9:10:16<24:54:25, 11.29s/it] {'loss': 0.3924, 'grad_norm': 0.6292374048588996, 'learning_rate': 7.570620564157603e-06, 'epoch': 0.35} + 35%|███▍ | 4246/12188 [9:10:16<24:54:25, 11.29s/it] 35%|███▍ | 4247/12188 [9:10:22<21:50:01, 9.90s/it] {'loss': 0.3407, 'grad_norm': 0.6369732549947911, 'learning_rate': 7.569480821204457e-06, 'epoch': 0.35} + 35%|███▍ | 4247/12188 [9:10:22<21:50:01, 9.90s/it] 35%|███▍ | 4248/12188 [9:10:47<31:50:55, 14.44s/it] {'loss': 0.3915, 'grad_norm': 0.5970315724878045, 'learning_rate': 7.568340896798705e-06, 'epoch': 0.35} + 35%|███▍ | 4248/12188 [9:10:47<31:50:55, 14.44s/it] 35%|███▍ | 4249/12188 [9:10:54<26:58:57, 12.24s/it] {'loss': 0.3391, 'grad_norm': 0.6252682069447442, 'learning_rate': 7.567200791020845e-06, 'epoch': 0.35} + 35%|███▍ | 4249/12188 [9:10:54<26:58:57, 12.24s/it] 35%|███▍ | 4250/12188 [9:11:02<23:40:51, 10.74s/it] {'loss': 0.3385, 'grad_norm': 0.6495134321887972, 'learning_rate': 7.566060503951389e-06, 'epoch': 0.35} + 35%|███▍ | 4250/12188 [9:11:02<23:40:51, 10.74s/it] 35%|███▍ | 4251/12188 [9:11:08<20:55:03, 9.49s/it] {'loss': 0.3514, 'grad_norm': 0.6204995685189649, 'learning_rate': 7.564920035670863e-06, 'epoch': 0.35} + 35%|███▍ | 4251/12188 [9:11:08<20:55:03, 9.49s/it] 35%|███▍ | 4252/12188 [9:11:15<19:07:20, 8.67s/it] {'loss': 0.3767, 'grad_norm': 0.6980589131079669, 'learning_rate': 7.563779386259804e-06, 'epoch': 0.35} + 35%|███▍ | 4252/12188 [9:11:15<19:07:20, 8.67s/it] 35%|███▍ | 4253/12188 [9:11:21<17:41:18, 8.03s/it] {'loss': 0.3569, 'grad_norm': 0.637951603121941, 'learning_rate': 7.562638555798764e-06, 'epoch': 0.35} + 35%|███▍ | 4253/12188 [9:11:21<17:41:18, 8.03s/it] 35%|███▍ | 4254/12188 [9:11:29<17:20:44, 7.87s/it] {'loss': 0.3533, 'grad_norm': 0.6354906729350202, 'learning_rate': 7.561497544368309e-06, 'epoch': 0.35} + 35%|███▍ | 4254/12188 [9:11:29<17:20:44, 7.87s/it] 35%|███▍ | 4255/12188 [9:11:36<16:45:58, 7.61s/it] {'loss': 0.3527, 'grad_norm': 0.6343082681165116, 'learning_rate': 7.5603563520490116e-06, 'epoch': 0.35} + 35%|███▍ | 4255/12188 [9:11:36<16:45:58, 7.61s/it] 35%|███▍ | 4256/12188 [9:11:43<16:13:20, 7.36s/it] {'loss': 0.3281, 'grad_norm': 0.5956771182146808, 'learning_rate': 7.559214978921461e-06, 'epoch': 0.35} + 35%|███▍ | 4256/12188 [9:11:43<16:13:20, 7.36s/it] 35%|███▍ | 4257/12188 [9:11:50<15:51:31, 7.20s/it] {'loss': 0.3419, 'grad_norm': 0.6124560071680589, 'learning_rate': 7.558073425066261e-06, 'epoch': 0.35} + 35%|███▍ | 4257/12188 [9:11:50<15:51:31, 7.20s/it] 35%|███▍ | 4258/12188 [9:11:57<16:10:58, 7.35s/it] {'loss': 0.3378, 'grad_norm': 0.6352194252186627, 'learning_rate': 7.556931690564025e-06, 'epoch': 0.35} + 35%|███▍ | 4258/12188 [9:11:57<16:10:58, 7.35s/it] 35%|███▍ | 4259/12188 [9:12:06<17:02:10, 7.74s/it] {'loss': 0.317, 'grad_norm': 0.5691971213388486, 'learning_rate': 7.555789775495381e-06, 'epoch': 0.35} + 35%|███▍ | 4259/12188 [9:12:06<17:02:10, 7.74s/it] 35%|███▍ | 4260/12188 [9:12:13<16:54:36, 7.68s/it] {'loss': 0.3314, 'grad_norm': 0.6216878617862772, 'learning_rate': 7.554647679940971e-06, 'epoch': 0.35} + 35%|███▍ | 4260/12188 [9:12:13<16:54:36, 7.68s/it] 35%|███▍ | 4261/12188 [9:12:20<16:05:12, 7.31s/it] {'loss': 0.3349, 'grad_norm': 0.6576566668282812, 'learning_rate': 7.553505403981445e-06, 'epoch': 0.35} + 35%|███▍ | 4261/12188 [9:12:20<16:05:12, 7.31s/it] 35%|███▍ | 4262/12188 [9:12:26<15:38:19, 7.10s/it] {'loss': 0.3554, 'grad_norm': 0.6662283184890773, 'learning_rate': 7.552362947697469e-06, 'epoch': 0.35} + 35%|███▍ | 4262/12188 [9:12:27<15:38:19, 7.10s/it] 35%|███▍ | 4263/12188 [9:12:33<15:30:52, 7.05s/it] {'loss': 0.3696, 'grad_norm': 0.6864579523716543, 'learning_rate': 7.551220311169724e-06, 'epoch': 0.35} + 35%|███▍ | 4263/12188 [9:12:33<15:30:52, 7.05s/it] 35%|███▍ | 4264/12188 [9:12:41<15:58:29, 7.26s/it] {'loss': 0.3601, 'grad_norm': 0.635300120223772, 'learning_rate': 7.550077494478898e-06, 'epoch': 0.35} + 35%|███▍ | 4264/12188 [9:12:41<15:58:29, 7.26s/it] 35%|███▍ | 4265/12188 [9:12:48<15:32:29, 7.06s/it] {'loss': 0.3835, 'grad_norm': 0.628518939642808, 'learning_rate': 7.548934497705698e-06, 'epoch': 0.35} + 35%|███▍ | 4265/12188 [9:12:48<15:32:29, 7.06s/it] 35%|███▌ | 4266/12188 [9:12:55<15:35:12, 7.08s/it] {'loss': 0.3208, 'grad_norm': 0.6394347960841288, 'learning_rate': 7.5477913209308365e-06, 'epoch': 0.35} + 35%|███▌ | 4266/12188 [9:12:55<15:35:12, 7.08s/it] 35%|███▌ | 4267/12188 [9:13:02<15:38:12, 7.11s/it] {'loss': 0.329, 'grad_norm': 0.7221784985659809, 'learning_rate': 7.546647964235046e-06, 'epoch': 0.35} + 35%|███▌ | 4267/12188 [9:13:02<15:38:12, 7.11s/it] 35%|███▌ | 4268/12188 [9:13:09<15:14:55, 6.93s/it] {'loss': 0.3652, 'grad_norm': 0.6275033853648736, 'learning_rate': 7.545504427699071e-06, 'epoch': 0.35} + 35%|███▌ | 4268/12188 [9:13:09<15:14:55, 6.93s/it] 35%|███▌ | 4269/12188 [9:13:16<15:16:10, 6.94s/it] {'loss': 0.3448, 'grad_norm': 0.5861022241181714, 'learning_rate': 7.54436071140366e-06, 'epoch': 0.35} + 35%|███▌ | 4269/12188 [9:13:16<15:16:10, 6.94s/it] 35%|███▌ | 4270/12188 [9:13:22<15:11:10, 6.90s/it] {'loss': 0.3488, 'grad_norm': 0.6659024782185522, 'learning_rate': 7.543216815429583e-06, 'epoch': 0.35} + 35%|███▌ | 4270/12188 [9:13:22<15:11:10, 6.90s/it] 35%|███▌ | 4271/12188 [9:13:30<15:56:38, 7.25s/it] {'loss': 0.3226, 'grad_norm': 0.6463002426729056, 'learning_rate': 7.542072739857621e-06, 'epoch': 0.35} + 35%|███▌ | 4271/12188 [9:13:30<15:56:38, 7.25s/it] 35%|███▌ | 4272/12188 [9:13:37<15:44:46, 7.16s/it] {'loss': 0.3967, 'grad_norm': 0.6379978281913091, 'learning_rate': 7.540928484768566e-06, 'epoch': 0.35} + 35%|███▌ | 4272/12188 [9:13:37<15:44:46, 7.16s/it] 35%|███▌ | 4273/12188 [9:13:45<15:52:10, 7.22s/it] {'loss': 0.3261, 'grad_norm': 0.6194832703490142, 'learning_rate': 7.539784050243225e-06, 'epoch': 0.35} + 35%|███▌ | 4273/12188 [9:13:45<15:52:10, 7.22s/it] 35%|███▌ | 4274/12188 [9:13:51<15:29:24, 7.05s/it] {'loss': 0.4138, 'grad_norm': 0.6853577658157123, 'learning_rate': 7.538639436362414e-06, 'epoch': 0.35} + 35%|███▌ | 4274/12188 [9:13:51<15:29:24, 7.05s/it] 35%|███▌ | 4275/12188 [9:13:59<15:47:29, 7.18s/it] {'loss': 0.3619, 'grad_norm': 0.6276949067194048, 'learning_rate': 7.537494643206965e-06, 'epoch': 0.35} + 35%|███▌ | 4275/12188 [9:13:59<15:47:29, 7.18s/it] 35%|███▌ | 4276/12188 [9:14:06<15:53:43, 7.23s/it] {'loss': 0.347, 'grad_norm': 0.6646120184757417, 'learning_rate': 7.536349670857721e-06, 'epoch': 0.35} + 35%|███▌ | 4276/12188 [9:14:06<15:53:43, 7.23s/it] 35%|███▌ | 4277/12188 [9:14:13<15:49:33, 7.20s/it] {'loss': 0.3262, 'grad_norm': 0.6345240435841458, 'learning_rate': 7.535204519395538e-06, 'epoch': 0.35} + 35%|███▌ | 4277/12188 [9:14:13<15:49:33, 7.20s/it] 35%|███▌ | 4278/12188 [9:14:20<15:36:30, 7.10s/it] {'loss': 0.3397, 'grad_norm': 0.6236557023572784, 'learning_rate': 7.534059188901286e-06, 'epoch': 0.35} + 35%|███▌ | 4278/12188 [9:14:20<15:36:30, 7.10s/it] 35%|███▌ | 4279/12188 [9:14:27<15:34:31, 7.09s/it] {'loss': 0.3372, 'grad_norm': 0.6173829657257821, 'learning_rate': 7.532913679455842e-06, 'epoch': 0.35} + 35%|███▌ | 4279/12188 [9:14:27<15:34:31, 7.09s/it] 35%|███▌ | 4280/12188 [9:14:34<15:22:44, 7.00s/it] {'loss': 0.3563, 'grad_norm': 0.6587159208071837, 'learning_rate': 7.531767991140106e-06, 'epoch': 0.35} + 35%|███▌ | 4280/12188 [9:14:34<15:22:44, 7.00s/it] 35%|███▌ | 4281/12188 [9:14:42<15:39:55, 7.13s/it] {'loss': 0.3168, 'grad_norm': 0.6597391712111631, 'learning_rate': 7.530622124034983e-06, 'epoch': 0.35} + 35%|███▌ | 4281/12188 [9:14:42<15:39:55, 7.13s/it] 35%|███▌ | 4282/12188 [9:14:48<15:20:34, 6.99s/it] {'loss': 0.3475, 'grad_norm': 0.6436967924359435, 'learning_rate': 7.52947607822139e-06, 'epoch': 0.35} + 35%|███▌ | 4282/12188 [9:14:48<15:20:34, 6.99s/it] 35%|███▌ | 4283/12188 [9:14:55<15:08:17, 6.89s/it] {'loss': 0.3255, 'grad_norm': 0.5977351127833163, 'learning_rate': 7.528329853780261e-06, 'epoch': 0.35} + 35%|███▌ | 4283/12188 [9:14:55<15:08:17, 6.89s/it] 35%|███▌ | 4284/12188 [9:15:02<15:37:56, 7.12s/it] {'loss': 0.3368, 'grad_norm': 0.5776958894034172, 'learning_rate': 7.527183450792539e-06, 'epoch': 0.35} + 35%|███▌ | 4284/12188 [9:15:03<15:37:56, 7.12s/it] 35%|███▌ | 4285/12188 [9:15:10<15:45:59, 7.18s/it] {'loss': 0.4177, 'grad_norm': 0.6889093339830665, 'learning_rate': 7.526036869339182e-06, 'epoch': 0.35} + 35%|███▌ | 4285/12188 [9:15:10<15:45:59, 7.18s/it] 35%|███▌ | 4286/12188 [9:15:17<15:27:21, 7.04s/it] {'loss': 0.3585, 'grad_norm': 0.6388676181149929, 'learning_rate': 7.52489010950116e-06, 'epoch': 0.35} + 35%|███▌ | 4286/12188 [9:15:17<15:27:21, 7.04s/it] 35%|███▌ | 4287/12188 [9:15:25<16:26:21, 7.49s/it] {'loss': 0.3376, 'grad_norm': 0.6778103223709722, 'learning_rate': 7.523743171359454e-06, 'epoch': 0.35} + 35%|███▌ | 4287/12188 [9:15:25<16:26:21, 7.49s/it] 35%|███▌ | 4288/12188 [9:15:32<16:06:07, 7.34s/it] {'loss': 0.3885, 'grad_norm': 0.6581683102443368, 'learning_rate': 7.522596054995061e-06, 'epoch': 0.35} + 35%|███▌ | 4288/12188 [9:15:32<16:06:07, 7.34s/it] 35%|███▌ | 4289/12188 [9:15:39<15:47:54, 7.20s/it] {'loss': 0.335, 'grad_norm': 0.6258515781265952, 'learning_rate': 7.521448760488987e-06, 'epoch': 0.35} + 35%|███▌ | 4289/12188 [9:15:39<15:47:54, 7.20s/it] 35%|███▌ | 4290/12188 [9:15:46<16:00:28, 7.30s/it] {'loss': 0.3821, 'grad_norm': 0.6263623170867634, 'learning_rate': 7.520301287922254e-06, 'epoch': 0.35} + 35%|███▌ | 4290/12188 [9:15:46<16:00:28, 7.30s/it] 35%|███▌ | 4291/12188 [9:15:54<16:16:14, 7.42s/it] {'loss': 0.3077, 'grad_norm': 0.5717338245276864, 'learning_rate': 7.519153637375892e-06, 'epoch': 0.35} + 35%|███▌ | 4291/12188 [9:15:54<16:16:14, 7.42s/it] 35%|███▌ | 4292/12188 [9:16:02<16:16:23, 7.42s/it] {'loss': 0.3629, 'grad_norm': 0.6026086667051354, 'learning_rate': 7.518005808930946e-06, 'epoch': 0.35} + 35%|███▌ | 4292/12188 [9:16:02<16:16:23, 7.42s/it] 35%|███▌ | 4293/12188 [9:16:09<16:01:51, 7.31s/it] {'loss': 0.3657, 'grad_norm': 0.6245381727471431, 'learning_rate': 7.5168578026684776e-06, 'epoch': 0.35} + 35%|███▌ | 4293/12188 [9:16:09<16:01:51, 7.31s/it] 35%|███▌ | 4294/12188 [9:16:46<35:42:34, 16.29s/it] {'loss': 0.3465, 'grad_norm': 0.6257045745573946, 'learning_rate': 7.515709618669553e-06, 'epoch': 0.35} + 35%|███▌ | 4294/12188 [9:16:46<35:42:34, 16.29s/it] 35%|███▌ | 4295/12188 [9:16:54<30:21:45, 13.85s/it] {'loss': 0.3571, 'grad_norm': 0.6488353078263582, 'learning_rate': 7.514561257015258e-06, 'epoch': 0.35} + 35%|███▌ | 4295/12188 [9:16:54<30:21:45, 13.85s/it] 35%|███▌ | 4296/12188 [9:17:23<40:32:26, 18.49s/it] {'loss': 0.3343, 'grad_norm': 0.6493271002323215, 'learning_rate': 7.513412717786689e-06, 'epoch': 0.35} + 35%|███▌ | 4296/12188 [9:17:23<40:32:26, 18.49s/it] 35%|███▌ | 4297/12188 [9:17:44<41:39:14, 19.00s/it] {'loss': 0.3412, 'grad_norm': 0.6314440378527651, 'learning_rate': 7.512264001064948e-06, 'epoch': 0.35} + 35%|███▌ | 4297/12188 [9:17:44<41:39:14, 19.00s/it] 35%|███▌ | 4298/12188 [9:17:50<33:42:59, 15.38s/it] {'loss': 0.336, 'grad_norm': 0.7105644261300093, 'learning_rate': 7.5111151069311614e-06, 'epoch': 0.35} + 35%|███▌ | 4298/12188 [9:17:50<33:42:59, 15.38s/it] 35%|███▌ | 4299/12188 [9:18:30<49:30:48, 22.59s/it] {'loss': 0.3211, 'grad_norm': 0.6230930593812091, 'learning_rate': 7.509966035466461e-06, 'epoch': 0.35} + 35%|███▌ | 4299/12188 [9:18:30<49:30:48, 22.59s/it] 35%|███▌ | 4300/12188 [9:19:20<67:52:59, 30.98s/it] {'loss': 0.3422, 'grad_norm': 0.6395672974985945, 'learning_rate': 7.508816786751991e-06, 'epoch': 0.35} + 35%|███▌ | 4300/12188 [9:19:20<67:52:59, 30.98s/it] 35%|███▌ | 4301/12188 [9:19:41<60:46:28, 27.74s/it] {'loss': 0.3211, 'grad_norm': 0.5982304442826913, 'learning_rate': 7.50766736086891e-06, 'epoch': 0.35} + 35%|███▌ | 4301/12188 [9:19:41<60:46:28, 27.74s/it] 35%|███▌ | 4302/12188 [9:19:47<46:55:54, 21.42s/it] {'loss': 0.3325, 'grad_norm': 0.6296563833320001, 'learning_rate': 7.50651775789839e-06, 'epoch': 0.35} + 35%|███▌ | 4302/12188 [9:19:47<46:55:54, 21.42s/it] 35%|███▌ | 4303/12188 [9:20:30<61:11:59, 27.94s/it] {'loss': 0.3186, 'grad_norm': 0.6063132479136442, 'learning_rate': 7.505367977921611e-06, 'epoch': 0.35} + 35%|███▌ | 4303/12188 [9:20:30<61:11:59, 27.94s/it] 35%|███▌ | 4304/12188 [9:20:50<55:52:17, 25.51s/it] {'loss': 0.3686, 'grad_norm': 0.6617527989235938, 'learning_rate': 7.504218021019773e-06, 'epoch': 0.35} + 35%|███▌ | 4304/12188 [9:20:50<55:52:17, 25.51s/it] 35%|███▌ | 4305/12188 [9:21:47<76:34:30, 34.97s/it] {'loss': 0.3787, 'grad_norm': 0.6366464504170396, 'learning_rate': 7.503067887274081e-06, 'epoch': 0.35} + 35%|███▌ | 4305/12188 [9:21:47<76:34:30, 34.97s/it] 35%|███▌ | 4306/12188 [9:22:30<81:52:51, 37.40s/it] {'loss': 0.3503, 'grad_norm': 0.6223089949692734, 'learning_rate': 7.501917576765754e-06, 'epoch': 0.35} + 35%|███▌ | 4306/12188 [9:22:30<81:52:51, 37.40s/it] 35%|███▌ | 4307/12188 [9:22:51<70:47:32, 32.34s/it] {'loss': 0.3318, 'grad_norm': 0.6255287942167347, 'learning_rate': 7.500767089576029e-06, 'epoch': 0.35} + 35%|███▌ | 4307/12188 [9:22:51<70:47:32, 32.34s/it] 35%|███▌ | 4308/12188 [9:22:58<54:21:42, 24.84s/it] {'loss': 0.3448, 'grad_norm': 0.6440984228788312, 'learning_rate': 7.499616425786151e-06, 'epoch': 0.35} + 35%|███▌ | 4308/12188 [9:22:58<54:21:42, 24.84s/it] 35%|███▌ | 4309/12188 [9:23:05<42:23:59, 19.37s/it] {'loss': 0.3575, 'grad_norm': 0.6433205251759967, 'learning_rate': 7.498465585477376e-06, 'epoch': 0.35} + 35%|███▌ | 4309/12188 [9:23:05<42:23:59, 19.37s/it] 35%|███▌ | 4310/12188 [9:23:13<34:52:40, 15.94s/it] {'loss': 0.3615, 'grad_norm': 0.6174748092036719, 'learning_rate': 7.497314568730975e-06, 'epoch': 0.35} + 35%|███▌ | 4310/12188 [9:23:13<34:52:40, 15.94s/it] 35%|███▌ | 4311/12188 [9:23:20<29:05:57, 13.30s/it] {'loss': 0.3662, 'grad_norm': 0.7916555641271639, 'learning_rate': 7.496163375628232e-06, 'epoch': 0.35} + 35%|███▌ | 4311/12188 [9:23:20<29:05:57, 13.30s/it] 35%|███▌ | 4312/12188 [9:23:57<44:33:20, 20.37s/it] {'loss': 0.3306, 'grad_norm': 0.6586975818400768, 'learning_rate': 7.495012006250441e-06, 'epoch': 0.35} + 35%|███▌ | 4312/12188 [9:23:57<44:33:20, 20.37s/it] 35%|███▌ | 4313/12188 [9:24:04<35:45:45, 16.35s/it] {'loss': 0.3375, 'grad_norm': 0.6452707928966408, 'learning_rate': 7.493860460678913e-06, 'epoch': 0.35} + 35%|███▌ | 4313/12188 [9:24:04<35:45:45, 16.35s/it] 35%|███▌ | 4314/12188 [9:24:11<30:04:19, 13.75s/it] {'loss': 0.3553, 'grad_norm': 0.6728464603494987, 'learning_rate': 7.492708738994963e-06, 'epoch': 0.35} + 35%|███▌ | 4314/12188 [9:24:11<30:04:19, 13.75s/it] 35%|███▌ | 4315/12188 [9:24:19<25:52:46, 11.83s/it] {'loss': 0.3402, 'grad_norm': 0.6272506664444788, 'learning_rate': 7.491556841279927e-06, 'epoch': 0.35} + 35%|███▌ | 4315/12188 [9:24:19<25:52:46, 11.83s/it] 35%|███▌ | 4316/12188 [9:24:27<23:28:07, 10.73s/it] {'loss': 0.3618, 'grad_norm': 0.6311330295476362, 'learning_rate': 7.490404767615151e-06, 'epoch': 0.35} + 35%|███▌ | 4316/12188 [9:24:27<23:28:07, 10.73s/it] 35%|███▌ | 4317/12188 [9:24:34<21:05:17, 9.65s/it] {'loss': 0.3784, 'grad_norm': 0.6400532587798117, 'learning_rate': 7.489252518081992e-06, 'epoch': 0.35} + 35%|███▌ | 4317/12188 [9:24:34<21:05:17, 9.65s/it] 35%|███▌ | 4318/12188 [9:24:42<19:45:39, 9.04s/it] {'loss': 0.3485, 'grad_norm': 0.642829908089807, 'learning_rate': 7.488100092761818e-06, 'epoch': 0.35} + 35%|███▌ | 4318/12188 [9:24:42<19:45:39, 9.04s/it] 35%|███▌ | 4319/12188 [9:24:50<19:10:45, 8.77s/it] {'loss': 0.3537, 'grad_norm': 0.6694575577293749, 'learning_rate': 7.486947491736014e-06, 'epoch': 0.35} + 35%|███▌ | 4319/12188 [9:24:50<19:10:45, 8.77s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f13ef8caa20> +[Try #0] Failed to fetch sample 4345383 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f13ef8caa20> +Problematic sample: {'image': '20240823_045930_before_screenshot_sub3.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Class: feedback_link_icon-DS-EntryPoint1-1'"}, {'from': 'gpt', 'value': '\nclick(x=0.9035, y=0.921)\n'}]} + 35%|███▌ | 4320/12188 [9:24:57<18:04:18, 8.27s/it] {'loss': 0.3723, 'grad_norm': 0.6751649659868225, 'learning_rate': 7.485794715085972e-06, 'epoch': 0.35} + 35%|███▌ | 4320/12188 [9:24:57<18:04:18, 8.27s/it] 35%|███▌ | 4321/12188 [9:25:05<17:45:06, 8.12s/it] {'loss': 0.3433, 'grad_norm': 0.6565596275845019, 'learning_rate': 7.484641762893103e-06, 'epoch': 0.35} + 35%|███▌ | 4321/12188 [9:25:05<17:45:06, 8.12s/it] 35%|███▌ | 4322/12188 [9:25:12<16:54:14, 7.74s/it] {'loss': 0.36, 'grad_norm': 0.6475155859450633, 'learning_rate': 7.4834886352388235e-06, 'epoch': 0.35} + 35%|███▌ | 4322/12188 [9:25:12<16:54:14, 7.74s/it] 35%|███▌ | 4323/12188 [9:25:19<16:22:56, 7.50s/it] {'loss': 0.3622, 'grad_norm': 0.6567303792492567, 'learning_rate': 7.482335332204568e-06, 'epoch': 0.35} + 35%|███▌ | 4323/12188 [9:25:19<16:22:56, 7.50s/it] 35%|███▌ | 4324/12188 [9:25:25<15:59:49, 7.32s/it] {'loss': 0.3891, 'grad_norm': 0.6658836652188527, 'learning_rate': 7.481181853871779e-06, 'epoch': 0.35} + 35%|███▌ | 4324/12188 [9:25:25<15:59:49, 7.32s/it] 35%|███▌ | 4325/12188 [9:25:33<16:03:30, 7.35s/it] {'loss': 0.3469, 'grad_norm': 0.6317212592335821, 'learning_rate': 7.4800282003219125e-06, 'epoch': 0.35} + 35%|███▌ | 4325/12188 [9:25:33<16:03:30, 7.35s/it] 35%|███▌ | 4326/12188 [9:25:40<15:39:24, 7.17s/it] {'loss': 0.3809, 'grad_norm': 0.6475511231525789, 'learning_rate': 7.47887437163644e-06, 'epoch': 0.35} + 35%|███▌ | 4326/12188 [9:25:40<15:39:24, 7.17s/it] 36%|███▌ | 4327/12188 [9:25:47<16:04:18, 7.36s/it] {'loss': 0.3408, 'grad_norm': 0.6627038432445448, 'learning_rate': 7.477720367896841e-06, 'epoch': 0.36} + 36%|███▌ | 4327/12188 [9:25:47<16:04:18, 7.36s/it] 36%|███▌ | 4328/12188 [9:25:55<16:00:55, 7.34s/it] {'loss': 0.3403, 'grad_norm': 0.6629144233435889, 'learning_rate': 7.476566189184611e-06, 'epoch': 0.36} + 36%|███▌ | 4328/12188 [9:25:55<16:00:55, 7.34s/it] 36%|███▌ | 4329/12188 [9:26:02<15:45:57, 7.22s/it] {'loss': 0.3314, 'grad_norm': 0.6182701765851676, 'learning_rate': 7.4754118355812555e-06, 'epoch': 0.36} + 36%|███▌ | 4329/12188 [9:26:02<15:45:57, 7.22s/it] 36%|███▌ | 4330/12188 [9:26:08<15:29:42, 7.10s/it] {'loss': 0.335, 'grad_norm': 0.6460695654600577, 'learning_rate': 7.474257307168294e-06, 'epoch': 0.36} + 36%|███▌ | 4330/12188 [9:26:08<15:29:42, 7.10s/it] 36%|███▌ | 4331/12188 [9:26:15<15:19:51, 7.02s/it] {'loss': 0.3828, 'grad_norm': 0.7087393806172034, 'learning_rate': 7.473102604027256e-06, 'epoch': 0.36} + 36%|███▌ | 4331/12188 [9:26:15<15:19:51, 7.02s/it] 36%|███▌ | 4332/12188 [9:26:23<15:34:02, 7.13s/it] {'loss': 0.3568, 'grad_norm': 0.6405880082173135, 'learning_rate': 7.471947726239686e-06, 'epoch': 0.36} + 36%|███▌ | 4332/12188 [9:26:23<15:34:02, 7.13s/it] 36%|███▌ | 4333/12188 [9:26:30<15:40:38, 7.19s/it] {'loss': 0.3264, 'grad_norm': 0.5982838278312579, 'learning_rate': 7.470792673887137e-06, 'epoch': 0.36} + 36%|███▌ | 4333/12188 [9:26:30<15:40:38, 7.19s/it] 36%|███▌ | 4334/12188 [9:26:37<15:41:37, 7.19s/it] {'loss': 0.3313, 'grad_norm': 0.610301576918131, 'learning_rate': 7.4696374470511816e-06, 'epoch': 0.36} + 36%|███▌ | 4334/12188 [9:26:37<15:41:37, 7.19s/it] 36%|███▌ | 4335/12188 [9:26:45<15:48:06, 7.24s/it] {'loss': 0.3257, 'grad_norm': 0.6011528764884647, 'learning_rate': 7.468482045813397e-06, 'epoch': 0.36} + 36%|███▌ | 4335/12188 [9:26:45<15:48:06, 7.24s/it] 36%|███▌ | 4336/12188 [9:26:52<15:45:45, 7.23s/it] {'loss': 0.3608, 'grad_norm': 0.5929875644428972, 'learning_rate': 7.467326470255375e-06, 'epoch': 0.36} + 36%|███▌ | 4336/12188 [9:26:52<15:45:45, 7.23s/it] 36%|███▌ | 4337/12188 [9:27:00<16:38:24, 7.63s/it] {'loss': 0.3795, 'grad_norm': 0.6236428836693166, 'learning_rate': 7.466170720458723e-06, 'epoch': 0.36} + 36%|███▌ | 4337/12188 [9:27:00<16:38:24, 7.63s/it] 36%|███▌ | 4338/12188 [9:27:07<16:05:13, 7.38s/it] {'loss': 0.3186, 'grad_norm': 0.6106339630704036, 'learning_rate': 7.4650147965050565e-06, 'epoch': 0.36} + 36%|███▌ | 4338/12188 [9:27:07<16:05:13, 7.38s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7fd2dfd37e70> +[Try #0] Failed to fetch sample 4477783 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7fd2dfd37e70> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Community portal'"}, {'from': 'gpt', 'value': '\nclick(x=0.865, y=0.226)\n'}]} + 36%|███▌ | 4339/12188 [9:27:15<16:05:41, 7.38s/it] {'loss': 0.3726, 'grad_norm': 0.6369288925068904, 'learning_rate': 7.463858698476005e-06, 'epoch': 0.36} + 36%|███▌ | 4339/12188 [9:27:15<16:05:41, 7.38s/it] 36%|███▌ | 4340/12188 [9:27:21<15:33:09, 7.13s/it] {'loss': 0.3364, 'grad_norm': 0.6862770160706607, 'learning_rate': 7.462702426453211e-06, 'epoch': 0.36} + 36%|███▌ | 4340/12188 [9:27:21<15:33:09, 7.13s/it] 36%|███▌ | 4341/12188 [9:27:30<17:01:07, 7.81s/it] {'loss': 0.305, 'grad_norm': 0.5938372492364701, 'learning_rate': 7.461545980518329e-06, 'epoch': 0.36} + 36%|███▌ | 4341/12188 [9:27:30<17:01:07, 7.81s/it] 36%|███▌ | 4342/12188 [9:27:38<17:03:09, 7.82s/it] {'loss': 0.3328, 'grad_norm': 0.6212205515244796, 'learning_rate': 7.460389360753022e-06, 'epoch': 0.36} + 36%|███▌ | 4342/12188 [9:27:38<17:03:09, 7.82s/it] 36%|███▌ | 4343/12188 [9:27:45<16:32:11, 7.59s/it] {'loss': 0.3401, 'grad_norm': 0.6656243008759053, 'learning_rate': 7.459232567238974e-06, 'epoch': 0.36} + 36%|███▌ | 4343/12188 [9:27:45<16:32:11, 7.59s/it] 36%|███▌ | 4344/12188 [9:27:53<16:24:02, 7.53s/it] {'loss': 0.3283, 'grad_norm': 0.6547177870983455, 'learning_rate': 7.458075600057872e-06, 'epoch': 0.36} + 36%|███▌ | 4344/12188 [9:27:53<16:24:02, 7.53s/it] 36%|███▌ | 4345/12188 [9:28:00<16:03:08, 7.37s/it] {'loss': 0.3486, 'grad_norm': 0.6582335482137988, 'learning_rate': 7.456918459291422e-06, 'epoch': 0.36} + 36%|███▌ | 4345/12188 [9:28:00<16:03:08, 7.37s/it] 36%|███▌ | 4346/12188 [9:28:09<17:10:19, 7.88s/it] {'loss': 0.3478, 'grad_norm': 0.6324710227916306, 'learning_rate': 7.455761145021335e-06, 'epoch': 0.36} + 36%|███▌ | 4346/12188 [9:28:09<17:10:19, 7.88s/it] 36%|███▌ | 4347/12188 [9:28:18<17:41:25, 8.12s/it] {'loss': 0.3496, 'grad_norm': 0.6414716709773297, 'learning_rate': 7.454603657329343e-06, 'epoch': 0.36} + 36%|███▌ | 4347/12188 [9:28:18<17:41:25, 8.12s/it] 36%|███▌ | 4348/12188 [9:28:25<17:17:35, 7.94s/it] {'loss': 0.3227, 'grad_norm': 0.6507135276064299, 'learning_rate': 7.453445996297184e-06, 'epoch': 0.36} + 36%|███▌ | 4348/12188 [9:28:25<17:17:35, 7.94s/it] 36%|███▌ | 4349/12188 [9:28:32<16:45:51, 7.70s/it] {'loss': 0.3678, 'grad_norm': 0.664473897068423, 'learning_rate': 7.452288162006611e-06, 'epoch': 0.36} + 36%|███▌ | 4349/12188 [9:28:32<16:45:51, 7.70s/it] 36%|███▌ | 4350/12188 [9:28:39<16:28:51, 7.57s/it] {'loss': 0.3529, 'grad_norm': 0.6689909548618833, 'learning_rate': 7.451130154539388e-06, 'epoch': 0.36} + 36%|███▌ | 4350/12188 [9:28:39<16:28:51, 7.57s/it] 36%|███▌ | 4351/12188 [9:28:47<16:18:26, 7.49s/it] {'loss': 0.373, 'grad_norm': 0.653677999501801, 'learning_rate': 7.44997197397729e-06, 'epoch': 0.36} + 36%|███▌ | 4351/12188 [9:28:47<16:18:26, 7.49s/it] 36%|███▌ | 4352/12188 [9:28:54<15:51:10, 7.28s/it] {'loss': 0.3737, 'grad_norm': 0.6383447491320662, 'learning_rate': 7.448813620402109e-06, 'epoch': 0.36} + 36%|███▌ | 4352/12188 [9:28:54<15:51:10, 7.28s/it] 36%|███▌ | 4353/12188 [9:29:01<16:15:00, 7.47s/it] {'loss': 0.3309, 'grad_norm': 0.591868408217995, 'learning_rate': 7.4476550938956445e-06, 'epoch': 0.36} + 36%|███▌ | 4353/12188 [9:29:01<16:15:00, 7.47s/it] 36%|███▌ | 4354/12188 [9:29:09<16:06:18, 7.40s/it] {'loss': 0.365, 'grad_norm': 0.6815575533534833, 'learning_rate': 7.446496394539708e-06, 'epoch': 0.36} + 36%|███▌ | 4354/12188 [9:29:09<16:06:18, 7.40s/it] 36%|███▌ | 4355/12188 [9:29:16<16:08:27, 7.42s/it] {'loss': 0.3402, 'grad_norm': 0.6733698593491041, 'learning_rate': 7.445337522416129e-06, 'epoch': 0.36} + 36%|███▌ | 4355/12188 [9:29:16<16:08:27, 7.42s/it] 36%|███▌ | 4356/12188 [9:29:23<15:49:42, 7.28s/it] {'loss': 0.3617, 'grad_norm': 0.6394825126233591, 'learning_rate': 7.44417847760674e-06, 'epoch': 0.36} + 36%|███▌ | 4356/12188 [9:29:23<15:49:42, 7.28s/it] 36%|███▌ | 4357/12188 [9:29:31<16:16:04, 7.48s/it] {'loss': 0.3555, 'grad_norm': 0.6048246932515794, 'learning_rate': 7.443019260193396e-06, 'epoch': 0.36} + 36%|███▌ | 4357/12188 [9:29:31<16:16:04, 7.48s/it] 36%|███▌ | 4358/12188 [9:29:39<16:28:06, 7.57s/it] {'loss': 0.3421, 'grad_norm': 0.578084433260552, 'learning_rate': 7.441859870257958e-06, 'epoch': 0.36} + 36%|███▌ | 4358/12188 [9:29:39<16:28:06, 7.57s/it] 36%|███▌ | 4359/12188 [9:29:46<16:01:56, 7.37s/it] {'loss': 0.3185, 'grad_norm': 0.615063443666004, 'learning_rate': 7.4407003078822984e-06, 'epoch': 0.36} + 36%|███▌ | 4359/12188 [9:29:46<16:01:56, 7.37s/it] 36%|███▌ | 4360/12188 [9:29:53<15:45:54, 7.25s/it] {'loss': 0.3352, 'grad_norm': 0.6187348676420912, 'learning_rate': 7.439540573148301e-06, 'epoch': 0.36} + 36%|███▌ | 4360/12188 [9:29:53<15:45:54, 7.25s/it] 36%|███▌ | 4361/12188 [9:30:01<16:09:33, 7.43s/it] {'loss': 0.3592, 'grad_norm': 0.6151791871930327, 'learning_rate': 7.438380666137871e-06, 'epoch': 0.36} + 36%|███▌ | 4361/12188 [9:30:01<16:09:33, 7.43s/it] 36%|███▌ | 4362/12188 [9:30:08<16:02:18, 7.38s/it] {'loss': 0.3489, 'grad_norm': 0.6242779793229818, 'learning_rate': 7.437220586932915e-06, 'epoch': 0.36} + 36%|███▌ | 4362/12188 [9:30:08<16:02:18, 7.38s/it] 36%|███▌ | 4363/12188 [9:30:15<15:53:56, 7.31s/it] {'loss': 0.3432, 'grad_norm': 0.6512260872191291, 'learning_rate': 7.436060335615357e-06, 'epoch': 0.36} + 36%|███▌ | 4363/12188 [9:30:15<15:53:56, 7.31s/it] 36%|███▌ | 4364/12188 [9:30:23<16:13:37, 7.47s/it] {'loss': 0.3329, 'grad_norm': 0.7132103600101476, 'learning_rate': 7.434899912267132e-06, 'epoch': 0.36} + 36%|███▌ | 4364/12188 [9:30:23<16:13:37, 7.47s/it] 36%|███▌ | 4365/12188 [9:30:30<16:06:29, 7.41s/it] {'loss': 0.3987, 'grad_norm': 0.6285569652877508, 'learning_rate': 7.433739316970188e-06, 'epoch': 0.36} + 36%|███▌ | 4365/12188 [9:30:30<16:06:29, 7.41s/it] 36%|███▌ | 4366/12188 [9:30:40<17:41:50, 8.15s/it] {'loss': 0.3439, 'grad_norm': 0.601290249993666, 'learning_rate': 7.432578549806482e-06, 'epoch': 0.36} + 36%|███▌ | 4366/12188 [9:30:40<17:41:50, 8.15s/it] 36%|███▌ | 4367/12188 [9:30:47<17:11:55, 7.92s/it] {'loss': 0.3425, 'grad_norm': 0.6607392114742283, 'learning_rate': 7.4314176108579875e-06, 'epoch': 0.36} + 36%|███▌ | 4367/12188 [9:30:47<17:11:55, 7.92s/it] 36%|███▌ | 4368/12188 [9:30:55<16:57:59, 7.81s/it] {'loss': 0.3558, 'grad_norm': 0.6942147642375738, 'learning_rate': 7.430256500206687e-06, 'epoch': 0.36} + 36%|███▌ | 4368/12188 [9:30:55<16:57:59, 7.81s/it] 36%|███▌ | 4369/12188 [9:31:02<16:39:04, 7.67s/it] {'loss': 0.3652, 'grad_norm': 0.6560675961717752, 'learning_rate': 7.429095217934578e-06, 'epoch': 0.36} + 36%|███▌ | 4369/12188 [9:31:02<16:39:04, 7.67s/it] 36%|███▌ | 4370/12188 [9:31:10<16:55:49, 7.80s/it] {'loss': 0.3835, 'grad_norm': 0.6638894798983005, 'learning_rate': 7.427933764123667e-06, 'epoch': 0.36} + 36%|███▌ | 4370/12188 [9:31:10<16:55:49, 7.80s/it] 36%|███▌ | 4371/12188 [9:31:18<16:39:10, 7.67s/it] {'loss': 0.3454, 'grad_norm': 0.7119980051202276, 'learning_rate': 7.426772138855974e-06, 'epoch': 0.36} + 36%|███▌ | 4371/12188 [9:31:18<16:39:10, 7.67s/it] 36%|███▌ | 4372/12188 [9:31:24<16:00:02, 7.37s/it] {'loss': 0.3467, 'grad_norm': 0.5553583297506532, 'learning_rate': 7.425610342213534e-06, 'epoch': 0.36} + 36%|███▌ | 4372/12188 [9:31:24<16:00:02, 7.37s/it] 36%|███▌ | 4373/12188 [9:31:31<15:47:11, 7.27s/it] {'loss': 0.3389, 'grad_norm': 0.5962110984757653, 'learning_rate': 7.424448374278386e-06, 'epoch': 0.36} + 36%|███▌ | 4373/12188 [9:31:31<15:47:11, 7.27s/it] 36%|███▌ | 4374/12188 [9:31:39<15:42:04, 7.23s/it] {'loss': 0.3326, 'grad_norm': 0.7044429099197352, 'learning_rate': 7.4232862351325885e-06, 'epoch': 0.36} + 36%|███▌ | 4374/12188 [9:31:39<15:42:04, 7.23s/it] 36%|███▌ | 4375/12188 [9:31:46<15:46:13, 7.27s/it] {'loss': 0.3325, 'grad_norm': 0.5930890692841625, 'learning_rate': 7.4221239248582125e-06, 'epoch': 0.36} + 36%|███▌ | 4375/12188 [9:31:46<15:46:13, 7.27s/it] 36%|███▌ | 4376/12188 [9:31:54<16:22:17, 7.54s/it] {'loss': 0.3475, 'grad_norm': 0.6969737741824142, 'learning_rate': 7.420961443537335e-06, 'epoch': 0.36} + 36%|███▌ | 4376/12188 [9:31:54<16:22:17, 7.54s/it] 36%|███▌ | 4377/12188 [9:32:01<16:00:36, 7.38s/it] {'loss': 0.3478, 'grad_norm': 0.6080119804313153, 'learning_rate': 7.419798791252051e-06, 'epoch': 0.36} + 36%|███▌ | 4377/12188 [9:32:01<16:00:36, 7.38s/it] 36%|███▌ | 4378/12188 [9:32:09<16:13:14, 7.48s/it] {'loss': 0.3428, 'grad_norm': 0.6461650894661735, 'learning_rate': 7.418635968084462e-06, 'epoch': 0.36} + 36%|███▌ | 4378/12188 [9:32:09<16:13:14, 7.48s/it] 36%|███▌ | 4379/12188 [9:32:16<15:55:42, 7.34s/it] {'loss': 0.3669, 'grad_norm': 0.8317194172719057, 'learning_rate': 7.417472974116688e-06, 'epoch': 0.36} + 36%|███▌ | 4379/12188 [9:32:16<15:55:42, 7.34s/it] 36%|███▌ | 4380/12188 [9:32:23<16:07:39, 7.44s/it] {'loss': 0.3627, 'grad_norm': 0.6679939149786575, 'learning_rate': 7.416309809430857e-06, 'epoch': 0.36} + 36%|███▌ | 4380/12188 [9:32:23<16:07:39, 7.44s/it] 36%|███▌ | 4381/12188 [9:32:31<16:03:37, 7.41s/it] {'loss': 0.3205, 'grad_norm': 0.625165080669201, 'learning_rate': 7.41514647410911e-06, 'epoch': 0.36} + 36%|███▌ | 4381/12188 [9:32:31<16:03:37, 7.41s/it] 36%|███▌ | 4382/12188 [9:32:38<15:53:32, 7.33s/it] {'loss': 0.367, 'grad_norm': 0.6056413106731233, 'learning_rate': 7.413982968233597e-06, 'epoch': 0.36} + 36%|███▌ | 4382/12188 [9:32:38<15:53:32, 7.33s/it] 36%|███▌ | 4383/12188 [9:32:44<15:20:28, 7.08s/it] {'loss': 0.3552, 'grad_norm': 0.6560467031513265, 'learning_rate': 7.412819291886486e-06, 'epoch': 0.36} + 36%|███▌ | 4383/12188 [9:32:44<15:20:28, 7.08s/it] 36%|███▌ | 4384/12188 [9:32:51<15:17:41, 7.06s/it] {'loss': 0.3432, 'grad_norm': 0.6689812545031468, 'learning_rate': 7.411655445149953e-06, 'epoch': 0.36} + 36%|███▌ | 4384/12188 [9:32:51<15:17:41, 7.06s/it] 36%|███▌ | 4385/12188 [9:32:59<15:24:18, 7.11s/it] {'loss': 0.3501, 'grad_norm': 0.6232025185902996, 'learning_rate': 7.410491428106188e-06, 'epoch': 0.36} + 36%|███▌ | 4385/12188 [9:32:59<15:24:18, 7.11s/it] 36%|███▌ | 4386/12188 [9:33:06<15:17:25, 7.06s/it] {'loss': 0.363, 'grad_norm': 0.6476007272917023, 'learning_rate': 7.409327240837392e-06, 'epoch': 0.36} + 36%|███▌ | 4386/12188 [9:33:06<15:17:25, 7.06s/it] 36%|███▌ | 4387/12188 [9:33:13<15:30:04, 7.15s/it] {'loss': 0.32, 'grad_norm': 0.6284790928942839, 'learning_rate': 7.4081628834257745e-06, 'epoch': 0.36} + 36%|███▌ | 4387/12188 [9:33:13<15:30:04, 7.15s/it] 36%|███▌ | 4388/12188 [9:33:20<15:11:31, 7.01s/it] {'loss': 0.337, 'grad_norm': 0.6758515113212977, 'learning_rate': 7.406998355953566e-06, 'epoch': 0.36} + 36%|███▌ | 4388/12188 [9:33:20<15:11:31, 7.01s/it] 36%|███▌ | 4389/12188 [9:33:27<15:06:50, 6.98s/it] {'loss': 0.3443, 'grad_norm': 0.6420267954780257, 'learning_rate': 7.405833658502999e-06, 'epoch': 0.36} + 36%|███▌ | 4389/12188 [9:33:27<15:06:50, 6.98s/it] 36%|███▌ | 4390/12188 [9:33:33<14:56:41, 6.90s/it] {'loss': 0.3227, 'grad_norm': 0.7113750503142612, 'learning_rate': 7.404668791156325e-06, 'epoch': 0.36} + 36%|███▌ | 4390/12188 [9:33:33<14:56:41, 6.90s/it] 36%|███▌ | 4391/12188 [9:33:42<15:56:43, 7.36s/it] {'loss': 0.3534, 'grad_norm': 0.6829374111887803, 'learning_rate': 7.403503753995802e-06, 'epoch': 0.36} + 36%|███▌ | 4391/12188 [9:33:42<15:56:43, 7.36s/it] 36%|███▌ | 4392/12188 [9:33:51<16:59:49, 7.85s/it] {'loss': 0.3495, 'grad_norm': 0.708935876759268, 'learning_rate': 7.402338547103708e-06, 'epoch': 0.36} + 36%|███▌ | 4392/12188 [9:33:51<16:59:49, 7.85s/it] 36%|███▌ | 4393/12188 [9:33:58<16:29:26, 7.62s/it] {'loss': 0.3713, 'grad_norm': 0.599165683619013, 'learning_rate': 7.401173170562324e-06, 'epoch': 0.36} + 36%|███▌ | 4393/12188 [9:33:58<16:29:26, 7.62s/it] 36%|███▌ | 4394/12188 [9:34:05<15:58:44, 7.38s/it] {'loss': 0.3245, 'grad_norm': 0.5973927137372558, 'learning_rate': 7.4000076244539495e-06, 'epoch': 0.36} + 36%|███▌ | 4394/12188 [9:34:05<15:58:44, 7.38s/it] 36%|███▌ | 4395/12188 [9:34:13<16:26:58, 7.60s/it] {'loss': 0.3717, 'grad_norm': 0.7043930248607715, 'learning_rate': 7.398841908860893e-06, 'epoch': 0.36} + 36%|███▌ | 4395/12188 [9:34:13<16:26:58, 7.60s/it] 36%|███▌ | 4396/12188 [9:34:20<16:13:07, 7.49s/it] {'loss': 0.3847, 'grad_norm': 0.6934282240285471, 'learning_rate': 7.397676023865473e-06, 'epoch': 0.36} + 36%|███▌ | 4396/12188 [9:34:20<16:13:07, 7.49s/it] 36%|███▌ | 4397/12188 [9:34:27<16:03:25, 7.42s/it] {'loss': 0.3872, 'grad_norm': 0.6652166108221608, 'learning_rate': 7.396509969550025e-06, 'epoch': 0.36} + 36%|███▌ | 4397/12188 [9:34:27<16:03:25, 7.42s/it] 36%|███▌ | 4398/12188 [9:34:35<16:18:39, 7.54s/it] {'loss': 0.342, 'grad_norm': 0.626383766579676, 'learning_rate': 7.395343745996895e-06, 'epoch': 0.36} + 36%|███▌ | 4398/12188 [9:34:35<16:18:39, 7.54s/it] 36%|███▌ | 4399/12188 [9:34:42<16:08:18, 7.46s/it] {'loss': 0.3204, 'grad_norm': 0.6164468244190888, 'learning_rate': 7.3941773532884365e-06, 'epoch': 0.36} + 36%|███▌ | 4399/12188 [9:34:42<16:08:18, 7.46s/it] 36%|███▌ | 4400/12188 [9:34:49<15:43:56, 7.27s/it] {'loss': 0.3592, 'grad_norm': 0.6170625821940035, 'learning_rate': 7.3930107915070204e-06, 'epoch': 0.36} + 36%|███▌ | 4400/12188 [9:34:49<15:43:56, 7.27s/it] 36%|███▌ | 4401/12188 [9:34:56<15:31:20, 7.18s/it] {'loss': 0.3633, 'grad_norm': 0.7417326315126752, 'learning_rate': 7.391844060735026e-06, 'epoch': 0.36} + 36%|███▌ | 4401/12188 [9:34:56<15:31:20, 7.18s/it] 36%|███▌ | 4402/12188 [9:35:03<15:19:52, 7.09s/it] {'loss': 0.3243, 'grad_norm': 0.6469829770265636, 'learning_rate': 7.390677161054848e-06, 'epoch': 0.36} + 36%|███▌ | 4402/12188 [9:35:03<15:19:52, 7.09s/it] 36%|███▌ | 4403/12188 [9:35:10<15:08:20, 7.00s/it] {'loss': 0.3415, 'grad_norm': 0.700525160582892, 'learning_rate': 7.389510092548889e-06, 'epoch': 0.36} + 36%|███▌ | 4403/12188 [9:35:10<15:08:20, 7.00s/it] 36%|███▌ | 4404/12188 [9:35:17<15:01:58, 6.95s/it] {'loss': 0.343, 'grad_norm': 0.6618879518988219, 'learning_rate': 7.388342855299566e-06, 'epoch': 0.36} + 36%|███▌ | 4404/12188 [9:35:17<15:01:58, 6.95s/it] 36%|███▌ | 4405/12188 [9:35:24<15:04:47, 6.98s/it] {'loss': 0.3693, 'grad_norm': 1.0733349969483126, 'learning_rate': 7.387175449389308e-06, 'epoch': 0.36} + 36%|███▌ | 4405/12188 [9:35:24<15:04:47, 6.98s/it] 36%|███▌ | 4406/12188 [9:35:31<15:06:24, 6.99s/it] {'loss': 0.3309, 'grad_norm': 0.5982129933780846, 'learning_rate': 7.386007874900556e-06, 'epoch': 0.36} + 36%|███▌ | 4406/12188 [9:35:31<15:06:24, 6.99s/it] 36%|███▌ | 4407/12188 [9:35:38<15:01:38, 6.95s/it] {'loss': 0.3797, 'grad_norm': 0.64261813241084, 'learning_rate': 7.38484013191576e-06, 'epoch': 0.36} + 36%|███▌ | 4407/12188 [9:35:38<15:01:38, 6.95s/it] 36%|███▌ | 4408/12188 [9:35:45<15:18:53, 7.09s/it] {'loss': 0.3772, 'grad_norm': 0.6678708534609936, 'learning_rate': 7.383672220517387e-06, 'epoch': 0.36} + 36%|███▌ | 4408/12188 [9:35:45<15:18:53, 7.09s/it] 36%|███▌ | 4409/12188 [9:35:52<15:13:45, 7.05s/it] {'loss': 0.3233, 'grad_norm': 0.6664458332867247, 'learning_rate': 7.3825041407879094e-06, 'epoch': 0.36} + 36%|███▌ | 4409/12188 [9:35:52<15:13:45, 7.05s/it] 36%|███▌ | 4410/12188 [9:35:59<15:03:07, 6.97s/it] {'loss': 0.3686, 'grad_norm': 0.6395899140390399, 'learning_rate': 7.381335892809819e-06, 'epoch': 0.36} + 36%|███▌ | 4410/12188 [9:35:59<15:03:07, 6.97s/it] 36%|███▌ | 4411/12188 [9:36:06<15:01:35, 6.96s/it] {'loss': 0.3672, 'grad_norm': 0.6009256617712884, 'learning_rate': 7.380167476665613e-06, 'epoch': 0.36} + 36%|███▌ | 4411/12188 [9:36:06<15:01:35, 6.96s/it] 36%|███▌ | 4412/12188 [9:36:13<15:17:14, 7.08s/it] {'loss': 0.404, 'grad_norm': 0.6845035211521446, 'learning_rate': 7.378998892437803e-06, 'epoch': 0.36} + 36%|███▌ | 4412/12188 [9:36:13<15:17:14, 7.08s/it] 36%|███▌ | 4413/12188 [9:36:21<16:10:43, 7.49s/it] {'loss': 0.335, 'grad_norm': 0.6101624247854002, 'learning_rate': 7.377830140208915e-06, 'epoch': 0.36} + 36%|███▌ | 4413/12188 [9:36:21<16:10:43, 7.49s/it] 36%|███▌ | 4414/12188 [9:36:29<16:29:34, 7.64s/it] {'loss': 0.3766, 'grad_norm': 0.6294084226537625, 'learning_rate': 7.3766612200614814e-06, 'epoch': 0.36} + 36%|███▌ | 4414/12188 [9:36:29<16:29:34, 7.64s/it] 36%|███▌ | 4415/12188 [9:36:37<16:21:15, 7.57s/it] {'loss': 0.3418, 'grad_norm': 0.6350153693266466, 'learning_rate': 7.375492132078051e-06, 'epoch': 0.36} + 36%|███▌ | 4415/12188 [9:36:37<16:21:15, 7.57s/it] 36%|███▌ | 4416/12188 [9:36:44<15:53:10, 7.36s/it] {'loss': 0.3359, 'grad_norm': 0.6374429854646978, 'learning_rate': 7.374322876341184e-06, 'epoch': 0.36} + 36%|███▌ | 4416/12188 [9:36:44<15:53:10, 7.36s/it] 36%|███▌ | 4417/12188 [9:36:51<15:34:38, 7.22s/it] {'loss': 0.3736, 'grad_norm': 0.6407877733643291, 'learning_rate': 7.373153452933449e-06, 'epoch': 0.36} + 36%|███▌ | 4417/12188 [9:36:51<15:34:38, 7.22s/it] 36%|███▌ | 4418/12188 [9:36:58<15:47:28, 7.32s/it] {'loss': 0.3309, 'grad_norm': 0.6499455093746933, 'learning_rate': 7.37198386193743e-06, 'epoch': 0.36} + 36%|███▌ | 4418/12188 [9:36:58<15:47:28, 7.32s/it] 36%|███▋ | 4419/12188 [9:37:06<16:14:55, 7.53s/it] {'loss': 0.333, 'grad_norm': 0.6763926816479953, 'learning_rate': 7.370814103435722e-06, 'epoch': 0.36} + 36%|███▋ | 4419/12188 [9:37:06<16:14:55, 7.53s/it] 36%|███▋ | 4420/12188 [9:37:13<15:57:46, 7.40s/it] {'loss': 0.3264, 'grad_norm': 0.5998832387869221, 'learning_rate': 7.3696441775109305e-06, 'epoch': 0.36} + 36%|███▋ | 4420/12188 [9:37:13<15:57:46, 7.40s/it] 36%|███▋ | 4421/12188 [9:37:21<16:02:59, 7.44s/it] {'loss': 0.3545, 'grad_norm': 0.7041598595714932, 'learning_rate': 7.368474084245676e-06, 'epoch': 0.36} + 36%|███▋ | 4421/12188 [9:37:21<16:02:59, 7.44s/it] 36%|███▋ | 4422/12188 [9:37:28<15:48:10, 7.33s/it] {'loss': 0.3685, 'grad_norm': 0.6501930347990397, 'learning_rate': 7.367303823722585e-06, 'epoch': 0.36} + 36%|███▋ | 4422/12188 [9:37:28<15:48:10, 7.33s/it] 36%|███▋ | 4423/12188 [9:37:35<15:44:50, 7.30s/it] {'loss': 0.3748, 'grad_norm': 0.6476972186666273, 'learning_rate': 7.366133396024301e-06, 'epoch': 0.36} + 36%|███▋ | 4423/12188 [9:37:35<15:44:50, 7.30s/it] 36%|███▋ | 4424/12188 [9:37:42<15:22:23, 7.13s/it] {'loss': 0.3647, 'grad_norm': 0.6601205350797885, 'learning_rate': 7.3649628012334785e-06, 'epoch': 0.36} + 36%|███▋ | 4424/12188 [9:37:42<15:22:23, 7.13s/it] 36%|███▋ | 4425/12188 [9:37:49<15:15:35, 7.08s/it] {'loss': 0.3406, 'grad_norm': 0.6339175031374529, 'learning_rate': 7.363792039432783e-06, 'epoch': 0.36} + 36%|███▋ | 4425/12188 [9:37:49<15:15:35, 7.08s/it] 36%|███▋ | 4426/12188 [9:37:55<15:00:59, 6.96s/it] {'loss': 0.3586, 'grad_norm': 0.6765117464761441, 'learning_rate': 7.362621110704892e-06, 'epoch': 0.36} + 36%|███▋ | 4426/12188 [9:37:55<15:00:59, 6.96s/it] 36%|███▋ | 4427/12188 [9:38:03<15:36:11, 7.24s/it] {'loss': 0.3411, 'grad_norm': 0.6970669120099977, 'learning_rate': 7.361450015132493e-06, 'epoch': 0.36} + 36%|███▋ | 4427/12188 [9:38:03<15:36:11, 7.24s/it] 36%|███▋ | 4428/12188 [9:38:10<15:18:08, 7.10s/it] {'loss': 0.3177, 'grad_norm': 0.7909817181359752, 'learning_rate': 7.360278752798289e-06, 'epoch': 0.36} + 36%|███▋ | 4428/12188 [9:38:10<15:18:08, 7.10s/it] 36%|███▋ | 4429/12188 [9:38:17<15:01:49, 6.97s/it] {'loss': 0.3733, 'grad_norm': 0.6819152842513383, 'learning_rate': 7.3591073237849894e-06, 'epoch': 0.36} + 36%|███▋ | 4429/12188 [9:38:17<15:01:49, 6.97s/it] 36%|███▋ | 4430/12188 [9:38:23<14:43:31, 6.83s/it] {'loss': 0.3401, 'grad_norm': 0.6436183425199609, 'learning_rate': 7.357935728175323e-06, 'epoch': 0.36} + 36%|███▋ | 4430/12188 [9:38:23<14:43:31, 6.83s/it] 36%|███▋ | 4431/12188 [9:38:30<14:54:46, 6.92s/it] {'loss': 0.3513, 'grad_norm': 0.6731149291024439, 'learning_rate': 7.356763966052023e-06, 'epoch': 0.36} + 36%|███▋ | 4431/12188 [9:38:30<14:54:46, 6.92s/it] 36%|███▋ | 4432/12188 [9:38:37<14:46:34, 6.86s/it] {'loss': 0.3549, 'grad_norm': 0.6218670400972143, 'learning_rate': 7.355592037497837e-06, 'epoch': 0.36} + 36%|███▋ | 4432/12188 [9:38:37<14:46:34, 6.86s/it] 36%|███▋ | 4433/12188 [9:38:46<15:47:54, 7.33s/it] {'loss': 0.345, 'grad_norm': 0.6380267825389364, 'learning_rate': 7.354419942595528e-06, 'epoch': 0.36} + 36%|███▋ | 4433/12188 [9:38:46<15:47:54, 7.33s/it] 36%|███▋ | 4434/12188 [9:38:52<15:30:24, 7.20s/it] {'loss': 0.3622, 'grad_norm': 0.6609139292955213, 'learning_rate': 7.353247681427864e-06, 'epoch': 0.36} + 36%|███▋ | 4434/12188 [9:38:52<15:30:24, 7.20s/it] 36%|███▋ | 4435/12188 [9:38:59<15:01:08, 6.97s/it] {'loss': 0.3459, 'grad_norm': 0.6727447036845013, 'learning_rate': 7.352075254077631e-06, 'epoch': 0.36} + 36%|███▋ | 4435/12188 [9:38:59<15:01:08, 6.97s/it] 36%|███▋ | 4436/12188 [9:39:07<15:44:24, 7.31s/it] {'loss': 0.3563, 'grad_norm': 0.6554882436607857, 'learning_rate': 7.350902660627619e-06, 'epoch': 0.36} + 36%|███▋ | 4436/12188 [9:39:07<15:44:24, 7.31s/it] 36%|███▋ | 4437/12188 [9:39:16<16:35:35, 7.71s/it] {'loss': 0.3668, 'grad_norm': 0.5994054344432587, 'learning_rate': 7.349729901160641e-06, 'epoch': 0.36} + 36%|███▋ | 4437/12188 [9:39:16<16:35:35, 7.71s/it] 36%|███▋ | 4438/12188 [9:39:22<16:02:17, 7.45s/it] {'loss': 0.3352, 'grad_norm': 0.6348533510354611, 'learning_rate': 7.348556975759512e-06, 'epoch': 0.36} + 36%|███▋ | 4438/12188 [9:39:22<16:02:17, 7.45s/it] 36%|███▋ | 4439/12188 [9:39:29<15:27:48, 7.18s/it] {'loss': 0.3252, 'grad_norm': 0.7251521769048809, 'learning_rate': 7.3473838845070624e-06, 'epoch': 0.36} + 36%|███▋ | 4439/12188 [9:39:29<15:27:48, 7.18s/it] 36%|███▋ | 4440/12188 [9:39:36<15:05:32, 7.01s/it] {'loss': 0.3476, 'grad_norm': 0.6441605967026591, 'learning_rate': 7.346210627486135e-06, 'epoch': 0.36} + 36%|███▋ | 4440/12188 [9:39:36<15:05:32, 7.01s/it] 36%|███▋ | 4441/12188 [9:39:43<15:09:03, 7.04s/it] {'loss': 0.3373, 'grad_norm': 0.6176229748268995, 'learning_rate': 7.345037204779582e-06, 'epoch': 0.36} + 36%|███▋ | 4441/12188 [9:39:43<15:09:03, 7.04s/it] 36%|███▋ | 4442/12188 [9:39:49<14:48:32, 6.88s/it] {'loss': 0.3176, 'grad_norm': 0.6269304182499377, 'learning_rate': 7.343863616470268e-06, 'epoch': 0.36} + 36%|███▋ | 4442/12188 [9:39:49<14:48:32, 6.88s/it] 36%|███▋ | 4443/12188 [9:39:57<15:28:31, 7.19s/it] {'loss': 0.3429, 'grad_norm': 0.6006855761953981, 'learning_rate': 7.3426898626410725e-06, 'epoch': 0.36} + 36%|███▋ | 4443/12188 [9:39:57<15:28:31, 7.19s/it] 36%|███▋ | 4444/12188 [9:40:05<15:54:26, 7.39s/it] {'loss': 0.339, 'grad_norm': 0.6547044309892195, 'learning_rate': 7.341515943374883e-06, 'epoch': 0.36} + 36%|███▋ | 4444/12188 [9:40:05<15:54:26, 7.39s/it] 36%|███▋ | 4445/12188 [9:40:12<15:41:35, 7.30s/it] {'loss': 0.3869, 'grad_norm': 0.6725113197848159, 'learning_rate': 7.340341858754599e-06, 'epoch': 0.36} + 36%|███▋ | 4445/12188 [9:40:12<15:41:35, 7.30s/it] 36%|███▋ | 4446/12188 [9:40:19<15:23:47, 7.16s/it] {'loss': 0.3664, 'grad_norm': 0.6147373243573265, 'learning_rate': 7.339167608863131e-06, 'epoch': 0.36} + 36%|███▋ | 4446/12188 [9:40:19<15:23:47, 7.16s/it] 36%|███▋ | 4447/12188 [9:40:26<15:04:08, 7.01s/it] {'loss': 0.343, 'grad_norm': 0.6375180721352509, 'learning_rate': 7.337993193783408e-06, 'epoch': 0.36} + 36%|███▋ | 4447/12188 [9:40:26<15:04:08, 7.01s/it] 36%|███▋ | 4448/12188 [9:40:33<15:34:38, 7.25s/it] {'loss': 0.3417, 'grad_norm': 0.6147827627242128, 'learning_rate': 7.336818613598361e-06, 'epoch': 0.36} + 36%|███▋ | 4448/12188 [9:40:33<15:34:38, 7.25s/it] 37%|███▋ | 4449/12188 [9:40:41<15:42:46, 7.31s/it] {'loss': 0.3715, 'grad_norm': 0.6020835506477966, 'learning_rate': 7.335643868390938e-06, 'epoch': 0.37} + 37%|███▋ | 4449/12188 [9:40:41<15:42:46, 7.31s/it] 37%|███▋ | 4450/12188 [9:40:48<15:25:01, 7.17s/it] {'loss': 0.3353, 'grad_norm': 0.6260595375567823, 'learning_rate': 7.3344689582440955e-06, 'epoch': 0.37} + 37%|███▋ | 4450/12188 [9:40:48<15:25:01, 7.17s/it] 37%|███▋ | 4451/12188 [9:40:56<16:17:19, 7.58s/it] {'loss': 0.3632, 'grad_norm': 0.6366009324746107, 'learning_rate': 7.333293883240808e-06, 'epoch': 0.37} + 37%|███▋ | 4451/12188 [9:40:56<16:17:19, 7.58s/it] 37%|███▋ | 4452/12188 [9:41:03<15:55:14, 7.41s/it] {'loss': 0.368, 'grad_norm': 0.600599193340005, 'learning_rate': 7.332118643464053e-06, 'epoch': 0.37} + 37%|███▋ | 4452/12188 [9:41:03<15:55:14, 7.41s/it] 37%|███▋ | 4453/12188 [9:41:10<15:41:43, 7.30s/it] {'loss': 0.3594, 'grad_norm': 0.6096312564988314, 'learning_rate': 7.330943238996828e-06, 'epoch': 0.37} + 37%|███▋ | 4453/12188 [9:41:10<15:41:43, 7.30s/it] 37%|███▋ | 4454/12188 [9:41:17<15:21:21, 7.15s/it] {'loss': 0.3798, 'grad_norm': 0.5725190989470458, 'learning_rate': 7.329767669922137e-06, 'epoch': 0.37} + 37%|███▋ | 4454/12188 [9:41:17<15:21:21, 7.15s/it] 37%|███▋ | 4455/12188 [9:41:24<15:12:19, 7.08s/it] {'loss': 0.373, 'grad_norm': 0.6150599889606586, 'learning_rate': 7.328591936322995e-06, 'epoch': 0.37} + 37%|███▋ | 4455/12188 [9:41:24<15:12:19, 7.08s/it] 37%|███▋ | 4456/12188 [9:41:31<15:04:51, 7.02s/it] {'loss': 0.3719, 'grad_norm': 0.6424906307449497, 'learning_rate': 7.327416038282431e-06, 'epoch': 0.37} + 37%|███▋ | 4456/12188 [9:41:31<15:04:51, 7.02s/it] 37%|███▋ | 4457/12188 [9:41:38<15:04:48, 7.02s/it] {'loss': 0.3472, 'grad_norm': 0.6309647227282019, 'learning_rate': 7.326239975883487e-06, 'epoch': 0.37} + 37%|███▋ | 4457/12188 [9:41:38<15:04:48, 7.02s/it] 37%|███▋ | 4458/12188 [9:41:45<14:48:17, 6.89s/it] {'loss': 0.3594, 'grad_norm': 0.8904847635425932, 'learning_rate': 7.325063749209212e-06, 'epoch': 0.37} + 37%|███▋ | 4458/12188 [9:41:45<14:48:17, 6.89s/it] 37%|███▋ | 4459/12188 [9:41:51<14:29:21, 6.75s/it] {'loss': 0.3492, 'grad_norm': 0.6238159381357332, 'learning_rate': 7.323887358342672e-06, 'epoch': 0.37} + 37%|███▋ | 4459/12188 [9:41:51<14:29:21, 6.75s/it] 37%|███▋ | 4460/12188 [9:41:58<14:56:13, 6.96s/it] {'loss': 0.3564, 'grad_norm': 0.6487635344976724, 'learning_rate': 7.3227108033669394e-06, 'epoch': 0.37} + 37%|███▋ | 4460/12188 [9:41:58<14:56:13, 6.96s/it] 37%|███▋ | 4461/12188 [9:42:06<15:17:06, 7.12s/it] {'loss': 0.3225, 'grad_norm': 0.6284846517162695, 'learning_rate': 7.321534084365101e-06, 'epoch': 0.37} + 37%|███▋ | 4461/12188 [9:42:06<15:17:06, 7.12s/it] 37%|███▋ | 4462/12188 [9:42:13<15:22:25, 7.16s/it] {'loss': 0.3191, 'grad_norm': 0.7125498405467358, 'learning_rate': 7.320357201420257e-06, 'epoch': 0.37} + 37%|███▋ | 4462/12188 [9:42:13<15:22:25, 7.16s/it] 37%|███▋ | 4463/12188 [9:42:20<15:11:20, 7.08s/it] {'loss': 0.3757, 'grad_norm': 0.7099434655516514, 'learning_rate': 7.319180154615515e-06, 'epoch': 0.37} + 37%|███▋ | 4463/12188 [9:42:20<15:11:20, 7.08s/it] 37%|███▋ | 4464/12188 [9:42:27<14:53:24, 6.94s/it] {'loss': 0.3424, 'grad_norm': 0.5976533147164482, 'learning_rate': 7.318002944033995e-06, 'epoch': 0.37} + 37%|███▋ | 4464/12188 [9:42:27<14:53:24, 6.94s/it] 37%|███▋ | 4465/12188 [9:42:34<15:01:09, 7.00s/it] {'loss': 0.3418, 'grad_norm': 0.6213979599963461, 'learning_rate': 7.316825569758833e-06, 'epoch': 0.37} + 37%|███▋ | 4465/12188 [9:42:34<15:01:09, 7.00s/it] 37%|███▋ | 4466/12188 [9:42:40<14:44:19, 6.87s/it] {'loss': 0.3735, 'grad_norm': 0.6734566194971553, 'learning_rate': 7.31564803187317e-06, 'epoch': 0.37} + 37%|███▋ | 4466/12188 [9:42:40<14:44:19, 6.87s/it] 37%|███▋ | 4467/12188 [9:42:47<14:45:26, 6.88s/it] {'loss': 0.4059, 'grad_norm': 0.6388291634179296, 'learning_rate': 7.314470330460164e-06, 'epoch': 0.37} + 37%|███▋ | 4467/12188 [9:42:47<14:45:26, 6.88s/it] 37%|███▋ | 4468/12188 [9:42:55<15:02:10, 7.01s/it] {'loss': 0.4027, 'grad_norm': 0.6581903859379095, 'learning_rate': 7.313292465602983e-06, 'epoch': 0.37} + 37%|███▋ | 4468/12188 [9:42:55<15:02:10, 7.01s/it] 37%|███▋ | 4469/12188 [9:43:02<15:09:35, 7.07s/it] {'loss': 0.3409, 'grad_norm': 0.6446268187717511, 'learning_rate': 7.312114437384804e-06, 'epoch': 0.37} + 37%|███▋ | 4469/12188 [9:43:02<15:09:35, 7.07s/it] 37%|███▋ | 4470/12188 [9:43:10<15:37:20, 7.29s/it] {'loss': 0.3481, 'grad_norm': 0.6318880958570701, 'learning_rate': 7.310936245888818e-06, 'epoch': 0.37} + 37%|███▋ | 4470/12188 [9:43:10<15:37:20, 7.29s/it] 37%|███▋ | 4471/12188 [9:43:17<15:42:03, 7.32s/it] {'loss': 0.3836, 'grad_norm': 0.6527347820093603, 'learning_rate': 7.309757891198227e-06, 'epoch': 0.37} + 37%|███▋ | 4471/12188 [9:43:17<15:42:03, 7.32s/it] 37%|███▋ | 4472/12188 [9:43:24<15:23:39, 7.18s/it] {'loss': 0.3233, 'grad_norm': 0.6569228036941724, 'learning_rate': 7.308579373396244e-06, 'epoch': 0.37} + 37%|███▋ | 4472/12188 [9:43:24<15:23:39, 7.18s/it] 37%|███▋ | 4473/12188 [9:43:31<15:35:29, 7.28s/it] {'loss': 0.3439, 'grad_norm': 0.6539466341023623, 'learning_rate': 7.307400692566096e-06, 'epoch': 0.37} + 37%|███▋ | 4473/12188 [9:43:31<15:35:29, 7.28s/it] 37%|███▋ | 4474/12188 [9:43:38<15:13:55, 7.11s/it] {'loss': 0.3494, 'grad_norm': 0.6297798781630164, 'learning_rate': 7.306221848791016e-06, 'epoch': 0.37} + 37%|███▋ | 4474/12188 [9:43:38<15:13:55, 7.11s/it] 37%|███▋ | 4475/12188 [9:43:45<15:11:52, 7.09s/it] {'loss': 0.3523, 'grad_norm': 0.612803921478661, 'learning_rate': 7.305042842154256e-06, 'epoch': 0.37} + 37%|███▋ | 4475/12188 [9:43:45<15:11:52, 7.09s/it] 37%|███▋ | 4476/12188 [9:43:52<15:07:41, 7.06s/it] {'loss': 0.3521, 'grad_norm': 0.7093418937619914, 'learning_rate': 7.3038636727390735e-06, 'epoch': 0.37} + 37%|███▋ | 4476/12188 [9:43:52<15:07:41, 7.06s/it] 37%|███▋ | 4477/12188 [9:43:59<14:53:58, 6.96s/it] {'loss': 0.3627, 'grad_norm': 0.6346448344765775, 'learning_rate': 7.302684340628741e-06, 'epoch': 0.37} + 37%|███▋ | 4477/12188 [9:43:59<14:53:58, 6.96s/it] 37%|███▋ | 4478/12188 [9:44:06<15:04:57, 7.04s/it] {'loss': 0.3586, 'grad_norm': 0.6525679523046496, 'learning_rate': 7.301504845906539e-06, 'epoch': 0.37} + 37%|███▋ | 4478/12188 [9:44:06<15:04:57, 7.04s/it] 37%|███▋ | 4479/12188 [9:44:13<15:15:15, 7.12s/it] {'loss': 0.3343, 'grad_norm': 0.5715750574327396, 'learning_rate': 7.300325188655762e-06, 'epoch': 0.37} + 37%|███▋ | 4479/12188 [9:44:13<15:15:15, 7.12s/it] 37%|███▋ | 4480/12188 [9:44:20<14:49:09, 6.92s/it] {'loss': 0.318, 'grad_norm': 0.6305398437290901, 'learning_rate': 7.299145368959716e-06, 'epoch': 0.37} + 37%|███▋ | 4480/12188 [9:44:20<14:49:09, 6.92s/it] 37%|███▋ | 4481/12188 [9:44:27<14:46:07, 6.90s/it] {'loss': 0.374, 'grad_norm': 0.6584126996205908, 'learning_rate': 7.297965386901717e-06, 'epoch': 0.37} + 37%|███▋ | 4481/12188 [9:44:27<14:46:07, 6.90s/it] 37%|███▋ | 4482/12188 [9:44:34<14:56:31, 6.98s/it] {'loss': 0.3263, 'grad_norm': 0.6037557676825892, 'learning_rate': 7.296785242565096e-06, 'epoch': 0.37} + 37%|███▋ | 4482/12188 [9:44:34<14:56:31, 6.98s/it] 37%|███▋ | 4483/12188 [9:44:41<14:46:19, 6.90s/it] {'loss': 0.3379, 'grad_norm': 0.8780942747403893, 'learning_rate': 7.295604936033191e-06, 'epoch': 0.37} + 37%|███▋ | 4483/12188 [9:44:41<14:46:19, 6.90s/it] 37%|███▋ | 4484/12188 [9:44:48<15:00:53, 7.02s/it] {'loss': 0.3244, 'grad_norm': 0.6140191241266739, 'learning_rate': 7.294424467389354e-06, 'epoch': 0.37} + 37%|███▋ | 4484/12188 [9:44:48<15:00:53, 7.02s/it] 37%|███▋ | 4485/12188 [9:44:55<15:14:19, 7.12s/it] {'loss': 0.3721, 'grad_norm': 0.6342944379965634, 'learning_rate': 7.293243836716946e-06, 'epoch': 0.37} + 37%|███▋ | 4485/12188 [9:44:55<15:14:19, 7.12s/it] 37%|███▋ | 4486/12188 [9:45:02<15:07:08, 7.07s/it] {'loss': 0.3548, 'grad_norm': 0.632325675135138, 'learning_rate': 7.292063044099342e-06, 'epoch': 0.37} + 37%|███▋ | 4486/12188 [9:45:02<15:07:08, 7.07s/it] 37%|███▋ | 4487/12188 [9:45:09<15:03:58, 7.04s/it] {'loss': 0.4026, 'grad_norm': 0.6303741355678775, 'learning_rate': 7.29088208961993e-06, 'epoch': 0.37} + 37%|███▋ | 4487/12188 [9:45:09<15:03:58, 7.04s/it] 37%|███▋ | 4488/12188 [9:45:16<14:55:28, 6.98s/it] {'loss': 0.3916, 'grad_norm': 0.6377357492070529, 'learning_rate': 7.289700973362105e-06, 'epoch': 0.37} + 37%|███▋ | 4488/12188 [9:45:16<14:55:28, 6.98s/it] 37%|███▋ | 4489/12188 [9:45:23<14:55:23, 6.98s/it] {'loss': 0.3407, 'grad_norm': 0.7126510202866146, 'learning_rate': 7.288519695409276e-06, 'epoch': 0.37} + 37%|███▋ | 4489/12188 [9:45:23<14:55:23, 6.98s/it] 37%|███▋ | 4490/12188 [9:45:31<15:34:31, 7.28s/it] {'loss': 0.3575, 'grad_norm': 0.6605418186646366, 'learning_rate': 7.287338255844862e-06, 'epoch': 0.37} + 37%|███▋ | 4490/12188 [9:45:31<15:34:31, 7.28s/it] 37%|███▋ | 4491/12188 [9:45:38<15:23:40, 7.20s/it] {'loss': 0.3122, 'grad_norm': 0.6037327318495305, 'learning_rate': 7.286156654752297e-06, 'epoch': 0.37} + 37%|███▋ | 4491/12188 [9:45:38<15:23:40, 7.20s/it] 37%|███▋ | 4492/12188 [9:45:45<15:08:20, 7.08s/it] {'loss': 0.3427, 'grad_norm': 0.676753591528034, 'learning_rate': 7.284974892215021e-06, 'epoch': 0.37} + 37%|███▋ | 4492/12188 [9:45:45<15:08:20, 7.08s/it] 37%|███▋ | 4493/12188 [9:45:52<14:59:27, 7.01s/it] {'loss': 0.3321, 'grad_norm': 0.62955802748041, 'learning_rate': 7.28379296831649e-06, 'epoch': 0.37} + 37%|███▋ | 4493/12188 [9:45:52<14:59:27, 7.01s/it] 37%|███▋ | 4494/12188 [9:46:01<16:12:49, 7.59s/it] {'loss': 0.3526, 'grad_norm': 0.6434062289016449, 'learning_rate': 7.282610883140166e-06, 'epoch': 0.37} + 37%|███▋ | 4494/12188 [9:46:01<16:12:49, 7.59s/it] 37%|███▋ | 4495/12188 [9:46:08<15:52:18, 7.43s/it] {'loss': 0.356, 'grad_norm': 0.6123325529613416, 'learning_rate': 7.2814286367695315e-06, 'epoch': 0.37} + 37%|███▋ | 4495/12188 [9:46:08<15:52:18, 7.43s/it] 37%|███▋ | 4496/12188 [9:46:15<15:55:09, 7.45s/it] {'loss': 0.3197, 'grad_norm': 0.645541094768656, 'learning_rate': 7.280246229288073e-06, 'epoch': 0.37} + 37%|███▋ | 4496/12188 [9:46:15<15:55:09, 7.45s/it] 37%|███▋ | 4497/12188 [9:46:23<16:01:43, 7.50s/it] {'loss': 0.3468, 'grad_norm': 0.6481948085789633, 'learning_rate': 7.2790636607792875e-06, 'epoch': 0.37} + 37%|███▋ | 4497/12188 [9:46:23<16:01:43, 7.50s/it] 37%|███▋ | 4498/12188 [9:46:30<15:42:32, 7.35s/it] {'loss': 0.4115, 'grad_norm': 0.6816806057956345, 'learning_rate': 7.2778809313266885e-06, 'epoch': 0.37} + 37%|███▋ | 4498/12188 [9:46:30<15:42:32, 7.35s/it] 37%|███▋ | 4499/12188 [9:46:38<16:32:27, 7.74s/it] {'loss': 0.3893, 'grad_norm': 0.7121846019766345, 'learning_rate': 7.276698041013797e-06, 'epoch': 0.37} + 37%|███▋ | 4499/12188 [9:46:38<16:32:27, 7.74s/it] 37%|███▋ | 4500/12188 [9:46:45<16:07:23, 7.55s/it] {'loss': 0.3491, 'grad_norm': 0.6462996314843406, 'learning_rate': 7.275514989924147e-06, 'epoch': 0.37} + 37%|███▋ | 4500/12188 [9:46:45<16:07:23, 7.55s/it] 37%|███▋ | 4501/12188 [9:46:52<15:45:59, 7.38s/it] {'loss': 0.3506, 'grad_norm': 0.6637095542908347, 'learning_rate': 7.274331778141286e-06, 'epoch': 0.37} + 37%|███▋ | 4501/12188 [9:46:52<15:45:59, 7.38s/it] 37%|███▋ | 4502/12188 [9:47:00<15:41:32, 7.35s/it] {'loss': 0.3178, 'grad_norm': 0.6533285104217263, 'learning_rate': 7.273148405748768e-06, 'epoch': 0.37} + 37%|███▋ | 4502/12188 [9:47:00<15:41:32, 7.35s/it] 37%|███▋ | 4503/12188 [9:47:09<16:36:36, 7.78s/it] {'loss': 0.3931, 'grad_norm': 0.6283698751010459, 'learning_rate': 7.271964872830161e-06, 'epoch': 0.37} + 37%|███▋ | 4503/12188 [9:47:09<16:36:36, 7.78s/it] 37%|███▋ | 4504/12188 [9:47:15<16:05:24, 7.54s/it] {'loss': 0.3405, 'grad_norm': 0.6415766094456171, 'learning_rate': 7.2707811794690465e-06, 'epoch': 0.37} + 37%|███▋ | 4504/12188 [9:47:16<16:05:24, 7.54s/it] 37%|███▋ | 4505/12188 [9:47:26<17:49:30, 8.35s/it] {'loss': 0.3474, 'grad_norm': 0.6485447339297795, 'learning_rate': 7.269597325749013e-06, 'epoch': 0.37} + 37%|███▋ | 4505/12188 [9:47:26<17:49:30, 8.35s/it] 37%|███▋ | 4506/12188 [9:47:33<17:15:35, 8.09s/it] {'loss': 0.3251, 'grad_norm': 0.6423858484568417, 'learning_rate': 7.268413311753661e-06, 'epoch': 0.37} + 37%|███▋ | 4506/12188 [9:47:33<17:15:35, 8.09s/it] 37%|███▋ | 4507/12188 [9:47:41<16:44:27, 7.85s/it] {'loss': 0.3576, 'grad_norm': 0.6332851366570621, 'learning_rate': 7.267229137566607e-06, 'epoch': 0.37} + 37%|███▋ | 4507/12188 [9:47:41<16:44:27, 7.85s/it] 37%|███▋ | 4508/12188 [9:47:49<17:16:18, 8.10s/it] {'loss': 0.3521, 'grad_norm': 0.6080020232116587, 'learning_rate': 7.266044803271473e-06, 'epoch': 0.37} + 37%|███▋ | 4508/12188 [9:47:49<17:16:18, 8.10s/it] 37%|███▋ | 4509/12188 [9:47:56<16:43:01, 7.84s/it] {'loss': 0.3442, 'grad_norm': 0.65694068293177, 'learning_rate': 7.264860308951897e-06, 'epoch': 0.37} + 37%|███▋ | 4509/12188 [9:47:56<16:43:01, 7.84s/it] 37%|███▋ | 4510/12188 [9:48:03<15:57:26, 7.48s/it] {'loss': 0.3203, 'grad_norm': 0.646239747107836, 'learning_rate': 7.263675654691524e-06, 'epoch': 0.37} + 37%|███▋ | 4510/12188 [9:48:03<15:57:26, 7.48s/it] 37%|███▋ | 4511/12188 [9:48:10<15:34:18, 7.30s/it] {'loss': 0.3475, 'grad_norm': 0.6700003462171, 'learning_rate': 7.262490840574013e-06, 'epoch': 0.37} + 37%|███▋ | 4511/12188 [9:48:10<15:34:18, 7.30s/it] 37%|███▋ | 4512/12188 [9:48:18<15:48:22, 7.41s/it] {'loss': 0.3263, 'grad_norm': 0.6159476581278434, 'learning_rate': 7.261305866683034e-06, 'epoch': 0.37} + 37%|███▋ | 4512/12188 [9:48:18<15:48:22, 7.41s/it] 37%|███▋ | 4513/12188 [9:48:24<15:23:50, 7.22s/it] {'loss': 0.3784, 'grad_norm': 0.6213180382365286, 'learning_rate': 7.260120733102267e-06, 'epoch': 0.37} + 37%|███▋ | 4513/12188 [9:48:24<15:23:50, 7.22s/it] 37%|███▋ | 4514/12188 [9:48:33<15:58:52, 7.50s/it] {'loss': 0.3949, 'grad_norm': 0.6830069200113369, 'learning_rate': 7.258935439915407e-06, 'epoch': 0.37} + 37%|███▋ | 4514/12188 [9:48:33<15:58:52, 7.50s/it] 37%|███▋ | 4515/12188 [9:48:40<15:51:57, 7.44s/it] {'loss': 0.3532, 'grad_norm': 0.6471402647261272, 'learning_rate': 7.257749987206154e-06, 'epoch': 0.37} + 37%|███▋ | 4515/12188 [9:48:40<15:51:57, 7.44s/it] 37%|███▋ | 4516/12188 [9:48:47<15:24:07, 7.23s/it] {'loss': 0.3475, 'grad_norm': 0.6441136377526595, 'learning_rate': 7.2565643750582235e-06, 'epoch': 0.37} + 37%|███▋ | 4516/12188 [9:48:47<15:24:07, 7.23s/it] 37%|███▋ | 4517/12188 [9:48:53<14:58:08, 7.02s/it] {'loss': 0.3481, 'grad_norm': 0.623062700862877, 'learning_rate': 7.255378603555344e-06, 'epoch': 0.37} + 37%|███▋ | 4517/12188 [9:48:53<14:58:08, 7.02s/it] 37%|███▋ | 4518/12188 [9:49:00<14:46:02, 6.93s/it] {'loss': 0.3687, 'grad_norm': 0.6467878762655555, 'learning_rate': 7.25419267278125e-06, 'epoch': 0.37} + 37%|███▋ | 4518/12188 [9:49:00<14:46:02, 6.93s/it] 37%|███▋ | 4519/12188 [9:49:07<14:47:02, 6.94s/it] {'loss': 0.3392, 'grad_norm': 0.612524860095522, 'learning_rate': 7.253006582819692e-06, 'epoch': 0.37} + 37%|███▋ | 4519/12188 [9:49:07<14:47:02, 6.94s/it] 37%|███▋ | 4520/12188 [9:49:15<15:54:16, 7.47s/it] {'loss': 0.3838, 'grad_norm': 0.6119624838557122, 'learning_rate': 7.251820333754428e-06, 'epoch': 0.37} + 37%|███▋ | 4520/12188 [9:49:16<15:54:16, 7.47s/it] 37%|███▋ | 4521/12188 [9:49:22<15:29:24, 7.27s/it] {'loss': 0.3159, 'grad_norm': 0.6299302769517757, 'learning_rate': 7.2506339256692295e-06, 'epoch': 0.37} + 37%|███▋ | 4521/12188 [9:49:22<15:29:24, 7.27s/it] 37%|███▋ | 4522/12188 [9:49:29<15:09:02, 7.11s/it] {'loss': 0.3354, 'grad_norm': 0.6485023649539763, 'learning_rate': 7.24944735864788e-06, 'epoch': 0.37} + 37%|███▋ | 4522/12188 [9:49:29<15:09:02, 7.11s/it] 37%|███▋ | 4523/12188 [9:49:37<15:47:25, 7.42s/it] {'loss': 0.3481, 'grad_norm': 0.6171984771550416, 'learning_rate': 7.248260632774171e-06, 'epoch': 0.37} + 37%|███▋ | 4523/12188 [9:49:37<15:47:25, 7.42s/it] 37%|███▋ | 4524/12188 [9:49:44<15:17:16, 7.18s/it] {'loss': 0.32, 'grad_norm': 0.6228146720681734, 'learning_rate': 7.247073748131911e-06, 'epoch': 0.37} + 37%|███▋ | 4524/12188 [9:49:44<15:17:16, 7.18s/it] 37%|███▋ | 4525/12188 [9:49:51<15:07:41, 7.11s/it] {'loss': 0.353, 'grad_norm': 0.6221656835005879, 'learning_rate': 7.245886704804911e-06, 'epoch': 0.37} + 37%|███▋ | 4525/12188 [9:49:51<15:07:41, 7.11s/it] 37%|███▋ | 4526/12188 [9:49:58<15:15:27, 7.17s/it] {'loss': 0.3556, 'grad_norm': 0.6792378864310321, 'learning_rate': 7.244699502877e-06, 'epoch': 0.37} + 37%|███▋ | 4526/12188 [9:49:58<15:15:27, 7.17s/it] 37%|███▋ | 4527/12188 [9:50:06<15:28:17, 7.27s/it] {'loss': 0.3294, 'grad_norm': 0.5551696875630208, 'learning_rate': 7.243512142432017e-06, 'epoch': 0.37} + 37%|███▋ | 4527/12188 [9:50:06<15:28:17, 7.27s/it] 37%|███▋ | 4528/12188 [9:50:13<15:51:30, 7.45s/it] {'loss': 0.4055, 'grad_norm': 0.6636198359313248, 'learning_rate': 7.242324623553809e-06, 'epoch': 0.37} + 37%|███▋ | 4528/12188 [9:50:13<15:51:30, 7.45s/it] 37%|███▋ | 4529/12188 [9:50:21<16:06:20, 7.57s/it] {'loss': 0.3515, 'grad_norm': 0.6103341228489678, 'learning_rate': 7.2411369463262415e-06, 'epoch': 0.37} + 37%|███▋ | 4529/12188 [9:50:21<16:06:20, 7.57s/it] 37%|███▋ | 4530/12188 [9:50:28<15:51:33, 7.46s/it] {'loss': 0.3808, 'grad_norm': 0.6229334169396119, 'learning_rate': 7.239949110833182e-06, 'epoch': 0.37} + 37%|███▋ | 4530/12188 [9:50:28<15:51:33, 7.46s/it] 37%|███▋ | 4531/12188 [9:50:35<15:32:09, 7.30s/it] {'loss': 0.3578, 'grad_norm': 0.6076721264717494, 'learning_rate': 7.238761117158516e-06, 'epoch': 0.37} + 37%|███▋ | 4531/12188 [9:50:35<15:32:09, 7.30s/it] 37%|███▋ | 4532/12188 [9:50:42<15:05:33, 7.10s/it] {'loss': 0.3459, 'grad_norm': 0.5840020162022859, 'learning_rate': 7.237572965386136e-06, 'epoch': 0.37} + 37%|███▋ | 4532/12188 [9:50:42<15:05:33, 7.10s/it] 37%|███▋ | 4533/12188 [9:50:49<15:08:55, 7.12s/it] {'loss': 0.3802, 'grad_norm': 0.6344823705302016, 'learning_rate': 7.236384655599948e-06, 'epoch': 0.37} + 37%|███▋ | 4533/12188 [9:50:49<15:08:55, 7.12s/it] 37%|███▋ | 4534/12188 [9:50:56<14:52:22, 7.00s/it] {'loss': 0.3626, 'grad_norm': 0.6337986228321436, 'learning_rate': 7.235196187883869e-06, 'epoch': 0.37} + 37%|███▋ | 4534/12188 [9:50:56<14:52:22, 7.00s/it] 37%|███▋ | 4535/12188 [9:51:03<15:08:09, 7.12s/it] {'loss': 0.3452, 'grad_norm': 0.6160054787549931, 'learning_rate': 7.234007562321826e-06, 'epoch': 0.37} + 37%|███▋ | 4535/12188 [9:51:03<15:08:09, 7.12s/it] 37%|███▋ | 4536/12188 [9:51:10<14:59:55, 7.06s/it] {'loss': 0.3343, 'grad_norm': 0.6027258657964668, 'learning_rate': 7.232818778997759e-06, 'epoch': 0.37} + 37%|███▋ | 4536/12188 [9:51:10<14:59:55, 7.06s/it] 37%|███▋ | 4537/12188 [9:51:18<15:13:15, 7.16s/it] {'loss': 0.3879, 'grad_norm': 0.6025708834732685, 'learning_rate': 7.231629837995616e-06, 'epoch': 0.37} + 37%|███▋ | 4537/12188 [9:51:18<15:13:15, 7.16s/it] 37%|███▋ | 4538/12188 [9:51:25<15:06:51, 7.11s/it] {'loss': 0.3697, 'grad_norm': 0.6347990496239765, 'learning_rate': 7.230440739399361e-06, 'epoch': 0.37} + 37%|███▋ | 4538/12188 [9:51:25<15:06:51, 7.11s/it] 37%|███▋ | 4539/12188 [9:51:32<15:09:16, 7.13s/it] {'loss': 0.3507, 'grad_norm': 0.6455741405465947, 'learning_rate': 7.229251483292963e-06, 'epoch': 0.37} + 37%|███▋ | 4539/12188 [9:51:32<15:09:16, 7.13s/it] 37%|███▋ | 4540/12188 [9:51:39<14:57:07, 7.04s/it] {'loss': 0.3182, 'grad_norm': 0.6236592684346532, 'learning_rate': 7.228062069760407e-06, 'epoch': 0.37} + 37%|███▋ | 4540/12188 [9:51:39<14:57:07, 7.04s/it] 37%|███▋ | 4541/12188 [9:51:46<14:54:37, 7.02s/it] {'loss': 0.3389, 'grad_norm': 0.7002629926765556, 'learning_rate': 7.226872498885687e-06, 'epoch': 0.37} + 37%|███▋ | 4541/12188 [9:51:46<14:54:37, 7.02s/it] 37%|███▋ | 4542/12188 [9:51:53<15:05:59, 7.11s/it] {'loss': 0.3435, 'grad_norm': 0.6246921403067516, 'learning_rate': 7.225682770752809e-06, 'epoch': 0.37} + 37%|███▋ | 4542/12188 [9:51:53<15:05:59, 7.11s/it] 37%|███▋ | 4543/12188 [9:52:00<15:00:24, 7.07s/it] {'loss': 0.3249, 'grad_norm': 0.629490115673483, 'learning_rate': 7.22449288544579e-06, 'epoch': 0.37} + 37%|███▋ | 4543/12188 [9:52:00<15:00:24, 7.07s/it] 37%|███▋ | 4544/12188 [9:52:07<15:13:00, 7.17s/it] {'loss': 0.3396, 'grad_norm': 0.6187782526746847, 'learning_rate': 7.223302843048657e-06, 'epoch': 0.37} + 37%|███▋ | 4544/12188 [9:52:07<15:13:00, 7.17s/it] 37%|███▋ | 4545/12188 [9:52:15<15:16:09, 7.19s/it] {'loss': 0.3389, 'grad_norm': 0.5641704033534676, 'learning_rate': 7.222112643645449e-06, 'epoch': 0.37} + 37%|███▋ | 4545/12188 [9:52:15<15:16:09, 7.19s/it] 37%|███▋ | 4546/12188 [9:52:21<14:54:27, 7.02s/it] {'loss': 0.357, 'grad_norm': 0.6602707855972512, 'learning_rate': 7.220922287320218e-06, 'epoch': 0.37} + 37%|███▋ | 4546/12188 [9:52:21<14:54:27, 7.02s/it] 37%|███▋ | 4547/12188 [9:52:28<14:46:06, 6.96s/it] {'loss': 0.3829, 'grad_norm': 0.6680970688849711, 'learning_rate': 7.219731774157021e-06, 'epoch': 0.37} + 37%|███▋ | 4547/12188 [9:52:28<14:46:06, 6.96s/it] 37%|███▋ | 4548/12188 [9:52:35<14:41:54, 6.93s/it] {'loss': 0.293, 'grad_norm': 0.6242002413107802, 'learning_rate': 7.218541104239932e-06, 'epoch': 0.37} + 37%|███▋ | 4548/12188 [9:52:35<14:41:54, 6.93s/it] 37%|███▋ | 4549/12188 [9:52:42<15:03:23, 7.10s/it] {'loss': 0.3369, 'grad_norm': 0.6588583150388905, 'learning_rate': 7.217350277653037e-06, 'epoch': 0.37} + 37%|███▋ | 4549/12188 [9:52:42<15:03:23, 7.10s/it] 37%|███▋ | 4550/12188 [9:52:49<14:45:50, 6.96s/it] {'loss': 0.3709, 'grad_norm': 0.6812972169835476, 'learning_rate': 7.216159294480426e-06, 'epoch': 0.37} + 37%|███▋ | 4550/12188 [9:52:49<14:45:50, 6.96s/it] 37%|███▋ | 4551/12188 [9:52:56<15:02:21, 7.09s/it] {'loss': 0.3355, 'grad_norm': 0.6230288394072228, 'learning_rate': 7.214968154806207e-06, 'epoch': 0.37} + 37%|███▋ | 4551/12188 [9:52:56<15:02:21, 7.09s/it] 37%|███▋ | 4552/12188 [9:53:03<14:47:33, 6.97s/it] {'loss': 0.3133, 'grad_norm': 0.6557079677485985, 'learning_rate': 7.213776858714496e-06, 'epoch': 0.37} + 37%|███▋ | 4552/12188 [9:53:03<14:47:33, 6.97s/it] 37%|███▋ | 4553/12188 [9:53:11<15:34:33, 7.34s/it] {'loss': 0.3159, 'grad_norm': 0.6520820953296452, 'learning_rate': 7.2125854062894184e-06, 'epoch': 0.37} + 37%|███▋ | 4553/12188 [9:53:11<15:34:33, 7.34s/it] 37%|███▋ | 4554/12188 [9:53:19<15:35:03, 7.35s/it] {'loss': 0.3675, 'grad_norm': 0.654679771493733, 'learning_rate': 7.211393797615115e-06, 'epoch': 0.37} + 37%|███▋ | 4554/12188 [9:53:19<15:35:03, 7.35s/it] 37%|███▋ | 4555/12188 [9:53:26<15:30:42, 7.32s/it] {'loss': 0.3959, 'grad_norm': 0.6717372182310656, 'learning_rate': 7.2102020327757346e-06, 'epoch': 0.37} + 37%|███▋ | 4555/12188 [9:53:26<15:30:42, 7.32s/it] 37%|███▋ | 4556/12188 [9:53:34<15:45:32, 7.43s/it] {'loss': 0.372, 'grad_norm': 0.6248080509617369, 'learning_rate': 7.209010111855438e-06, 'epoch': 0.37} + 37%|███▋ | 4556/12188 [9:53:34<15:45:32, 7.43s/it] 37%|███▋ | 4557/12188 [9:53:42<16:13:43, 7.66s/it] {'loss': 0.339, 'grad_norm': 0.670813213592887, 'learning_rate': 7.2078180349383954e-06, 'epoch': 0.37} + 37%|███▋ | 4557/12188 [9:53:42<16:13:43, 7.66s/it] 37%|███▋ | 4558/12188 [9:53:49<15:49:12, 7.46s/it] {'loss': 0.3603, 'grad_norm': 0.6319216197094404, 'learning_rate': 7.206625802108791e-06, 'epoch': 0.37} + 37%|███▋ | 4558/12188 [9:53:49<15:49:12, 7.46s/it] 37%|███▋ | 4559/12188 [9:53:56<15:23:06, 7.26s/it] {'loss': 0.3521, 'grad_norm': 0.6414289807226805, 'learning_rate': 7.205433413450818e-06, 'epoch': 0.37} + 37%|███▋ | 4559/12188 [9:53:56<15:23:06, 7.26s/it] 37%|███▋ | 4560/12188 [9:54:03<15:43:26, 7.42s/it] {'loss': 0.3246, 'grad_norm': 0.6101991972449019, 'learning_rate': 7.20424086904868e-06, 'epoch': 0.37} + 37%|███▋ | 4560/12188 [9:54:03<15:43:26, 7.42s/it] 37%|███▋ | 4561/12188 [9:54:12<16:15:03, 7.67s/it] {'loss': 0.3409, 'grad_norm': 0.6387235177142778, 'learning_rate': 7.203048168986594e-06, 'epoch': 0.37} + 37%|███▋ | 4561/12188 [9:54:12<16:15:03, 7.67s/it] 37%|███▋ | 4562/12188 [9:54:20<16:53:40, 7.98s/it] {'loss': 0.3499, 'grad_norm': 0.5927854054360885, 'learning_rate': 7.201855313348783e-06, 'epoch': 0.37} + 37%|███▋ | 4562/12188 [9:54:20<16:53:40, 7.98s/it] 37%|███▋ | 4563/12188 [9:54:27<16:21:52, 7.73s/it] {'loss': 0.3365, 'grad_norm': 0.6323178301152805, 'learning_rate': 7.20066230221949e-06, 'epoch': 0.37} + 37%|███▋ | 4563/12188 [9:54:27<16:21:52, 7.73s/it] 37%|███▋ | 4564/12188 [9:54:35<16:12:33, 7.65s/it] {'loss': 0.3565, 'grad_norm': 0.6396998104837491, 'learning_rate': 7.199469135682961e-06, 'epoch': 0.37} + 37%|███▋ | 4564/12188 [9:54:35<16:12:33, 7.65s/it] 37%|███▋ | 4565/12188 [9:54:42<15:38:19, 7.39s/it] {'loss': 0.3449, 'grad_norm': 0.6136634813664233, 'learning_rate': 7.1982758138234545e-06, 'epoch': 0.37} + 37%|███▋ | 4565/12188 [9:54:42<15:38:19, 7.39s/it] 37%|███▋ | 4566/12188 [9:54:49<15:34:00, 7.35s/it] {'loss': 0.3506, 'grad_norm': 0.6681420354874046, 'learning_rate': 7.197082336725242e-06, 'epoch': 0.37} + 37%|███▋ | 4566/12188 [9:54:49<15:34:00, 7.35s/it] 37%|███▋ | 4567/12188 [9:54:56<15:15:38, 7.21s/it] {'loss': 0.349, 'grad_norm': 0.6622153404679221, 'learning_rate': 7.195888704472604e-06, 'epoch': 0.37} + 37%|███▋ | 4567/12188 [9:54:56<15:15:38, 7.21s/it] 37%|███▋ | 4568/12188 [9:55:03<15:19:59, 7.24s/it] {'loss': 0.3279, 'grad_norm': 0.6131075384216026, 'learning_rate': 7.194694917149835e-06, 'epoch': 0.37} + 37%|███▋ | 4568/12188 [9:55:03<15:19:59, 7.24s/it] 37%|███▋ | 4569/12188 [9:55:10<15:09:41, 7.16s/it] {'loss': 0.3396, 'grad_norm': 0.5840939478889764, 'learning_rate': 7.193500974841237e-06, 'epoch': 0.37} + 37%|███▋ | 4569/12188 [9:55:10<15:09:41, 7.16s/it] 37%|███▋ | 4570/12188 [9:55:17<15:15:45, 7.21s/it] {'loss': 0.3113, 'grad_norm': 0.5936414153069233, 'learning_rate': 7.192306877631123e-06, 'epoch': 0.37} + 37%|███▋ | 4570/12188 [9:55:17<15:15:45, 7.21s/it] 38%|███▊ | 4571/12188 [9:55:26<15:48:32, 7.47s/it] {'loss': 0.3389, 'grad_norm': 0.6243617257217913, 'learning_rate': 7.19111262560382e-06, 'epoch': 0.38} + 38%|███▊ | 4571/12188 [9:55:26<15:48:32, 7.47s/it] 38%|███▊ | 4572/12188 [9:55:32<15:24:01, 7.28s/it] {'loss': 0.3553, 'grad_norm': 0.7087472663658257, 'learning_rate': 7.1899182188436645e-06, 'epoch': 0.38} + 38%|███▊ | 4572/12188 [9:55:32<15:24:01, 7.28s/it] 38%|███▊ | 4573/12188 [9:55:40<15:41:47, 7.42s/it] {'loss': 0.3163, 'grad_norm': 0.6266643024705528, 'learning_rate': 7.1887236574350014e-06, 'epoch': 0.38} + 38%|███▊ | 4573/12188 [9:55:40<15:41:47, 7.42s/it] 38%|███▊ | 4574/12188 [9:55:47<15:35:56, 7.38s/it] {'loss': 0.3632, 'grad_norm': 0.6284290215169902, 'learning_rate': 7.187528941462192e-06, 'epoch': 0.38} + 38%|███▊ | 4574/12188 [9:55:47<15:35:56, 7.38s/it] 38%|███▊ | 4575/12188 [9:55:55<16:00:44, 7.57s/it] {'loss': 0.3751, 'grad_norm': 0.5999578501799753, 'learning_rate': 7.186334071009601e-06, 'epoch': 0.38} + 38%|███▊ | 4575/12188 [9:55:55<16:00:44, 7.57s/it] 38%|███▊ | 4576/12188 [9:56:02<15:31:53, 7.35s/it] {'loss': 0.3704, 'grad_norm': 0.6131897245003688, 'learning_rate': 7.185139046161611e-06, 'epoch': 0.38} + 38%|███▊ | 4576/12188 [9:56:02<15:31:53, 7.35s/it] 38%|███▊ | 4577/12188 [9:56:10<15:28:20, 7.32s/it] {'loss': 0.3467, 'grad_norm': 0.636853065338729, 'learning_rate': 7.183943867002613e-06, 'epoch': 0.38} + 38%|███▊ | 4577/12188 [9:56:10<15:28:20, 7.32s/it] 38%|███▊ | 4578/12188 [9:56:17<15:30:26, 7.34s/it] {'loss': 0.3285, 'grad_norm': 0.669880208949198, 'learning_rate': 7.182748533617008e-06, 'epoch': 0.38} + 38%|███▊ | 4578/12188 [9:56:17<15:30:26, 7.34s/it] 38%|███▊ | 4579/12188 [9:56:24<15:23:05, 7.28s/it] {'loss': 0.3704, 'grad_norm': 0.6513621866812953, 'learning_rate': 7.181553046089208e-06, 'epoch': 0.38} + 38%|███▊ | 4579/12188 [9:56:24<15:23:05, 7.28s/it] 38%|███▊ | 4580/12188 [9:56:31<15:23:02, 7.28s/it] {'loss': 0.3433, 'grad_norm': 0.618763331950323, 'learning_rate': 7.180357404503637e-06, 'epoch': 0.38} + 38%|███▊ | 4580/12188 [9:56:31<15:23:02, 7.28s/it] 38%|███▊ | 4581/12188 [9:56:38<15:14:54, 7.22s/it] {'loss': 0.3711, 'grad_norm': 0.6738839395112313, 'learning_rate': 7.179161608944729e-06, 'epoch': 0.38} + 38%|███▊ | 4581/12188 [9:56:38<15:14:54, 7.22s/it] 38%|███▊ | 4582/12188 [9:56:45<15:04:11, 7.13s/it] {'loss': 0.3398, 'grad_norm': 0.6056175326675547, 'learning_rate': 7.1779656594969294e-06, 'epoch': 0.38} + 38%|███▊ | 4582/12188 [9:56:45<15:04:11, 7.13s/it] 38%|███▊ | 4583/12188 [9:56:52<14:54:18, 7.06s/it] {'loss': 0.3642, 'grad_norm': 0.6698336359043113, 'learning_rate': 7.176769556244694e-06, 'epoch': 0.38} + 38%|███▊ | 4583/12188 [9:56:52<14:54:18, 7.06s/it] 38%|███▊ | 4584/12188 [9:56:59<14:54:52, 7.06s/it] {'loss': 0.3521, 'grad_norm': 0.6457454653779154, 'learning_rate': 7.175573299272489e-06, 'epoch': 0.38} + 38%|███▊ | 4584/12188 [9:56:59<14:54:52, 7.06s/it] 38%|███▊ | 4585/12188 [9:57:08<15:54:50, 7.54s/it] {'loss': 0.3695, 'grad_norm': 0.5851184146503637, 'learning_rate': 7.174376888664793e-06, 'epoch': 0.38} + 38%|███▊ | 4585/12188 [9:57:08<15:54:50, 7.54s/it] 38%|███▊ | 4586/12188 [9:57:15<15:42:51, 7.44s/it] {'loss': 0.3899, 'grad_norm': 0.650207789781782, 'learning_rate': 7.1731803245060975e-06, 'epoch': 0.38} + 38%|███▊ | 4586/12188 [9:57:15<15:42:51, 7.44s/it] 38%|███▊ | 4587/12188 [9:57:23<15:42:48, 7.44s/it] {'loss': 0.3701, 'grad_norm': 0.6546627464387019, 'learning_rate': 7.171983606880897e-06, 'epoch': 0.38} + 38%|███▊ | 4587/12188 [9:57:23<15:42:48, 7.44s/it] 38%|███▊ | 4588/12188 [9:57:29<15:20:21, 7.27s/it] {'loss': 0.3422, 'grad_norm': 0.6245754979339774, 'learning_rate': 7.1707867358737025e-06, 'epoch': 0.38} + 38%|███▊ | 4588/12188 [9:57:29<15:20:21, 7.27s/it] 38%|███▊ | 4589/12188 [9:57:36<14:50:04, 7.03s/it] {'loss': 0.3834, 'grad_norm': 0.6183531432787688, 'learning_rate': 7.169589711569036e-06, 'epoch': 0.38} + 38%|███▊ | 4589/12188 [9:57:36<14:50:04, 7.03s/it] 38%|███▊ | 4590/12188 [9:57:43<14:53:41, 7.06s/it] {'loss': 0.3893, 'grad_norm': 0.6762340236268533, 'learning_rate': 7.168392534051432e-06, 'epoch': 0.38} + 38%|███▊ | 4590/12188 [9:57:43<14:53:41, 7.06s/it] 38%|███▊ | 4591/12188 [9:57:50<14:44:57, 6.99s/it] {'loss': 0.344, 'grad_norm': 0.6109926256002997, 'learning_rate': 7.16719520340543e-06, 'epoch': 0.38} + 38%|███▊ | 4591/12188 [9:57:50<14:44:57, 6.99s/it] 38%|███▊ | 4592/12188 [9:57:57<14:42:51, 6.97s/it] {'loss': 0.3359, 'grad_norm': 0.6408347195822748, 'learning_rate': 7.165997719715585e-06, 'epoch': 0.38} + 38%|███▊ | 4592/12188 [9:57:57<14:42:51, 6.97s/it] 38%|███▊ | 4593/12188 [9:58:04<14:40:47, 6.96s/it] {'loss': 0.2755, 'grad_norm': 0.6196138029302344, 'learning_rate': 7.164800083066461e-06, 'epoch': 0.38} + 38%|███▊ | 4593/12188 [9:58:04<14:40:47, 6.96s/it] 38%|███▊ | 4594/12188 [9:58:11<14:43:12, 6.98s/it] {'loss': 0.356, 'grad_norm': 0.6547185257254503, 'learning_rate': 7.163602293542632e-06, 'epoch': 0.38} + 38%|███▊ | 4594/12188 [9:58:11<14:43:12, 6.98s/it] 38%|███▊ | 4595/12188 [9:58:19<15:32:59, 7.37s/it] {'loss': 0.3398, 'grad_norm': 0.6583762322497764, 'learning_rate': 7.162404351228687e-06, 'epoch': 0.38} + 38%|███▊ | 4595/12188 [9:58:19<15:32:59, 7.37s/it] 38%|███▊ | 4596/12188 [9:58:26<15:01:52, 7.13s/it] {'loss': 0.3357, 'grad_norm': 0.6327589236397315, 'learning_rate': 7.161206256209219e-06, 'epoch': 0.38} + 38%|███▊ | 4596/12188 [9:58:26<15:01:52, 7.13s/it] 38%|███▊ | 4597/12188 [9:58:32<14:45:38, 7.00s/it] {'loss': 0.3504, 'grad_norm': 0.6769617055291326, 'learning_rate': 7.160008008568838e-06, 'epoch': 0.38} + 38%|███▊ | 4597/12188 [9:58:32<14:45:38, 7.00s/it] 38%|███▊ | 4598/12188 [9:58:39<14:27:49, 6.86s/it] {'loss': 0.3649, 'grad_norm': 0.6675015343935601, 'learning_rate': 7.158809608392162e-06, 'epoch': 0.38} + 38%|███▊ | 4598/12188 [9:58:39<14:27:49, 6.86s/it] 38%|███▊ | 4599/12188 [9:58:49<16:43:50, 7.94s/it] {'loss': 0.3186, 'grad_norm': 0.6589232453663924, 'learning_rate': 7.15761105576382e-06, 'epoch': 0.38} + 38%|███▊ | 4599/12188 [9:58:49<16:43:50, 7.94s/it] 38%|███▊ | 4600/12188 [9:58:57<16:36:23, 7.88s/it] {'loss': 0.3453, 'grad_norm': 0.6698742371630652, 'learning_rate': 7.156412350768453e-06, 'epoch': 0.38} + 38%|███▊ | 4600/12188 [9:58:57<16:36:23, 7.88s/it] 38%|███▊ | 4601/12188 [9:59:04<15:52:40, 7.53s/it] {'loss': 0.3538, 'grad_norm': 0.6214184043187801, 'learning_rate': 7.155213493490708e-06, 'epoch': 0.38} + 38%|███▊ | 4601/12188 [9:59:04<15:52:40, 7.53s/it] 38%|███▊ | 4602/12188 [9:59:11<15:57:25, 7.57s/it] {'loss': 0.3201, 'grad_norm': 0.5598535660592915, 'learning_rate': 7.154014484015249e-06, 'epoch': 0.38} + 38%|███▊ | 4602/12188 [9:59:11<15:57:25, 7.57s/it] 38%|███▊ | 4603/12188 [9:59:18<15:30:47, 7.36s/it] {'loss': 0.3025, 'grad_norm': 0.6053298589460756, 'learning_rate': 7.152815322426749e-06, 'epoch': 0.38} + 38%|███▊ | 4603/12188 [9:59:18<15:30:47, 7.36s/it] 38%|███▊ | 4604/12188 [9:59:28<16:43:22, 7.94s/it] {'loss': 0.2942, 'grad_norm': 0.6043484274724334, 'learning_rate': 7.151616008809889e-06, 'epoch': 0.38} + 38%|███▊ | 4604/12188 [9:59:28<16:43:22, 7.94s/it] 38%|███▊ | 4605/12188 [9:59:37<17:47:43, 8.45s/it] {'loss': 0.3584, 'grad_norm': 0.6384230919752857, 'learning_rate': 7.150416543249364e-06, 'epoch': 0.38} + 38%|███▊ | 4605/12188 [9:59:37<17:47:43, 8.45s/it] 38%|███▊ | 4606/12188 [9:59:44<16:55:04, 8.03s/it] {'loss': 0.3166, 'grad_norm': 0.6594304640773905, 'learning_rate': 7.149216925829876e-06, 'epoch': 0.38} + 38%|███▊ | 4606/12188 [9:59:44<16:55:04, 8.03s/it] 38%|███▊ | 4607/12188 [9:59:51<16:09:43, 7.67s/it] {'loss': 0.3441, 'grad_norm': 0.6263459003309572, 'learning_rate': 7.148017156636144e-06, 'epoch': 0.38} + 38%|███▊ | 4607/12188 [9:59:51<16:09:43, 7.67s/it] 38%|███▊ | 4608/12188 [9:59:58<15:38:34, 7.43s/it] {'loss': 0.3392, 'grad_norm': 0.6649856142974321, 'learning_rate': 7.14681723575289e-06, 'epoch': 0.38} + 38%|███▊ | 4608/12188 [9:59:58<15:38:34, 7.43s/it] 38%|███▊ | 4609/12188 [10:00:05<15:12:52, 7.23s/it] {'loss': 0.3648, 'grad_norm': 0.6288700692100119, 'learning_rate': 7.145617163264851e-06, 'epoch': 0.38} + 38%|███▊ | 4609/12188 [10:00:05<15:12:52, 7.23s/it] 38%|███▊ | 4610/12188 [10:00:12<15:27:31, 7.34s/it] {'loss': 0.3992, 'grad_norm': 0.6722480482214914, 'learning_rate': 7.144416939256777e-06, 'epoch': 0.38} + 38%|███▊ | 4610/12188 [10:00:12<15:27:31, 7.34s/it] 38%|███▊ | 4611/12188 [10:00:19<14:59:53, 7.13s/it] {'loss': 0.3349, 'grad_norm': 0.6676482889913766, 'learning_rate': 7.143216563813422e-06, 'epoch': 0.38} + 38%|███▊ | 4611/12188 [10:00:19<14:59:53, 7.13s/it] 38%|███▊ | 4612/12188 [10:00:26<14:46:50, 7.02s/it] {'loss': 0.3092, 'grad_norm': 0.6058845333353462, 'learning_rate': 7.142016037019558e-06, 'epoch': 0.38} + 38%|███▊ | 4612/12188 [10:00:26<14:46:50, 7.02s/it] 38%|███▊ | 4613/12188 [10:00:34<15:16:47, 7.26s/it] {'loss': 0.3279, 'grad_norm': 0.6059316919339756, 'learning_rate': 7.140815358959963e-06, 'epoch': 0.38} + 38%|███▊ | 4613/12188 [10:00:34<15:16:47, 7.26s/it] 38%|███▊ | 4614/12188 [10:00:40<14:50:54, 7.06s/it] {'loss': 0.3244, 'grad_norm': 0.6179206172006814, 'learning_rate': 7.139614529719427e-06, 'epoch': 0.38} + 38%|███▊ | 4614/12188 [10:00:40<14:50:54, 7.06s/it] 38%|███▊ | 4615/12188 [10:00:47<14:29:32, 6.89s/it] {'loss': 0.3445, 'grad_norm': 0.7038102112651489, 'learning_rate': 7.13841354938275e-06, 'epoch': 0.38} + 38%|███▊ | 4615/12188 [10:00:47<14:29:32, 6.89s/it] 38%|███▊ | 4616/12188 [10:00:54<14:35:11, 6.93s/it] {'loss': 0.3611, 'grad_norm': 0.610168456491401, 'learning_rate': 7.137212418034744e-06, 'epoch': 0.38} + 38%|███▊ | 4616/12188 [10:00:54<14:35:11, 6.93s/it] 38%|███▊ | 4617/12188 [10:01:01<14:44:10, 7.01s/it] {'loss': 0.3417, 'grad_norm': 0.6398608324819129, 'learning_rate': 7.13601113576023e-06, 'epoch': 0.38} + 38%|███▊ | 4617/12188 [10:01:01<14:44:10, 7.01s/it] 38%|███▊ | 4618/12188 [10:01:09<15:27:43, 7.35s/it] {'loss': 0.3388, 'grad_norm': 0.615174691070354, 'learning_rate': 7.134809702644042e-06, 'epoch': 0.38} + 38%|███▊ | 4618/12188 [10:01:09<15:27:43, 7.35s/it] 38%|███▊ | 4619/12188 [10:01:16<15:06:08, 7.18s/it] {'loss': 0.3019, 'grad_norm': 0.6362750750975755, 'learning_rate': 7.1336081187710225e-06, 'epoch': 0.38} + 38%|███▊ | 4619/12188 [10:01:16<15:06:08, 7.18s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7fe93d07e750> +[Try #0] Failed to fetch sample 4739995 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7fe93d07e750> +Problematic sample: {'image': '20240823_021250_before_screenshot_sub0.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Jewelry & Watches'"}, {'from': 'gpt', 'value': '\nclick(x=0.413, y=0.266)\n'}]} + 38%|███▊ | 4620/12188 [10:01:23<15:20:10, 7.30s/it] {'loss': 0.3546, 'grad_norm': 0.618682997169944, 'learning_rate': 7.132406384226026e-06, 'epoch': 0.38} + 38%|███▊ | 4620/12188 [10:01:23<15:20:10, 7.30s/it] 38%|███▊ | 4621/12188 [10:01:30<14:55:54, 7.10s/it] {'loss': 0.3695, 'grad_norm': 0.6554493974317134, 'learning_rate': 7.1312044990939155e-06, 'epoch': 0.38} + 38%|███▊ | 4621/12188 [10:01:30<14:55:54, 7.10s/it] 38%|███▊ | 4622/12188 [10:01:40<16:55:21, 8.05s/it] {'loss': 0.3193, 'grad_norm': 0.6160912395014112, 'learning_rate': 7.130002463459569e-06, 'epoch': 0.38} + 38%|███▊ | 4622/12188 [10:01:40<16:55:21, 8.05s/it] 38%|███▊ | 4623/12188 [10:01:47<16:16:07, 7.74s/it] {'loss': 0.3464, 'grad_norm': 0.6168319962086525, 'learning_rate': 7.128800277407869e-06, 'epoch': 0.38} + 38%|███▊ | 4623/12188 [10:01:47<16:16:07, 7.74s/it] 38%|███▊ | 4624/12188 [10:01:55<15:58:30, 7.60s/it] {'loss': 0.3756, 'grad_norm': 0.6209990516895828, 'learning_rate': 7.127597941023714e-06, 'epoch': 0.38} + 38%|███▊ | 4624/12188 [10:01:55<15:58:30, 7.60s/it] 38%|███▊ | 4625/12188 [10:02:02<15:37:56, 7.44s/it] {'loss': 0.3605, 'grad_norm': 0.6359743173956799, 'learning_rate': 7.12639545439201e-06, 'epoch': 0.38} + 38%|███▊ | 4625/12188 [10:02:02<15:37:56, 7.44s/it] 38%|███▊ | 4626/12188 [10:02:09<15:25:29, 7.34s/it] {'loss': 0.3739, 'grad_norm': 0.6420938389637031, 'learning_rate': 7.125192817597677e-06, 'epoch': 0.38} + 38%|███▊ | 4626/12188 [10:02:09<15:25:29, 7.34s/it] 38%|███▊ | 4627/12188 [10:02:16<15:11:59, 7.24s/it] {'loss': 0.345, 'grad_norm': 0.686949918105225, 'learning_rate': 7.123990030725641e-06, 'epoch': 0.38} + 38%|███▊ | 4627/12188 [10:02:16<15:11:59, 7.24s/it] 38%|███▊ | 4628/12188 [10:02:23<15:01:19, 7.15s/it] {'loss': 0.3623, 'grad_norm': 0.7394284048511102, 'learning_rate': 7.1227870938608424e-06, 'epoch': 0.38} + 38%|███▊ | 4628/12188 [10:02:23<15:01:19, 7.15s/it] 38%|███▊ | 4629/12188 [10:02:30<14:51:08, 7.07s/it] {'loss': 0.3478, 'grad_norm': 0.6078096466837737, 'learning_rate': 7.121584007088227e-06, 'epoch': 0.38} + 38%|███▊ | 4629/12188 [10:02:30<14:51:08, 7.07s/it] 38%|███▊ | 4630/12188 [10:02:38<15:28:57, 7.37s/it] {'loss': 0.3822, 'grad_norm': 0.6767129742039698, 'learning_rate': 7.12038077049276e-06, 'epoch': 0.38} + 38%|███▊ | 4630/12188 [10:02:38<15:28:57, 7.37s/it] 38%|███▊ | 4631/12188 [10:02:45<15:26:42, 7.36s/it] {'loss': 0.3241, 'grad_norm': 0.6466603985766054, 'learning_rate': 7.119177384159408e-06, 'epoch': 0.38} + 38%|███▊ | 4631/12188 [10:02:45<15:26:42, 7.36s/it] 38%|███▊ | 4632/12188 [10:02:53<16:01:45, 7.64s/it] {'loss': 0.3597, 'grad_norm': 0.6224227990810629, 'learning_rate': 7.117973848173155e-06, 'epoch': 0.38} + 38%|███▊ | 4632/12188 [10:02:53<16:01:45, 7.64s/it] 38%|███▊ | 4633/12188 [10:03:00<15:26:42, 7.36s/it] {'loss': 0.3546, 'grad_norm': 0.6316224364934603, 'learning_rate': 7.116770162618992e-06, 'epoch': 0.38} + 38%|███▊ | 4633/12188 [10:03:00<15:26:42, 7.36s/it] 38%|███▊ | 4634/12188 [10:03:07<15:10:16, 7.23s/it] {'loss': 0.3539, 'grad_norm': 0.6153778948050093, 'learning_rate': 7.11556632758192e-06, 'epoch': 0.38} + 38%|███▊ | 4634/12188 [10:03:07<15:10:16, 7.23s/it] 38%|███▊ | 4635/12188 [10:03:15<15:46:47, 7.52s/it] {'loss': 0.3301, 'grad_norm': 0.6288405494355122, 'learning_rate': 7.114362343146953e-06, 'epoch': 0.38} + 38%|███▊ | 4635/12188 [10:03:15<15:46:47, 7.52s/it] 38%|███▊ | 4636/12188 [10:03:23<16:00:55, 7.63s/it] {'loss': 0.3596, 'grad_norm': 0.69984218722341, 'learning_rate': 7.113158209399115e-06, 'epoch': 0.38} + 38%|███▊ | 4636/12188 [10:03:23<16:00:55, 7.63s/it] 38%|███▊ | 4637/12188 [10:03:30<15:41:10, 7.48s/it] {'loss': 0.3449, 'grad_norm': 0.581289559179693, 'learning_rate': 7.111953926423439e-06, 'epoch': 0.38} + 38%|███▊ | 4637/12188 [10:03:30<15:41:10, 7.48s/it] 38%|███▊ | 4638/12188 [10:03:37<15:20:41, 7.32s/it] {'loss': 0.356, 'grad_norm': 0.6611346918409942, 'learning_rate': 7.11074949430497e-06, 'epoch': 0.38} + 38%|███▊ | 4638/12188 [10:03:37<15:20:41, 7.32s/it] 38%|███▊ | 4639/12188 [10:03:45<15:37:26, 7.45s/it] {'loss': 0.3722, 'grad_norm': 0.7413926830620532, 'learning_rate': 7.109544913128761e-06, 'epoch': 0.38} + 38%|███▊ | 4639/12188 [10:03:45<15:37:26, 7.45s/it] 38%|███▊ | 4640/12188 [10:03:52<15:17:38, 7.29s/it] {'loss': 0.3465, 'grad_norm': 0.6509113468802402, 'learning_rate': 7.108340182979881e-06, 'epoch': 0.38} + 38%|███▊ | 4640/12188 [10:03:52<15:17:38, 7.29s/it] 38%|███▊ | 4641/12188 [10:03:58<14:51:39, 7.09s/it] {'loss': 0.3556, 'grad_norm': 0.6649248102714362, 'learning_rate': 7.107135303943406e-06, 'epoch': 0.38} + 38%|███▊ | 4641/12188 [10:03:58<14:51:39, 7.09s/it] 38%|███▊ | 4642/12188 [10:04:06<15:22:33, 7.34s/it] {'loss': 0.3384, 'grad_norm': 0.6376762570057808, 'learning_rate': 7.10593027610442e-06, 'epoch': 0.38} + 38%|███▊ | 4642/12188 [10:04:06<15:22:33, 7.34s/it] 38%|███▊ | 4643/12188 [10:04:14<15:19:04, 7.31s/it] {'loss': 0.343, 'grad_norm': 0.6916527744300611, 'learning_rate': 7.10472509954802e-06, 'epoch': 0.38} + 38%|███▊ | 4643/12188 [10:04:14<15:19:04, 7.31s/it] 38%|███▊ | 4644/12188 [10:04:21<15:14:10, 7.27s/it] {'loss': 0.3233, 'grad_norm': 0.6830960883510018, 'learning_rate': 7.103519774359316e-06, 'epoch': 0.38} + 38%|███▊ | 4644/12188 [10:04:21<15:14:10, 7.27s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f054e7fbab0> +[Try #0] Failed to fetch sample 1034021 in VC:s3://gui/aguvis/aguvis-stage2/amex/images. Exception: cannot identify image file <_io.BytesIO object at 0x7f054e7fbab0> +Problematic sample: {'image': 'bf3459bcaf434803a580fcd36cbe71aestep0.png', 'conversations': [{'from': 'human', 'value': '\nPlease generate the next move according to the UI screenshot, the task and previous operations.\n\nTask:\nOpen AP News. Share the link of the first article in the "Business" category\n\nPrevious operations:\nNone'}, {'from': 'gpt', 'value': "\nTap on the AP News app to open it.\n\n\nterminate(status='success')\n"}]} + 38%|███▊ | 4645/12188 [10:04:28<15:16:41, 7.29s/it] {'loss': 0.3613, 'grad_norm': 0.7235524853796136, 'learning_rate': 7.102314300623425e-06, 'epoch': 0.38} + 38%|███▊ | 4645/12188 [10:04:28<15:16:41, 7.29s/it] 38%|███▊ | 4646/12188 [10:04:35<15:21:00, 7.33s/it] {'loss': 0.3053, 'grad_norm': 0.6483337368142531, 'learning_rate': 7.1011086784254765e-06, 'epoch': 0.38} + 38%|███▊ | 4646/12188 [10:04:35<15:21:00, 7.33s/it] 38%|███▊ | 4647/12188 [10:04:42<15:05:54, 7.21s/it] {'loss': 0.3502, 'grad_norm': 0.6685637610906855, 'learning_rate': 7.0999029078506075e-06, 'epoch': 0.38} + 38%|███▊ | 4647/12188 [10:04:42<15:05:54, 7.21s/it] 38%|███▊ | 4648/12188 [10:04:50<15:02:47, 7.18s/it] {'loss': 0.3457, 'grad_norm': 0.640235014361567, 'learning_rate': 7.098696988983971e-06, 'epoch': 0.38} + 38%|███▊ | 4648/12188 [10:04:50<15:02:47, 7.18s/it] 38%|███▊ | 4649/12188 [10:04:57<15:01:08, 7.17s/it] {'loss': 0.338, 'grad_norm': 0.6574630403766186, 'learning_rate': 7.097490921910724e-06, 'epoch': 0.38} + 38%|███▊ | 4649/12188 [10:04:57<15:01:08, 7.17s/it] 38%|███▊ | 4650/12188 [10:05:03<14:45:25, 7.05s/it] {'loss': 0.3408, 'grad_norm': 0.6313317937295632, 'learning_rate': 7.096284706716037e-06, 'epoch': 0.38} + 38%|███▊ | 4650/12188 [10:05:03<14:45:25, 7.05s/it] 38%|███▊ | 4651/12188 [10:05:10<14:25:12, 6.89s/it] {'loss': 0.3596, 'grad_norm': 0.6500644362236917, 'learning_rate': 7.095078343485092e-06, 'epoch': 0.38} + 38%|███▊ | 4651/12188 [10:05:10<14:25:12, 6.89s/it] 38%|███▊ | 4652/12188 [10:05:17<14:39:21, 7.00s/it] {'loss': 0.3444, 'grad_norm': 0.6383067557191806, 'learning_rate': 7.0938718323030805e-06, 'epoch': 0.38} + 38%|███▊ | 4652/12188 [10:05:17<14:39:21, 7.00s/it] 38%|███▊ | 4653/12188 [10:05:25<15:12:07, 7.26s/it] {'loss': 0.3381, 'grad_norm': 0.6456607901486484, 'learning_rate': 7.092665173255205e-06, 'epoch': 0.38} + 38%|███▊ | 4653/12188 [10:05:25<15:12:07, 7.26s/it] 38%|███▊ | 4654/12188 [10:05:32<14:57:17, 7.15s/it] {'loss': 0.3765, 'grad_norm': 0.6902482327208974, 'learning_rate': 7.0914583664266765e-06, 'epoch': 0.38} + 38%|███▊ | 4654/12188 [10:05:32<14:57:17, 7.15s/it] 38%|███▊ | 4655/12188 [10:05:38<14:34:03, 6.96s/it] {'loss': 0.3832, 'grad_norm': 0.6401415544028507, 'learning_rate': 7.090251411902719e-06, 'epoch': 0.38} + 38%|███▊ | 4655/12188 [10:05:38<14:34:03, 6.96s/it] 38%|███▊ | 4656/12188 [10:05:45<14:18:32, 6.84s/it] {'loss': 0.3108, 'grad_norm': 0.7117352057134897, 'learning_rate': 7.089044309768565e-06, 'epoch': 0.38} + 38%|███▊ | 4656/12188 [10:05:45<14:18:32, 6.84s/it] 38%|███▊ | 4657/12188 [10:05:53<14:52:41, 7.11s/it] {'loss': 0.3717, 'grad_norm': 0.7013307059044207, 'learning_rate': 7.087837060109457e-06, 'epoch': 0.38} + 38%|███▊ | 4657/12188 [10:05:53<14:52:41, 7.11s/it] 38%|███▊ | 4658/12188 [10:06:00<15:03:59, 7.20s/it] {'loss': 0.3414, 'grad_norm': 0.6539154226430143, 'learning_rate': 7.086629663010651e-06, 'epoch': 0.38} + 38%|███▊ | 4658/12188 [10:06:00<15:03:59, 7.20s/it] 38%|███▊ | 4659/12188 [10:06:07<14:47:04, 7.07s/it] {'loss': 0.358, 'grad_norm': 0.6114126278442333, 'learning_rate': 7.085422118557411e-06, 'epoch': 0.38} + 38%|███▊ | 4659/12188 [10:06:07<14:47:04, 7.07s/it] 38%|███▊ | 4660/12188 [10:06:14<14:53:44, 7.12s/it] {'loss': 0.3345, 'grad_norm': 0.5974801472897218, 'learning_rate': 7.08421442683501e-06, 'epoch': 0.38} + 38%|███▊ | 4660/12188 [10:06:14<14:53:44, 7.12s/it] 38%|███▊ | 4661/12188 [10:06:21<14:48:31, 7.08s/it] {'loss': 0.3188, 'grad_norm': 0.6527621455724205, 'learning_rate': 7.083006587928736e-06, 'epoch': 0.38} + 38%|███▊ | 4661/12188 [10:06:21<14:48:31, 7.08s/it] 38%|███▊ | 4662/12188 [10:06:28<14:44:53, 7.05s/it] {'loss': 0.3597, 'grad_norm': 0.6575883877036462, 'learning_rate': 7.081798601923883e-06, 'epoch': 0.38} + 38%|███▊ | 4662/12188 [10:06:28<14:44:53, 7.05s/it] 38%|███▊ | 4663/12188 [10:06:35<14:35:14, 6.98s/it] {'loss': 0.3206, 'grad_norm': 0.6449736472558852, 'learning_rate': 7.08059046890576e-06, 'epoch': 0.38} + 38%|███▊ | 4663/12188 [10:06:35<14:35:14, 6.98s/it] 38%|███▊ | 4664/12188 [10:06:42<14:37:18, 7.00s/it] {'loss': 0.3823, 'grad_norm': 0.6796440147154437, 'learning_rate': 7.079382188959677e-06, 'epoch': 0.38} + 38%|███▊ | 4664/12188 [10:06:42<14:37:18, 7.00s/it] 38%|███▊ | 4665/12188 [10:06:49<14:21:54, 6.87s/it] {'loss': 0.371, 'grad_norm': 0.6072123729904314, 'learning_rate': 7.078173762170966e-06, 'epoch': 0.38} + 38%|███▊ | 4665/12188 [10:06:49<14:21:54, 6.87s/it] 38%|███▊ | 4666/12188 [10:06:55<14:19:47, 6.86s/it] {'loss': 0.3496, 'grad_norm': 0.6496195573907408, 'learning_rate': 7.076965188624963e-06, 'epoch': 0.38} + 38%|███▊ | 4666/12188 [10:06:55<14:19:47, 6.86s/it] 38%|███▊ | 4667/12188 [10:07:02<14:21:23, 6.87s/it] {'loss': 0.3441, 'grad_norm': 0.5814918922101804, 'learning_rate': 7.075756468407014e-06, 'epoch': 0.38} + 38%|███▊ | 4667/12188 [10:07:02<14:21:23, 6.87s/it] 38%|███▊ | 4668/12188 [10:07:09<14:15:35, 6.83s/it] {'loss': 0.3161, 'grad_norm': 0.6598857082746105, 'learning_rate': 7.074547601602479e-06, 'epoch': 0.38} + 38%|███▊ | 4668/12188 [10:07:09<14:15:35, 6.83s/it] 38%|███▊ | 4669/12188 [10:07:17<14:53:20, 7.13s/it] {'loss': 0.3405, 'grad_norm': 0.5799553260545802, 'learning_rate': 7.073338588296725e-06, 'epoch': 0.38} + 38%|███▊ | 4669/12188 [10:07:17<14:53:20, 7.13s/it] 38%|███▊ | 4670/12188 [10:07:24<14:49:34, 7.10s/it] {'loss': 0.3361, 'grad_norm': 0.6362750059433098, 'learning_rate': 7.072129428575132e-06, 'epoch': 0.38} + 38%|███▊ | 4670/12188 [10:07:24<14:49:34, 7.10s/it] 38%|███▊ | 4671/12188 [10:07:31<14:41:39, 7.04s/it] {'loss': 0.3813, 'grad_norm': 0.6853901222767201, 'learning_rate': 7.070920122523088e-06, 'epoch': 0.38} + 38%|███▊ | 4671/12188 [10:07:31<14:41:39, 7.04s/it] 38%|███▊ | 4672/12188 [10:07:38<14:53:38, 7.13s/it] {'loss': 0.3949, 'grad_norm': 0.6801754449818809, 'learning_rate': 7.069710670225991e-06, 'epoch': 0.38} + 38%|███▊ | 4672/12188 [10:07:38<14:53:38, 7.13s/it] 38%|███▊ | 4673/12188 [10:07:46<15:10:53, 7.27s/it] {'loss': 0.3957, 'grad_norm': 0.6987238566839218, 'learning_rate': 7.068501071769253e-06, 'epoch': 0.38} + 38%|███▊ | 4673/12188 [10:07:46<15:10:53, 7.27s/it] 38%|███▊ | 4674/12188 [10:07:53<15:06:48, 7.24s/it] {'loss': 0.3625, 'grad_norm': 0.7144356373853752, 'learning_rate': 7.06729132723829e-06, 'epoch': 0.38} + 38%|███▊ | 4674/12188 [10:07:53<15:06:48, 7.24s/it] 38%|███▊ | 4675/12188 [10:08:00<14:56:19, 7.16s/it] {'loss': 0.3564, 'grad_norm': 0.7051687642855712, 'learning_rate': 7.066081436718537e-06, 'epoch': 0.38} + 38%|███▊ | 4675/12188 [10:08:00<14:56:19, 7.16s/it] 38%|███▊ | 4676/12188 [10:08:07<14:46:59, 7.08s/it] {'loss': 0.3516, 'grad_norm': 0.6146888317710476, 'learning_rate': 7.0648714002954336e-06, 'epoch': 0.38} + 38%|███▊ | 4676/12188 [10:08:07<14:46:59, 7.08s/it] 38%|███▊ | 4677/12188 [10:08:15<15:20:40, 7.35s/it] {'loss': 0.3566, 'grad_norm': 0.6692583695599219, 'learning_rate': 7.063661218054427e-06, 'epoch': 0.38} + 38%|███▊ | 4677/12188 [10:08:15<15:20:40, 7.35s/it] 38%|███▊ | 4678/12188 [10:08:22<14:56:52, 7.17s/it] {'loss': 0.3447, 'grad_norm': 0.6948625367916874, 'learning_rate': 7.0624508900809815e-06, 'epoch': 0.38} + 38%|███▊ | 4678/12188 [10:08:22<14:56:52, 7.17s/it] 38%|███▊ | 4679/12188 [10:08:29<15:10:45, 7.28s/it] {'loss': 0.3924, 'grad_norm': 1.1970696329767296, 'learning_rate': 7.061240416460568e-06, 'epoch': 0.38} + 38%|███▊ | 4679/12188 [10:08:29<15:10:45, 7.28s/it] 38%|███▊ | 4680/12188 [10:08:37<15:43:25, 7.54s/it] {'loss': 0.3454, 'grad_norm': 0.7092447645197281, 'learning_rate': 7.060029797278667e-06, 'epoch': 0.38} + 38%|███▊ | 4680/12188 [10:08:37<15:43:25, 7.54s/it] 38%|███▊ | 4681/12188 [10:08:45<15:55:46, 7.64s/it] {'loss': 0.3643, 'grad_norm': 0.6523569088896445, 'learning_rate': 7.058819032620773e-06, 'epoch': 0.38} + 38%|███▊ | 4681/12188 [10:08:45<15:55:46, 7.64s/it] 38%|███▊ | 4682/12188 [10:08:52<15:43:24, 7.54s/it] {'loss': 0.3435, 'grad_norm': 0.6093158175579021, 'learning_rate': 7.0576081225723855e-06, 'epoch': 0.38} + 38%|███▊ | 4682/12188 [10:08:52<15:43:24, 7.54s/it] 38%|███▊ | 4683/12188 [10:08:59<15:18:15, 7.34s/it] {'loss': 0.329, 'grad_norm': 0.6535634717504762, 'learning_rate': 7.0563970672190185e-06, 'epoch': 0.38} + 38%|███▊ | 4683/12188 [10:08:59<15:18:15, 7.34s/it] 38%|███▊ | 4684/12188 [10:09:07<15:41:23, 7.53s/it] {'loss': 0.31, 'grad_norm': 0.6168477146793439, 'learning_rate': 7.055185866646195e-06, 'epoch': 0.38} + 38%|███▊ | 4684/12188 [10:09:07<15:41:23, 7.53s/it] 38%|███▊ | 4685/12188 [10:09:15<15:57:31, 7.66s/it] {'loss': 0.3285, 'grad_norm': 0.6869006389820373, 'learning_rate': 7.053974520939446e-06, 'epoch': 0.38} + 38%|███▊ | 4685/12188 [10:09:15<15:57:31, 7.66s/it] 38%|███▊ | 4686/12188 [10:09:23<15:48:41, 7.59s/it] {'loss': 0.3384, 'grad_norm': 0.6826708065766045, 'learning_rate': 7.052763030184317e-06, 'epoch': 0.38} + 38%|███▊ | 4686/12188 [10:09:23<15:48:41, 7.59s/it] 38%|███▊ | 4687/12188 [10:09:30<15:27:50, 7.42s/it] {'loss': 0.3969, 'grad_norm': 0.6670061317108733, 'learning_rate': 7.05155139446636e-06, 'epoch': 0.38} + 38%|███▊ | 4687/12188 [10:09:30<15:27:50, 7.42s/it] 38%|███▊ | 4688/12188 [10:09:36<15:04:01, 7.23s/it] {'loss': 0.365, 'grad_norm': 0.6452404811563304, 'learning_rate': 7.050339613871141e-06, 'epoch': 0.38} + 38%|███▊ | 4688/12188 [10:09:36<15:04:01, 7.23s/it] 38%|███▊ | 4689/12188 [10:09:45<15:59:58, 7.68s/it] {'loss': 0.3779, 'grad_norm': 0.6088569946308204, 'learning_rate': 7.049127688484232e-06, 'epoch': 0.38} + 38%|███▊ | 4689/12188 [10:09:45<15:59:58, 7.68s/it] 38%|███▊ | 4690/12188 [10:09:52<15:21:57, 7.38s/it] {'loss': 0.3481, 'grad_norm': 0.717471665958166, 'learning_rate': 7.047915618391219e-06, 'epoch': 0.38} + 38%|███▊ | 4690/12188 [10:09:52<15:21:57, 7.38s/it] 38%|███▊ | 4691/12188 [10:09:59<15:21:29, 7.37s/it] {'loss': 0.324, 'grad_norm': 0.6385994861818417, 'learning_rate': 7.0467034036776945e-06, 'epoch': 0.38} + 38%|███▊ | 4691/12188 [10:09:59<15:21:29, 7.37s/it] 38%|███▊ | 4692/12188 [10:10:06<15:06:31, 7.26s/it] {'loss': 0.3428, 'grad_norm': 0.597639356115617, 'learning_rate': 7.0454910444292634e-06, 'epoch': 0.38} + 38%|███▊ | 4692/12188 [10:10:06<15:06:31, 7.26s/it] 39%|███▊ | 4693/12188 [10:10:14<15:23:43, 7.39s/it] {'loss': 0.3789, 'grad_norm': 0.7258858202217039, 'learning_rate': 7.044278540731542e-06, 'epoch': 0.39} + 39%|███▊ | 4693/12188 [10:10:14<15:23:43, 7.39s/it] 39%|███▊ | 4694/12188 [10:10:22<15:53:45, 7.64s/it] {'loss': 0.3718, 'grad_norm': 0.6327011780901894, 'learning_rate': 7.043065892670154e-06, 'epoch': 0.39} + 39%|███▊ | 4694/12188 [10:10:22<15:53:45, 7.64s/it] 39%|███▊ | 4695/12188 [10:10:30<15:49:36, 7.60s/it] {'loss': 0.3357, 'grad_norm': 0.6614184155300179, 'learning_rate': 7.041853100330733e-06, 'epoch': 0.39} + 39%|███▊ | 4695/12188 [10:10:30<15:49:36, 7.60s/it] 39%|███▊ | 4696/12188 [10:10:36<15:12:09, 7.30s/it] {'loss': 0.3501, 'grad_norm': 0.6409723023988451, 'learning_rate': 7.04064016379893e-06, 'epoch': 0.39} + 39%|███▊ | 4696/12188 [10:10:36<15:12:09, 7.30s/it] 39%|███▊ | 4697/12188 [10:10:43<15:05:34, 7.25s/it] {'loss': 0.3331, 'grad_norm': 0.6299453587421379, 'learning_rate': 7.039427083160395e-06, 'epoch': 0.39} + 39%|███▊ | 4697/12188 [10:10:43<15:05:34, 7.25s/it] 39%|███▊ | 4698/12188 [10:10:51<15:18:47, 7.36s/it] {'loss': 0.3469, 'grad_norm': 0.6209352033359549, 'learning_rate': 7.038213858500796e-06, 'epoch': 0.39} + 39%|███▊ | 4698/12188 [10:10:51<15:18:47, 7.36s/it] 39%|███▊ | 4699/12188 [10:10:59<15:25:23, 7.41s/it] {'loss': 0.33, 'grad_norm': 0.6283319564526384, 'learning_rate': 7.0370004899058095e-06, 'epoch': 0.39} + 39%|███▊ | 4699/12188 [10:10:59<15:25:23, 7.41s/it] 39%|███▊ | 4700/12188 [10:11:05<14:58:37, 7.20s/it] {'loss': 0.3207, 'grad_norm': 0.6136776872930106, 'learning_rate': 7.035786977461122e-06, 'epoch': 0.39} + 39%|███▊ | 4700/12188 [10:11:05<14:58:37, 7.20s/it] 39%|███▊ | 4701/12188 [10:11:12<14:50:50, 7.14s/it] {'loss': 0.3137, 'grad_norm': 0.6189912581070234, 'learning_rate': 7.034573321252427e-06, 'epoch': 0.39} + 39%|███▊ | 4701/12188 [10:11:12<14:50:50, 7.14s/it] 39%|███▊ | 4702/12188 [10:11:20<15:03:51, 7.24s/it] {'loss': 0.3614, 'grad_norm': 0.6604956207653381, 'learning_rate': 7.033359521365434e-06, 'epoch': 0.39} + 39%|███▊ | 4702/12188 [10:11:20<15:03:51, 7.24s/it] 39%|███▊ | 4703/12188 [10:11:28<15:47:27, 7.59s/it] {'loss': 0.3467, 'grad_norm': 0.5955116122108765, 'learning_rate': 7.032145577885858e-06, 'epoch': 0.39} + 39%|███▊ | 4703/12188 [10:11:28<15:47:27, 7.59s/it] 39%|███▊ | 4704/12188 [10:11:35<15:36:58, 7.51s/it] {'loss': 0.4079, 'grad_norm': 0.7040972731573512, 'learning_rate': 7.030931490899427e-06, 'epoch': 0.39} + 39%|███▊ | 4704/12188 [10:11:35<15:36:58, 7.51s/it] 39%|███▊ | 4705/12188 [10:11:43<15:37:19, 7.52s/it] {'loss': 0.3484, 'grad_norm': 0.6333611660811831, 'learning_rate': 7.029717260491876e-06, 'epoch': 0.39} + 39%|███▊ | 4705/12188 [10:11:43<15:37:19, 7.52s/it] 39%|███▊ | 4706/12188 [10:11:50<15:16:09, 7.35s/it] {'loss': 0.34, 'grad_norm': 0.6336255809191544, 'learning_rate': 7.0285028867489535e-06, 'epoch': 0.39} + 39%|███▊ | 4706/12188 [10:11:50<15:16:09, 7.35s/it] 39%|███▊ | 4707/12188 [10:11:59<16:04:40, 7.74s/it] {'loss': 0.3914, 'grad_norm': 0.6700850190754825, 'learning_rate': 7.0272883697564175e-06, 'epoch': 0.39} + 39%|███▊ | 4707/12188 [10:11:59<16:04:40, 7.74s/it] 39%|███▊ | 4708/12188 [10:12:07<16:21:06, 7.87s/it] {'loss': 0.3595, 'grad_norm': 0.6159136465534256, 'learning_rate': 7.026073709600032e-06, 'epoch': 0.39} + 39%|███▊ | 4708/12188 [10:12:07<16:21:06, 7.87s/it] 39%|███▊ | 4709/12188 [10:12:14<15:59:17, 7.70s/it] {'loss': 0.3712, 'grad_norm': 0.6063251467769445, 'learning_rate': 7.024858906365577e-06, 'epoch': 0.39} + 39%|███▊ | 4709/12188 [10:12:14<15:59:17, 7.70s/it] 39%|███▊ | 4710/12188 [10:12:21<15:21:04, 7.39s/it] {'loss': 0.339, 'grad_norm': 0.6525735740234816, 'learning_rate': 7.02364396013884e-06, 'epoch': 0.39} + 39%|███▊ | 4710/12188 [10:12:21<15:21:04, 7.39s/it] 39%|███▊ | 4711/12188 [10:12:28<15:12:58, 7.33s/it] {'loss': 0.3366, 'grad_norm': 0.6193555781656128, 'learning_rate': 7.022428871005617e-06, 'epoch': 0.39} + 39%|███▊ | 4711/12188 [10:12:28<15:12:58, 7.33s/it] 39%|███▊ | 4712/12188 [10:12:35<14:47:18, 7.12s/it] {'loss': 0.3427, 'grad_norm': 0.6214093320753337, 'learning_rate': 7.021213639051716e-06, 'epoch': 0.39} + 39%|███▊ | 4712/12188 [10:12:35<14:47:18, 7.12s/it] 39%|███▊ | 4713/12188 [10:12:42<15:18:37, 7.37s/it] {'loss': 0.3657, 'grad_norm': 0.6449756539075978, 'learning_rate': 7.019998264362955e-06, 'epoch': 0.39} + 39%|███▊ | 4713/12188 [10:12:42<15:18:37, 7.37s/it] 39%|███▊ | 4714/12188 [10:12:49<14:53:48, 7.18s/it] {'loss': 0.366, 'grad_norm': 0.6193760337652469, 'learning_rate': 7.018782747025161e-06, 'epoch': 0.39} + 39%|███▊ | 4714/12188 [10:12:49<14:53:48, 7.18s/it] 39%|███▊ | 4715/12188 [10:12:58<16:02:19, 7.73s/it] {'loss': 0.3666, 'grad_norm': 0.6606956222650053, 'learning_rate': 7.017567087124175e-06, 'epoch': 0.39} + 39%|███▊ | 4715/12188 [10:12:58<16:02:19, 7.73s/it] 39%|███▊ | 4716/12188 [10:13:06<15:55:40, 7.67s/it] {'loss': 0.3492, 'grad_norm': 0.6263676130528987, 'learning_rate': 7.016351284745842e-06, 'epoch': 0.39} + 39%|███▊ | 4716/12188 [10:13:06<15:55:40, 7.67s/it] 39%|███▊ | 4717/12188 [10:13:13<15:21:57, 7.40s/it] {'loss': 0.3978, 'grad_norm': 0.6822459280734642, 'learning_rate': 7.015135339976021e-06, 'epoch': 0.39} + 39%|███▊ | 4717/12188 [10:13:13<15:21:57, 7.40s/it] 39%|███▊ | 4718/12188 [10:13:19<14:55:11, 7.19s/it] {'loss': 0.3681, 'grad_norm': 0.7894163223922203, 'learning_rate': 7.0139192529005805e-06, 'epoch': 0.39} + 39%|███▊ | 4718/12188 [10:13:19<14:55:11, 7.19s/it] 39%|███▊ | 4719/12188 [10:13:27<15:12:28, 7.33s/it] {'loss': 0.3601, 'grad_norm': 0.6662535739087185, 'learning_rate': 7.012703023605396e-06, 'epoch': 0.39} + 39%|███▊ | 4719/12188 [10:13:27<15:12:28, 7.33s/it] 39%|███▊ | 4720/12188 [10:13:34<15:21:39, 7.40s/it] {'loss': 0.3638, 'grad_norm': 0.6702994622159525, 'learning_rate': 7.011486652176359e-06, 'epoch': 0.39} + 39%|███▊ | 4720/12188 [10:13:34<15:21:39, 7.40s/it] 39%|███▊ | 4721/12188 [10:13:42<15:11:07, 7.32s/it] {'loss': 0.3292, 'grad_norm': 0.6953643805799282, 'learning_rate': 7.010270138699367e-06, 'epoch': 0.39} + 39%|███▊ | 4721/12188 [10:13:42<15:11:07, 7.32s/it] 39%|███▊ | 4722/12188 [10:13:49<15:32:45, 7.50s/it] {'loss': 0.3601, 'grad_norm': 0.6415383661506523, 'learning_rate': 7.0090534832603266e-06, 'epoch': 0.39} + 39%|███▊ | 4722/12188 [10:13:50<15:32:45, 7.50s/it] 39%|███▉ | 4723/12188 [10:13:57<15:15:54, 7.36s/it] {'loss': 0.3396, 'grad_norm': 0.6269068794905951, 'learning_rate': 7.007836685945155e-06, 'epoch': 0.39} + 39%|███▉ | 4723/12188 [10:13:57<15:15:54, 7.36s/it] 39%|███▉ | 4724/12188 [10:14:04<15:13:37, 7.34s/it] {'loss': 0.3381, 'grad_norm': 0.6914735298129079, 'learning_rate': 7.006619746839787e-06, 'epoch': 0.39} + 39%|███▉ | 4724/12188 [10:14:04<15:13:37, 7.34s/it] 39%|███▉ | 4725/12188 [10:14:11<14:59:23, 7.23s/it] {'loss': 0.3681, 'grad_norm': 0.6384224010476345, 'learning_rate': 7.0054026660301535e-06, 'epoch': 0.39} + 39%|███▉ | 4725/12188 [10:14:11<14:59:23, 7.23s/it] 39%|███▉ | 4726/12188 [10:14:18<14:49:17, 7.15s/it] {'loss': 0.3402, 'grad_norm': 0.7097910223925299, 'learning_rate': 7.004185443602206e-06, 'epoch': 0.39} + 39%|███▉ | 4726/12188 [10:14:18<14:49:17, 7.15s/it] 39%|███▉ | 4727/12188 [10:14:24<14:30:00, 7.00s/it] {'loss': 0.3071, 'grad_norm': 0.6776094609596632, 'learning_rate': 7.002968079641902e-06, 'epoch': 0.39} + 39%|███▉ | 4727/12188 [10:14:24<14:30:00, 7.00s/it] 39%|███▉ | 4728/12188 [10:14:31<14:26:57, 6.97s/it] {'loss': 0.3253, 'grad_norm': 0.7001188141864748, 'learning_rate': 7.00175057423521e-06, 'epoch': 0.39} + 39%|███▉ | 4728/12188 [10:14:31<14:26:57, 6.97s/it] 39%|███▉ | 4729/12188 [10:14:40<15:19:14, 7.39s/it] {'loss': 0.3597, 'grad_norm': 0.6861247189476901, 'learning_rate': 7.000532927468109e-06, 'epoch': 0.39} + 39%|███▉ | 4729/12188 [10:14:40<15:19:14, 7.39s/it] 39%|███▉ | 4730/12188 [10:14:47<15:03:56, 7.27s/it] {'loss': 0.3774, 'grad_norm': 0.6300299473712486, 'learning_rate': 6.999315139426588e-06, 'epoch': 0.39} + 39%|███▉ | 4730/12188 [10:14:47<15:03:56, 7.27s/it] 39%|███▉ | 4731/12188 [10:14:53<14:42:07, 7.10s/it] {'loss': 0.3369, 'grad_norm': 0.6691013538576442, 'learning_rate': 6.998097210196643e-06, 'epoch': 0.39} + 39%|███▉ | 4731/12188 [10:14:53<14:42:07, 7.10s/it] 39%|███▉ | 4732/12188 [10:15:01<14:50:45, 7.17s/it] {'loss': 0.3175, 'grad_norm': 0.5851513393778214, 'learning_rate': 6.996879139864283e-06, 'epoch': 0.39} + 39%|███▉ | 4732/12188 [10:15:01<14:50:45, 7.17s/it] 39%|███▉ | 4733/12188 [10:15:08<15:03:13, 7.27s/it] {'loss': 0.391, 'grad_norm': 0.6482868573356558, 'learning_rate': 6.995660928515527e-06, 'epoch': 0.39} + 39%|███▉ | 4733/12188 [10:15:08<15:03:13, 7.27s/it] 39%|███▉ | 4734/12188 [10:15:15<14:44:36, 7.12s/it] {'loss': 0.3695, 'grad_norm': 0.6588137734290144, 'learning_rate': 6.994442576236401e-06, 'epoch': 0.39} + 39%|███▉ | 4734/12188 [10:15:15<14:44:36, 7.12s/it] 39%|███▉ | 4735/12188 [10:15:22<14:46:35, 7.14s/it] {'loss': 0.3642, 'grad_norm': 0.6316762165263078, 'learning_rate': 6.993224083112946e-06, 'epoch': 0.39} + 39%|███▉ | 4735/12188 [10:15:22<14:46:35, 7.14s/it] 39%|███▉ | 4736/12188 [10:15:29<14:25:50, 6.97s/it] {'loss': 0.3422, 'grad_norm': 0.6999030550700606, 'learning_rate': 6.9920054492312086e-06, 'epoch': 0.39} + 39%|███▉ | 4736/12188 [10:15:29<14:25:50, 6.97s/it] 39%|███▉ | 4737/12188 [10:15:36<14:18:57, 6.92s/it] {'loss': 0.3355, 'grad_norm': 0.6425112603935651, 'learning_rate': 6.990786674677246e-06, 'epoch': 0.39} + 39%|███▉ | 4737/12188 [10:15:36<14:18:57, 6.92s/it] 39%|███▉ | 4738/12188 [10:15:42<14:14:44, 6.88s/it] {'loss': 0.3662, 'grad_norm': 0.658297009888357, 'learning_rate': 6.989567759537128e-06, 'epoch': 0.39} + 39%|███▉ | 4738/12188 [10:15:42<14:14:44, 6.88s/it] 39%|███▉ | 4739/12188 [10:15:51<15:04:24, 7.28s/it] {'loss': 0.3432, 'grad_norm': 0.6259424830299274, 'learning_rate': 6.98834870389693e-06, 'epoch': 0.39} + 39%|███▉ | 4739/12188 [10:15:51<15:04:24, 7.28s/it] 39%|███▉ | 4740/12188 [10:15:57<14:43:55, 7.12s/it] {'loss': 0.3664, 'grad_norm': 0.6633535411600685, 'learning_rate': 6.987129507842742e-06, 'epoch': 0.39} + 39%|███▉ | 4740/12188 [10:15:57<14:43:55, 7.12s/it] 39%|███▉ | 4741/12188 [10:16:04<14:31:02, 7.02s/it] {'loss': 0.3364, 'grad_norm': 0.6214158514972004, 'learning_rate': 6.985910171460659e-06, 'epoch': 0.39} + 39%|███▉ | 4741/12188 [10:16:04<14:31:02, 7.02s/it] 39%|███▉ | 4742/12188 [10:16:11<14:19:57, 6.93s/it] {'loss': 0.3664, 'grad_norm': 0.6755323131997197, 'learning_rate': 6.984690694836792e-06, 'epoch': 0.39} + 39%|███▉ | 4742/12188 [10:16:11<14:19:57, 6.93s/it] 39%|███▉ | 4743/12188 [10:16:18<14:28:20, 7.00s/it] {'loss': 0.358, 'grad_norm': 0.6475969520223085, 'learning_rate': 6.9834710780572565e-06, 'epoch': 0.39} + 39%|███▉ | 4743/12188 [10:16:18<14:28:20, 7.00s/it] 39%|███▉ | 4744/12188 [10:16:25<14:17:26, 6.91s/it] {'loss': 0.3778, 'grad_norm': 0.6320926644903382, 'learning_rate': 6.98225132120818e-06, 'epoch': 0.39} + 39%|███▉ | 4744/12188 [10:16:25<14:17:26, 6.91s/it] 39%|███▉ | 4745/12188 [10:16:32<14:16:23, 6.90s/it] {'loss': 0.3304, 'grad_norm': 0.6399108928703082, 'learning_rate': 6.981031424375701e-06, 'epoch': 0.39} + 39%|███▉ | 4745/12188 [10:16:32<14:16:23, 6.90s/it] 39%|███▉ | 4746/12188 [10:16:38<14:11:04, 6.86s/it] {'loss': 0.335, 'grad_norm': 0.6263686519654121, 'learning_rate': 6.979811387645964e-06, 'epoch': 0.39} + 39%|███▉ | 4746/12188 [10:16:38<14:11:04, 6.86s/it] 39%|███▉ | 4747/12188 [10:16:46<14:31:10, 7.02s/it] {'loss': 0.3162, 'grad_norm': 0.6164670206560524, 'learning_rate': 6.978591211105128e-06, 'epoch': 0.39} + 39%|███▉ | 4747/12188 [10:16:46<14:31:10, 7.02s/it] 39%|███▉ | 4748/12188 [10:16:54<15:28:18, 7.49s/it] {'loss': 0.3472, 'grad_norm': 0.6424928012027573, 'learning_rate': 6.9773708948393605e-06, 'epoch': 0.39} + 39%|███▉ | 4748/12188 [10:16:54<15:28:18, 7.49s/it] 39%|███▉ | 4749/12188 [10:17:01<14:54:22, 7.21s/it] {'loss': 0.3429, 'grad_norm': 0.619558839836847, 'learning_rate': 6.976150438934837e-06, 'epoch': 0.39} + 39%|███▉ | 4749/12188 [10:17:01<14:54:22, 7.21s/it] 39%|███▉ | 4750/12188 [10:17:11<16:27:52, 7.97s/it] {'loss': 0.3336, 'grad_norm': 0.6625317416390896, 'learning_rate': 6.9749298434777425e-06, 'epoch': 0.39} + 39%|███▉ | 4750/12188 [10:17:11<16:27:52, 7.97s/it] 39%|███▉ | 4751/12188 [10:17:17<15:39:40, 7.58s/it] {'loss': 0.3472, 'grad_norm': 0.6275629529314904, 'learning_rate': 6.973709108554277e-06, 'epoch': 0.39} + 39%|███▉ | 4751/12188 [10:17:17<15:39:40, 7.58s/it] 39%|███▉ | 4752/12188 [10:17:24<15:18:53, 7.41s/it] {'loss': 0.3204, 'grad_norm': 0.6502481414453936, 'learning_rate': 6.972488234250646e-06, 'epoch': 0.39} + 39%|███▉ | 4752/12188 [10:17:24<15:18:53, 7.41s/it] 39%|███▉ | 4753/12188 [10:17:31<15:00:20, 7.27s/it] {'loss': 0.3423, 'grad_norm': 0.6165199309615962, 'learning_rate': 6.971267220653064e-06, 'epoch': 0.39} + 39%|███▉ | 4753/12188 [10:17:31<15:00:20, 7.27s/it] 39%|███▉ | 4754/12188 [10:17:38<14:49:53, 7.18s/it] {'loss': 0.3577, 'grad_norm': 0.599839199175915, 'learning_rate': 6.970046067847758e-06, 'epoch': 0.39} + 39%|███▉ | 4754/12188 [10:17:38<14:49:53, 7.18s/it] 39%|███▉ | 4755/12188 [10:17:45<14:30:16, 7.03s/it] {'loss': 0.3403, 'grad_norm': 0.6247209257298942, 'learning_rate': 6.9688247759209635e-06, 'epoch': 0.39} + 39%|███▉ | 4755/12188 [10:17:45<14:30:16, 7.03s/it] 39%|███▉ | 4756/12188 [10:17:53<14:57:00, 7.24s/it] {'loss': 0.3382, 'grad_norm': 0.6106956200584188, 'learning_rate': 6.967603344958926e-06, 'epoch': 0.39} + 39%|███▉ | 4756/12188 [10:17:53<14:57:00, 7.24s/it] 39%|███▉ | 4757/12188 [10:18:01<15:28:14, 7.49s/it] {'loss': 0.3525, 'grad_norm': 0.5773732816578326, 'learning_rate': 6.9663817750479035e-06, 'epoch': 0.39} + 39%|███▉ | 4757/12188 [10:18:01<15:28:14, 7.49s/it] 39%|███▉ | 4758/12188 [10:18:08<15:07:22, 7.33s/it] {'loss': 0.3369, 'grad_norm': 0.6149089354400228, 'learning_rate': 6.965160066274157e-06, 'epoch': 0.39} + 39%|███▉ | 4758/12188 [10:18:08<15:07:22, 7.33s/it] 39%|███▉ | 4759/12188 [10:18:15<15:04:36, 7.31s/it] {'loss': 0.3427, 'grad_norm': 0.5844371998220882, 'learning_rate': 6.963938218723966e-06, 'epoch': 0.39} + 39%|███▉ | 4759/12188 [10:18:15<15:04:36, 7.31s/it] 39%|███▉ | 4760/12188 [10:18:22<14:48:43, 7.18s/it] {'loss': 0.3502, 'grad_norm': 0.6510851788114717, 'learning_rate': 6.962716232483612e-06, 'epoch': 0.39} + 39%|███▉ | 4760/12188 [10:18:22<14:48:43, 7.18s/it] 39%|███▉ | 4761/12188 [10:18:30<15:21:41, 7.45s/it] {'loss': 0.3629, 'grad_norm': 0.7047810583237964, 'learning_rate': 6.961494107639391e-06, 'epoch': 0.39} + 39%|███▉ | 4761/12188 [10:18:30<15:21:41, 7.45s/it] 39%|███▉ | 4762/12188 [10:18:37<14:58:13, 7.26s/it] {'loss': 0.3544, 'grad_norm': 0.6973023946479312, 'learning_rate': 6.960271844277608e-06, 'epoch': 0.39} + 39%|███▉ | 4762/12188 [10:18:37<14:58:13, 7.26s/it] 39%|███▉ | 4763/12188 [10:18:44<14:49:28, 7.19s/it] {'loss': 0.3348, 'grad_norm': 0.6829735770051724, 'learning_rate': 6.959049442484577e-06, 'epoch': 0.39} + 39%|███▉ | 4763/12188 [10:18:44<14:49:28, 7.19s/it] 39%|███▉ | 4764/12188 [10:18:50<14:27:13, 7.01s/it] {'loss': 0.3728, 'grad_norm': 0.6371529124581703, 'learning_rate': 6.95782690234662e-06, 'epoch': 0.39} + 39%|███▉ | 4764/12188 [10:18:50<14:27:13, 7.01s/it] 39%|███▉ | 4765/12188 [10:18:57<14:19:15, 6.95s/it] {'loss': 0.3873, 'grad_norm': 0.7052422258959135, 'learning_rate': 6.956604223950074e-06, 'epoch': 0.39} + 39%|███▉ | 4765/12188 [10:18:57<14:19:15, 6.95s/it] 39%|███▉ | 4766/12188 [10:19:04<14:20:38, 6.96s/it] {'loss': 0.3528, 'grad_norm': 0.6304363967385775, 'learning_rate': 6.955381407381283e-06, 'epoch': 0.39} + 39%|███▉ | 4766/12188 [10:19:04<14:20:38, 6.96s/it] 39%|███▉ | 4767/12188 [10:19:11<14:32:26, 7.05s/it] {'loss': 0.3481, 'grad_norm': 0.6293273669895987, 'learning_rate': 6.9541584527265975e-06, 'epoch': 0.39} + 39%|███▉ | 4767/12188 [10:19:11<14:32:26, 7.05s/it] 39%|███▉ | 4768/12188 [10:19:18<14:34:40, 7.07s/it] {'loss': 0.3682, 'grad_norm': 0.6685522247591779, 'learning_rate': 6.952935360072379e-06, 'epoch': 0.39} + 39%|███▉ | 4768/12188 [10:19:18<14:34:40, 7.07s/it] 39%|███▉ | 4769/12188 [10:19:25<14:32:01, 7.05s/it] {'loss': 0.3385, 'grad_norm': 0.602957969040934, 'learning_rate': 6.9517121295050046e-06, 'epoch': 0.39} + 39%|███▉ | 4769/12188 [10:19:25<14:32:01, 7.05s/it] 39%|███▉ | 4770/12188 [10:19:33<14:52:57, 7.22s/it] {'loss': 0.3659, 'grad_norm': 0.7145231343047503, 'learning_rate': 6.950488761110856e-06, 'epoch': 0.39} + 39%|███▉ | 4770/12188 [10:19:33<14:52:57, 7.22s/it] 39%|███▉ | 4771/12188 [10:19:41<15:03:41, 7.31s/it] {'loss': 0.3547, 'grad_norm': 0.6596480377413894, 'learning_rate': 6.949265254976325e-06, 'epoch': 0.39} + 39%|███▉ | 4771/12188 [10:19:41<15:03:41, 7.31s/it] 39%|███▉ | 4772/12188 [10:19:48<14:54:21, 7.24s/it] {'loss': 0.3227, 'grad_norm': 0.643969330439757, 'learning_rate': 6.948041611187812e-06, 'epoch': 0.39} + 39%|███▉ | 4772/12188 [10:19:48<14:54:21, 7.24s/it] 39%|███▉ | 4773/12188 [10:19:54<14:37:45, 7.10s/it] {'loss': 0.3337, 'grad_norm': 0.6557457633520409, 'learning_rate': 6.9468178298317325e-06, 'epoch': 0.39} + 39%|███▉ | 4773/12188 [10:19:54<14:37:45, 7.10s/it] 39%|███▉ | 4774/12188 [10:20:01<14:29:02, 7.03s/it] {'loss': 0.3514, 'grad_norm': 0.6349849378808968, 'learning_rate': 6.945593910994503e-06, 'epoch': 0.39} + 39%|███▉ | 4774/12188 [10:20:01<14:29:02, 7.03s/it] 39%|███▉ | 4775/12188 [10:20:08<14:28:30, 7.03s/it] {'loss': 0.3942, 'grad_norm': 0.7014558201236325, 'learning_rate': 6.944369854762559e-06, 'epoch': 0.39} + 39%|███▉ | 4775/12188 [10:20:08<14:28:30, 7.03s/it] 39%|███▉ | 4776/12188 [10:20:15<14:14:35, 6.92s/it] {'loss': 0.3435, 'grad_norm': 0.6589549774992697, 'learning_rate': 6.94314566122234e-06, 'epoch': 0.39} + 39%|███▉ | 4776/12188 [10:20:15<14:14:35, 6.92s/it] 39%|███▉ | 4777/12188 [10:20:22<14:34:15, 7.08s/it] {'loss': 0.3748, 'grad_norm': 0.6048995604885627, 'learning_rate': 6.941921330460295e-06, 'epoch': 0.39} + 39%|███▉ | 4777/12188 [10:20:22<14:34:15, 7.08s/it] 39%|███▉ | 4778/12188 [10:20:30<14:35:01, 7.09s/it] {'loss': 0.3267, 'grad_norm': 0.672930565852243, 'learning_rate': 6.940696862562886e-06, 'epoch': 0.39} + 39%|███▉ | 4778/12188 [10:20:30<14:35:01, 7.09s/it] 39%|███▉ | 4779/12188 [10:20:37<14:40:05, 7.13s/it] {'loss': 0.3312, 'grad_norm': 0.6350505959890812, 'learning_rate': 6.939472257616583e-06, 'epoch': 0.39} + 39%|███▉ | 4779/12188 [10:20:37<14:40:05, 7.13s/it] 39%|███▉ | 4780/12188 [10:20:44<14:32:25, 7.07s/it] {'loss': 0.3337, 'grad_norm': 0.6809206674827769, 'learning_rate': 6.938247515707866e-06, 'epoch': 0.39} + 39%|███▉ | 4780/12188 [10:20:44<14:32:25, 7.07s/it] 39%|███▉ | 4781/12188 [10:20:51<14:55:23, 7.25s/it] {'loss': 0.2992, 'grad_norm': 0.6100557447931385, 'learning_rate': 6.937022636923222e-06, 'epoch': 0.39} + 39%|███▉ | 4781/12188 [10:20:51<14:55:23, 7.25s/it] 39%|███▉ | 4782/12188 [10:20:59<14:56:20, 7.26s/it] {'loss': 0.3131, 'grad_norm': 0.6306807270934633, 'learning_rate': 6.935797621349151e-06, 'epoch': 0.39} + 39%|███▉ | 4782/12188 [10:20:59<14:56:20, 7.26s/it] 39%|███▉ | 4783/12188 [10:21:06<14:53:12, 7.24s/it] {'loss': 0.3275, 'grad_norm': 0.6546533574034628, 'learning_rate': 6.934572469072163e-06, 'epoch': 0.39} + 39%|███▉ | 4783/12188 [10:21:06<14:53:12, 7.24s/it] 39%|███▉ | 4784/12188 [10:21:13<14:56:03, 7.26s/it] {'loss': 0.3665, 'grad_norm': 0.6672860831985065, 'learning_rate': 6.933347180178775e-06, 'epoch': 0.39} + 39%|███▉ | 4784/12188 [10:21:13<14:56:03, 7.26s/it] 39%|███▉ | 4785/12188 [10:21:20<14:48:16, 7.20s/it] {'loss': 0.3563, 'grad_norm': 0.6451134182690936, 'learning_rate': 6.932121754755516e-06, 'epoch': 0.39} + 39%|███▉ | 4785/12188 [10:21:20<14:48:16, 7.20s/it] 39%|███▉ | 4786/12188 [10:21:28<15:04:26, 7.33s/it] {'loss': 0.3449, 'grad_norm': 0.6725702723296628, 'learning_rate': 6.930896192888923e-06, 'epoch': 0.39} + 39%|███▉ | 4786/12188 [10:21:28<15:04:26, 7.33s/it] 39%|███▉ | 4787/12188 [10:21:35<14:39:07, 7.13s/it] {'loss': 0.4329, 'grad_norm': 0.706390511414826, 'learning_rate': 6.929670494665542e-06, 'epoch': 0.39} + 39%|███▉ | 4787/12188 [10:21:35<14:39:07, 7.13s/it] 39%|███▉ | 4788/12188 [10:21:41<14:19:25, 6.97s/it] {'loss': 0.3809, 'grad_norm': 0.6637343015918635, 'learning_rate': 6.928444660171931e-06, 'epoch': 0.39} + 39%|███▉ | 4788/12188 [10:21:41<14:19:25, 6.97s/it] 39%|███▉ | 4789/12188 [10:21:48<14:30:30, 7.06s/it] {'loss': 0.3605, 'grad_norm': 1.1373887246239056, 'learning_rate': 6.927218689494657e-06, 'epoch': 0.39} + 39%|███▉ | 4789/12188 [10:21:48<14:30:30, 7.06s/it] 39%|███▉ | 4790/12188 [10:21:55<14:12:58, 6.92s/it] {'loss': 0.3306, 'grad_norm': 0.6113426572839179, 'learning_rate': 6.9259925827202955e-06, 'epoch': 0.39} + 39%|███▉ | 4790/12188 [10:21:55<14:12:58, 6.92s/it] 39%|███▉ | 4791/12188 [10:22:02<14:14:43, 6.93s/it] {'loss': 0.3479, 'grad_norm': 0.5987631070187168, 'learning_rate': 6.924766339935431e-06, 'epoch': 0.39} + 39%|███▉ | 4791/12188 [10:22:02<14:14:43, 6.93s/it] 39%|███▉ | 4792/12188 [10:22:09<14:18:12, 6.96s/it] {'loss': 0.3136, 'grad_norm': 0.6379477389840761, 'learning_rate': 6.92353996122666e-06, 'epoch': 0.39} + 39%|███▉ | 4792/12188 [10:22:09<14:18:12, 6.96s/it] 39%|███▉ | 4793/12188 [10:22:17<15:08:39, 7.37s/it] {'loss': 0.3278, 'grad_norm': 0.6161290627595079, 'learning_rate': 6.92231344668059e-06, 'epoch': 0.39} + 39%|███▉ | 4793/12188 [10:22:17<15:08:39, 7.37s/it] 39%|███▉ | 4794/12188 [10:22:26<15:48:18, 7.70s/it] {'loss': 0.3379, 'grad_norm': 0.6313101456642353, 'learning_rate': 6.921086796383831e-06, 'epoch': 0.39} + 39%|███▉ | 4794/12188 [10:22:26<15:48:18, 7.70s/it] 39%|███▉ | 4795/12188 [10:22:33<15:13:59, 7.42s/it] {'loss': 0.3331, 'grad_norm': 0.6288439578552399, 'learning_rate': 6.919860010423008e-06, 'epoch': 0.39} + 39%|███▉ | 4795/12188 [10:22:33<15:13:59, 7.42s/it] 39%|███▉ | 4796/12188 [10:22:40<15:16:27, 7.44s/it] {'loss': 0.3397, 'grad_norm': 0.6746071299893998, 'learning_rate': 6.918633088884756e-06, 'epoch': 0.39} + 39%|███▉ | 4796/12188 [10:22:40<15:16:27, 7.44s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 90, in pil_loader + return img.convert("RGB") + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 993, in convert + self.load() + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/ImageFile.py", line 319, in load + raise _get_oserror(err_code, encoder=False) +OSError: unrecognized data stream contents when reading image file +[Try #0] Failed to fetch sample 6015347 in VC:s3://gui-agent/data_20250623/windows_augment/images. Exception: unrecognized data stream contents when reading image file +Problematic sample: {'image': 'inventor/20250511_134843_1/images/before_screenshot_1_id_149_internvl_position_crop_0_grounding_instructions_point_o_paste.png', 'conversations': [{'from': 'human', 'value': "\nLocate the UI element referred to: Located in the middle section of the top toolbar ribbon in Autodesk Inventor, positioned within the 'Modify' panel group. It sits in the bottom row of the ribbon interface, below other modeling tools and between other function groups in the main toolbar area."}, {'from': 'gpt', 'value': "Located in the middle section of the top toolbar ribbon in Autodesk Inventor, positioned within the 'Modify' panel group. It sits in the bottom row of the ribbon interface, below other modeling tools and between other function groups in the main toolbar area.[[305, 539, 409, 547]]"}], 'width': 3600, 'height': 2338} + 39%|███▉ | 4797/12188 [10:22:48<15:52:11, 7.73s/it] {'loss': 0.3237, 'grad_norm': 0.6575442256598377, 'learning_rate': 6.917406031855719e-06, 'epoch': 0.39} + 39%|███▉ | 4797/12188 [10:22:48<15:52:11, 7.73s/it] 39%|███▉ | 4798/12188 [10:22:56<15:54:15, 7.75s/it] {'loss': 0.3471, 'grad_norm': 0.6483836202881701, 'learning_rate': 6.916178839422548e-06, 'epoch': 0.39} + 39%|███▉ | 4798/12188 [10:22:56<15:54:15, 7.75s/it] 39%|███▉ | 4799/12188 [10:23:04<15:56:02, 7.76s/it] {'loss': 0.312, 'grad_norm': 0.622209958183057, 'learning_rate': 6.914951511671905e-06, 'epoch': 0.39} + 39%|███▉ | 4799/12188 [10:23:04<15:56:02, 7.76s/it] 39%|███▉ | 4800/12188 [10:23:11<15:18:59, 7.46s/it] {'loss': 0.3532, 'grad_norm': 0.5983020345672395, 'learning_rate': 6.913724048690464e-06, 'epoch': 0.39} + 39%|███▉ | 4800/12188 [10:23:11<15:18:59, 7.46s/it] 39%|███▉ | 4801/12188 [10:23:19<16:04:31, 7.83s/it] {'loss': 0.3299, 'grad_norm': 0.6672562687153197, 'learning_rate': 6.9124964505649065e-06, 'epoch': 0.39} + 39%|███▉ | 4801/12188 [10:23:19<16:04:31, 7.83s/it] 39%|███▉ | 4802/12188 [10:23:27<15:58:17, 7.78s/it] {'loss': 0.3389, 'grad_norm': 0.6716284113644152, 'learning_rate': 6.9112687173819205e-06, 'epoch': 0.39} + 39%|███▉ | 4802/12188 [10:23:27<15:58:17, 7.78s/it] 39%|███▉ | 4803/12188 [10:23:34<15:31:18, 7.57s/it] {'loss': 0.3645, 'grad_norm': 0.6253787213730002, 'learning_rate': 6.910040849228208e-06, 'epoch': 0.39} + 39%|███▉ | 4803/12188 [10:23:34<15:31:18, 7.57s/it] 39%|███▉ | 4804/12188 [10:23:41<15:05:44, 7.36s/it] {'loss': 0.3676, 'grad_norm': 0.64971855966765, 'learning_rate': 6.908812846190479e-06, 'epoch': 0.39} + 39%|███▉ | 4804/12188 [10:23:41<15:05:44, 7.36s/it] 39%|███▉ | 4805/12188 [10:23:49<15:38:59, 7.63s/it] {'loss': 0.3762, 'grad_norm': 0.6239684570595055, 'learning_rate': 6.9075847083554546e-06, 'epoch': 0.39} + 39%|███▉ | 4805/12188 [10:23:49<15:38:59, 7.63s/it] 39%|███▉ | 4806/12188 [10:23:56<15:10:32, 7.40s/it] {'loss': 0.3199, 'grad_norm': 0.6583754040666003, 'learning_rate': 6.9063564358098636e-06, 'epoch': 0.39} + 39%|███▉ | 4806/12188 [10:23:56<15:10:32, 7.40s/it] 39%|███▉ | 4807/12188 [10:24:04<15:14:16, 7.43s/it] {'loss': 0.309, 'grad_norm': 0.5919608032701172, 'learning_rate': 6.905128028640443e-06, 'epoch': 0.39} + 39%|███▉ | 4807/12188 [10:24:04<15:14:16, 7.43s/it] 39%|███▉ | 4808/12188 [10:24:12<15:39:27, 7.64s/it] {'loss': 0.3529, 'grad_norm': 0.6201950000374403, 'learning_rate': 6.903899486933943e-06, 'epoch': 0.39} + 39%|███▉ | 4808/12188 [10:24:12<15:39:27, 7.64s/it] 39%|███▉ | 4809/12188 [10:24:19<15:35:31, 7.61s/it] {'loss': 0.3537, 'grad_norm': 0.6081802865466093, 'learning_rate': 6.902670810777119e-06, 'epoch': 0.39} + 39%|███▉ | 4809/12188 [10:24:19<15:35:31, 7.61s/it] 39%|███▉ | 4810/12188 [10:24:26<15:14:05, 7.43s/it] {'loss': 0.4054, 'grad_norm': 0.6771028884233132, 'learning_rate': 6.9014420002567395e-06, 'epoch': 0.39} + 39%|███▉ | 4810/12188 [10:24:26<15:14:05, 7.43s/it] 39%|███▉ | 4811/12188 [10:24:34<15:05:08, 7.36s/it] {'loss': 0.3766, 'grad_norm': 0.6657438417185029, 'learning_rate': 6.900213055459581e-06, 'epoch': 0.39} + 39%|███▉ | 4811/12188 [10:24:34<15:05:08, 7.36s/it] 39%|███▉ | 4812/12188 [10:24:41<15:15:28, 7.45s/it] {'loss': 0.3366, 'grad_norm': 0.6046718300919297, 'learning_rate': 6.898983976472429e-06, 'epoch': 0.39} + 39%|███▉ | 4812/12188 [10:24:41<15:15:28, 7.45s/it] 39%|███▉ | 4813/12188 [10:24:48<14:53:14, 7.27s/it] {'loss': 0.3521, 'grad_norm': 0.6376820944795345, 'learning_rate': 6.8977547633820785e-06, 'epoch': 0.39} + 39%|███▉ | 4813/12188 [10:24:48<14:53:14, 7.27s/it] 39%|███▉ | 4814/12188 [10:24:55<14:43:08, 7.19s/it] {'loss': 0.2957, 'grad_norm': 0.6022045292853576, 'learning_rate': 6.896525416275337e-06, 'epoch': 0.39} + 39%|███▉ | 4814/12188 [10:24:55<14:43:08, 7.19s/it] 40%|███▉ | 4815/12188 [10:25:02<14:42:52, 7.18s/it] {'loss': 0.3373, 'grad_norm': 0.6370485730439917, 'learning_rate': 6.895295935239017e-06, 'epoch': 0.4} + 40%|███▉ | 4815/12188 [10:25:02<14:42:52, 7.18s/it] 40%|███▉ | 4816/12188 [10:25:09<14:24:35, 7.04s/it] {'loss': 0.3514, 'grad_norm': 0.6220236795135651, 'learning_rate': 6.8940663203599445e-06, 'epoch': 0.4} + 40%|███▉ | 4816/12188 [10:25:09<14:24:35, 7.04s/it] 40%|███▉ | 4817/12188 [10:25:16<14:26:52, 7.06s/it] {'loss': 0.3398, 'grad_norm': 0.6437827855758046, 'learning_rate': 6.89283657172495e-06, 'epoch': 0.4} + 40%|███▉ | 4817/12188 [10:25:16<14:26:52, 7.06s/it] 40%|███▉ | 4818/12188 [10:25:23<14:33:06, 7.11s/it] {'loss': 0.354, 'grad_norm': 0.6493984153714338, 'learning_rate': 6.891606689420877e-06, 'epoch': 0.4} + 40%|███▉ | 4818/12188 [10:25:23<14:33:06, 7.11s/it] 40%|███▉ | 4819/12188 [10:25:30<14:29:31, 7.08s/it] {'loss': 0.3824, 'grad_norm': 0.6583168527825255, 'learning_rate': 6.89037667353458e-06, 'epoch': 0.4} + 40%|███▉ | 4819/12188 [10:25:30<14:29:31, 7.08s/it] 40%|███▉ | 4820/12188 [10:25:38<14:41:26, 7.18s/it] {'loss': 0.396, 'grad_norm': 0.6669034828484723, 'learning_rate': 6.8891465241529195e-06, 'epoch': 0.4} + 40%|███▉ | 4820/12188 [10:25:38<14:41:26, 7.18s/it] 40%|███▉ | 4821/12188 [10:25:46<15:26:58, 7.55s/it] {'loss': 0.3405, 'grad_norm': 0.6516830117199849, 'learning_rate': 6.887916241362766e-06, 'epoch': 0.4} + 40%|███▉ | 4821/12188 [10:25:46<15:26:58, 7.55s/it] 40%|███▉ | 4822/12188 [10:25:53<15:14:30, 7.45s/it] {'loss': 0.368, 'grad_norm': 0.714631361800471, 'learning_rate': 6.886685825251002e-06, 'epoch': 0.4} + 40%|███▉ | 4822/12188 [10:25:53<15:14:30, 7.45s/it] 40%|███▉ | 4823/12188 [10:26:00<14:55:30, 7.30s/it] {'loss': 0.3683, 'grad_norm': 0.6644434239798798, 'learning_rate': 6.885455275904516e-06, 'epoch': 0.4} + 40%|███▉ | 4823/12188 [10:26:00<14:55:30, 7.30s/it] 40%|███▉ | 4824/12188 [10:26:07<14:33:01, 7.11s/it] {'loss': 0.3574, 'grad_norm': 0.6514158241121741, 'learning_rate': 6.884224593410207e-06, 'epoch': 0.4} + 40%|███▉ | 4824/12188 [10:26:07<14:33:01, 7.11s/it] 40%|███▉ | 4825/12188 [10:26:14<14:19:41, 7.01s/it] {'loss': 0.3546, 'grad_norm': 0.6222904696722746, 'learning_rate': 6.882993777854985e-06, 'epoch': 0.4} + 40%|███▉ | 4825/12188 [10:26:14<14:19:41, 7.01s/it] 40%|███▉ | 4826/12188 [10:26:21<14:11:43, 6.94s/it] {'loss': 0.3269, 'grad_norm': 0.6122790374083659, 'learning_rate': 6.881762829325767e-06, 'epoch': 0.4} + 40%|███▉ | 4826/12188 [10:26:21<14:11:43, 6.94s/it] 40%|███▉ | 4827/12188 [10:26:27<14:04:01, 6.88s/it] {'loss': 0.3217, 'grad_norm': 0.6073385437775973, 'learning_rate': 6.88053174790948e-06, 'epoch': 0.4} + 40%|███▉ | 4827/12188 [10:26:27<14:04:01, 6.88s/it] 40%|███▉ | 4828/12188 [10:26:34<13:55:45, 6.81s/it] {'loss': 0.3361, 'grad_norm': 0.6754852127835899, 'learning_rate': 6.879300533693065e-06, 'epoch': 0.4} + 40%|███▉ | 4828/12188 [10:26:34<13:55:45, 6.81s/it] 40%|███▉ | 4829/12188 [10:26:40<13:46:15, 6.74s/it] {'loss': 0.3297, 'grad_norm': 0.6467397096926741, 'learning_rate': 6.878069186763466e-06, 'epoch': 0.4} + 40%|███▉ | 4829/12188 [10:26:40<13:46:15, 6.74s/it] 40%|███▉ | 4830/12188 [10:26:48<14:05:07, 6.89s/it] {'loss': 0.3191, 'grad_norm': 0.64847346593117, 'learning_rate': 6.876837707207635e-06, 'epoch': 0.4} + 40%|███▉ | 4830/12188 [10:26:48<14:05:07, 6.89s/it] 40%|███▉ | 4831/12188 [10:26:55<14:18:35, 7.00s/it] {'loss': 0.3554, 'grad_norm': 0.6649960380245264, 'learning_rate': 6.8756060951125415e-06, 'epoch': 0.4} + 40%|███▉ | 4831/12188 [10:26:55<14:18:35, 7.00s/it] 40%|███▉ | 4832/12188 [10:27:03<15:10:42, 7.43s/it] {'loss': 0.3956, 'grad_norm': 0.6499942308292885, 'learning_rate': 6.874374350565161e-06, 'epoch': 0.4} + 40%|███▉ | 4832/12188 [10:27:03<15:10:42, 7.43s/it] 40%|███▉ | 4833/12188 [10:27:11<15:00:50, 7.35s/it] {'loss': 0.379, 'grad_norm': 0.6958343652897404, 'learning_rate': 6.873142473652473e-06, 'epoch': 0.4} + 40%|███▉ | 4833/12188 [10:27:11<15:00:50, 7.35s/it] 40%|███▉ | 4834/12188 [10:27:18<15:16:07, 7.47s/it] {'loss': 0.347, 'grad_norm': 0.6619277088540484, 'learning_rate': 6.871910464461475e-06, 'epoch': 0.4} + 40%|███▉ | 4834/12188 [10:27:18<15:16:07, 7.47s/it] 40%|███▉ | 4835/12188 [10:27:26<15:06:07, 7.39s/it] {'loss': 0.3473, 'grad_norm': 0.6680023795387767, 'learning_rate': 6.870678323079165e-06, 'epoch': 0.4} + 40%|███▉ | 4835/12188 [10:27:26<15:06:07, 7.39s/it] 40%|███▉ | 4836/12188 [10:27:33<14:57:29, 7.32s/it] {'loss': 0.3321, 'grad_norm': 0.7416131761287764, 'learning_rate': 6.86944604959256e-06, 'epoch': 0.4} + 40%|███▉ | 4836/12188 [10:27:33<14:57:29, 7.32s/it] 40%|███▉ | 4837/12188 [10:27:42<15:54:14, 7.79s/it] {'loss': 0.3369, 'grad_norm': 0.6455632793979039, 'learning_rate': 6.868213644088677e-06, 'epoch': 0.4} + 40%|███▉ | 4837/12188 [10:27:42<15:54:14, 7.79s/it] 40%|███▉ | 4838/12188 [10:27:49<15:22:56, 7.53s/it] {'loss': 0.3319, 'grad_norm': 0.6222302120172022, 'learning_rate': 6.866981106654548e-06, 'epoch': 0.4} + 40%|███▉ | 4838/12188 [10:27:49<15:22:56, 7.53s/it] 40%|███▉ | 4839/12188 [10:27:57<15:42:33, 7.70s/it] {'loss': 0.3403, 'grad_norm': 0.5885309838529184, 'learning_rate': 6.865748437377213e-06, 'epoch': 0.4} + 40%|███▉ | 4839/12188 [10:27:57<15:42:33, 7.70s/it] 40%|███▉ | 4840/12188 [10:28:04<15:33:01, 7.62s/it] {'loss': 0.3566, 'grad_norm': 0.6347703526263198, 'learning_rate': 6.86451563634372e-06, 'epoch': 0.4} + 40%|███▉ | 4840/12188 [10:28:04<15:33:01, 7.62s/it] 40%|███▉ | 4841/12188 [10:28:12<15:47:59, 7.74s/it] {'loss': 0.3469, 'grad_norm': 0.6281177777811634, 'learning_rate': 6.863282703641129e-06, 'epoch': 0.4} + 40%|███▉ | 4841/12188 [10:28:12<15:47:59, 7.74s/it] 40%|███▉ | 4842/12188 [10:28:19<15:04:25, 7.39s/it] {'loss': 0.3452, 'grad_norm': 0.5944925392232031, 'learning_rate': 6.862049639356508e-06, 'epoch': 0.4} + 40%|███▉ | 4842/12188 [10:28:19<15:04:25, 7.39s/it] 40%|███▉ | 4843/12188 [10:28:25<14:38:22, 7.18s/it] {'loss': 0.3568, 'grad_norm': 0.6633579281609273, 'learning_rate': 6.860816443576932e-06, 'epoch': 0.4} + 40%|███▉ | 4843/12188 [10:28:25<14:38:22, 7.18s/it] 40%|███▉ | 4844/12188 [10:28:32<14:23:48, 7.06s/it] {'loss': 0.3628, 'grad_norm': 0.6644578254553546, 'learning_rate': 6.859583116389489e-06, 'epoch': 0.4} + 40%|███▉ | 4844/12188 [10:28:32<14:23:48, 7.06s/it] 40%|███▉ | 4845/12188 [10:28:41<15:16:27, 7.49s/it] {'loss': 0.3526, 'grad_norm': 0.6139552453709749, 'learning_rate': 6.858349657881273e-06, 'epoch': 0.4} + 40%|███▉ | 4845/12188 [10:28:41<15:16:27, 7.49s/it] 40%|███▉ | 4846/12188 [10:28:47<14:52:47, 7.30s/it] {'loss': 0.3519, 'grad_norm': 0.6349654333355003, 'learning_rate': 6.85711606813939e-06, 'epoch': 0.4} + 40%|███▉ | 4846/12188 [10:28:47<14:52:47, 7.30s/it] 40%|███▉ | 4847/12188 [10:28:54<14:30:28, 7.11s/it] {'loss': 0.3697, 'grad_norm': 0.6324761051290656, 'learning_rate': 6.855882347250955e-06, 'epoch': 0.4} + 40%|███▉ | 4847/12188 [10:28:54<14:30:28, 7.11s/it] 40%|███▉ | 4848/12188 [10:29:01<14:27:54, 7.09s/it] {'loss': 0.3567, 'grad_norm': 0.6588464490470071, 'learning_rate': 6.8546484953030885e-06, 'epoch': 0.4} + 40%|███▉ | 4848/12188 [10:29:01<14:27:54, 7.09s/it] 40%|███▉ | 4849/12188 [10:29:09<15:04:43, 7.40s/it] {'loss': 0.3395, 'grad_norm': 0.6405467792662278, 'learning_rate': 6.853414512382928e-06, 'epoch': 0.4} + 40%|███▉ | 4849/12188 [10:29:09<15:04:43, 7.40s/it] 40%|███▉ | 4850/12188 [10:29:17<15:04:10, 7.39s/it] {'loss': 0.3396, 'grad_norm': 0.7078641007661302, 'learning_rate': 6.85218039857761e-06, 'epoch': 0.4} + 40%|███▉ | 4850/12188 [10:29:17<15:04:10, 7.39s/it] 40%|███▉ | 4851/12188 [10:29:24<15:14:17, 7.48s/it] {'loss': 0.4045, 'grad_norm': 0.6946260115631758, 'learning_rate': 6.8509461539742894e-06, 'epoch': 0.4} + 40%|███▉ | 4851/12188 [10:29:24<15:14:17, 7.48s/it] 40%|███▉ | 4852/12188 [10:29:31<14:34:32, 7.15s/it] {'loss': 0.2994, 'grad_norm': 0.6280702622477675, 'learning_rate': 6.849711778660124e-06, 'epoch': 0.4} + 40%|███▉ | 4852/12188 [10:29:31<14:34:32, 7.15s/it] 40%|███▉ | 4853/12188 [10:29:38<14:38:41, 7.19s/it] {'loss': 0.2859, 'grad_norm': 0.6111243966309688, 'learning_rate': 6.848477272722284e-06, 'epoch': 0.4} + 40%|███▉ | 4853/12188 [10:29:38<14:38:41, 7.19s/it] 40%|███▉ | 4854/12188 [10:29:45<14:30:32, 7.12s/it] {'loss': 0.3441, 'grad_norm': 0.619966357181228, 'learning_rate': 6.84724263624795e-06, 'epoch': 0.4} + 40%|███▉ | 4854/12188 [10:29:45<14:30:32, 7.12s/it] 40%|███▉ | 4855/12188 [10:29:53<15:15:38, 7.49s/it] {'loss': 0.3093, 'grad_norm': 0.6343226024788606, 'learning_rate': 6.846007869324308e-06, 'epoch': 0.4} + 40%|███▉ | 4855/12188 [10:29:53<15:15:38, 7.49s/it] 40%|███▉ | 4856/12188 [10:30:01<15:34:59, 7.65s/it] {'loss': 0.3639, 'grad_norm': 0.6261851108471559, 'learning_rate': 6.844772972038558e-06, 'epoch': 0.4} + 40%|███▉ | 4856/12188 [10:30:01<15:34:59, 7.65s/it] 40%|███▉ | 4857/12188 [10:30:08<14:52:58, 7.31s/it] {'loss': 0.3334, 'grad_norm': 0.6844061494218175, 'learning_rate': 6.843537944477904e-06, 'epoch': 0.4} + 40%|███▉ | 4857/12188 [10:30:08<14:52:58, 7.31s/it] 40%|███▉ | 4858/12188 [10:30:15<14:39:59, 7.20s/it] {'loss': 0.3591, 'grad_norm': 0.6459717334733264, 'learning_rate': 6.84230278672956e-06, 'epoch': 0.4} + 40%|███▉ | 4858/12188 [10:30:15<14:39:59, 7.20s/it] 40%|███▉ | 4859/12188 [10:30:21<14:14:47, 7.00s/it] {'loss': 0.4015, 'grad_norm': 0.7299925733092899, 'learning_rate': 6.841067498880754e-06, 'epoch': 0.4} + 40%|███▉ | 4859/12188 [10:30:21<14:14:47, 7.00s/it] 40%|███▉ | 4860/12188 [10:30:28<13:55:47, 6.84s/it] {'loss': 0.3497, 'grad_norm': 0.7161113260341332, 'learning_rate': 6.83983208101872e-06, 'epoch': 0.4} + 40%|███▉ | 4860/12188 [10:30:28<13:55:47, 6.84s/it] 40%|███▉ | 4861/12188 [10:30:35<14:03:27, 6.91s/it] {'loss': 0.3539, 'grad_norm': 0.6425760829149234, 'learning_rate': 6.838596533230701e-06, 'epoch': 0.4} + 40%|███▉ | 4861/12188 [10:30:35<14:03:27, 6.91s/it] 40%|███▉ | 4862/12188 [10:30:42<14:21:13, 7.05s/it] {'loss': 0.3403, 'grad_norm': 0.6301688325566874, 'learning_rate': 6.837360855603947e-06, 'epoch': 0.4} + 40%|███▉ | 4862/12188 [10:30:42<14:21:13, 7.05s/it] 40%|███▉ | 4863/12188 [10:30:49<13:58:52, 6.87s/it] {'loss': 0.3553, 'grad_norm': 0.6805520913414699, 'learning_rate': 6.836125048225723e-06, 'epoch': 0.4} + 40%|███▉ | 4863/12188 [10:30:49<13:58:52, 6.87s/it] 40%|███▉ | 4864/12188 [10:30:56<14:13:56, 7.00s/it] {'loss': 0.3283, 'grad_norm': 0.7063855063989007, 'learning_rate': 6.8348891111832975e-06, 'epoch': 0.4} + 40%|███▉ | 4864/12188 [10:30:56<14:13:56, 7.00s/it] 40%|███▉ | 4865/12188 [10:31:03<14:06:07, 6.93s/it] {'loss': 0.3691, 'grad_norm': 0.6702787159612048, 'learning_rate': 6.83365304456395e-06, 'epoch': 0.4} + 40%|███▉ | 4865/12188 [10:31:03<14:06:07, 6.93s/it] 40%|███▉ | 4866/12188 [10:31:10<14:06:20, 6.94s/it] {'loss': 0.3403, 'grad_norm': 0.6634140536573462, 'learning_rate': 6.832416848454971e-06, 'epoch': 0.4} + 40%|███▉ | 4866/12188 [10:31:10<14:06:20, 6.94s/it] 40%|███▉ | 4867/12188 [10:31:16<13:58:00, 6.87s/it] {'loss': 0.38, 'grad_norm': 0.6489512292160619, 'learning_rate': 6.831180522943658e-06, 'epoch': 0.4} + 40%|███▉ | 4867/12188 [10:31:16<13:58:00, 6.87s/it] 40%|███▉ | 4868/12188 [10:31:24<14:25:51, 7.10s/it] {'loss': 0.3634, 'grad_norm': 0.6282844421674947, 'learning_rate': 6.829944068117319e-06, 'epoch': 0.4} + 40%|███▉ | 4868/12188 [10:31:24<14:25:51, 7.10s/it] 40%|███▉ | 4869/12188 [10:31:31<14:11:32, 6.98s/it] {'loss': 0.3291, 'grad_norm': 0.6422245918564127, 'learning_rate': 6.828707484063269e-06, 'epoch': 0.4} + 40%|███▉ | 4869/12188 [10:31:31<14:11:32, 6.98s/it] 40%|███▉ | 4870/12188 [10:31:38<14:35:34, 7.18s/it] {'loss': 0.3092, 'grad_norm': 0.5705389063779657, 'learning_rate': 6.827470770868837e-06, 'epoch': 0.4} + 40%|███▉ | 4870/12188 [10:31:38<14:35:34, 7.18s/it] 40%|███▉ | 4871/12188 [10:31:45<14:25:21, 7.10s/it] {'loss': 0.3865, 'grad_norm': 0.6389025387197856, 'learning_rate': 6.826233928621353e-06, 'epoch': 0.4} + 40%|███▉ | 4871/12188 [10:31:45<14:25:21, 7.10s/it] 40%|███▉ | 4872/12188 [10:31:54<15:14:47, 7.50s/it] {'loss': 0.3782, 'grad_norm': 0.5898315226321365, 'learning_rate': 6.824996957408163e-06, 'epoch': 0.4} + 40%|███▉ | 4872/12188 [10:31:54<15:14:47, 7.50s/it] 40%|███▉ | 4873/12188 [10:32:01<15:05:13, 7.42s/it] {'loss': 0.3421, 'grad_norm': 0.6102978692535678, 'learning_rate': 6.823759857316621e-06, 'epoch': 0.4} + 40%|███▉ | 4873/12188 [10:32:01<15:05:13, 7.42s/it] 40%|███▉ | 4874/12188 [10:32:08<14:52:28, 7.32s/it] {'loss': 0.3993, 'grad_norm': 0.6207864855069712, 'learning_rate': 6.822522628434087e-06, 'epoch': 0.4} + 40%|███▉ | 4874/12188 [10:32:08<14:52:28, 7.32s/it] 40%|███▉ | 4875/12188 [10:32:17<15:46:21, 7.76s/it] {'loss': 0.335, 'grad_norm': 0.7155506330570192, 'learning_rate': 6.821285270847934e-06, 'epoch': 0.4} + 40%|███▉ | 4875/12188 [10:32:17<15:46:21, 7.76s/it] 40%|████ | 4876/12188 [10:32:24<15:39:46, 7.71s/it] {'loss': 0.3161, 'grad_norm': 0.6233017277162617, 'learning_rate': 6.82004778464554e-06, 'epoch': 0.4} + 40%|████ | 4876/12188 [10:32:24<15:39:46, 7.71s/it] 40%|████ | 4877/12188 [10:32:31<15:02:45, 7.41s/it] {'loss': 0.371, 'grad_norm': 0.6416443613860174, 'learning_rate': 6.818810169914297e-06, 'epoch': 0.4} + 40%|████ | 4877/12188 [10:32:31<15:02:45, 7.41s/it] 40%|████ | 4878/12188 [10:32:39<15:01:17, 7.40s/it] {'loss': 0.3355, 'grad_norm': 0.6492735355893094, 'learning_rate': 6.817572426741601e-06, 'epoch': 0.4} + 40%|████ | 4878/12188 [10:32:39<15:01:17, 7.40s/it] 40%|████ | 4879/12188 [10:32:46<15:17:06, 7.53s/it] {'loss': 0.3633, 'grad_norm': 0.664958748799679, 'learning_rate': 6.8163345552148604e-06, 'epoch': 0.4} + 40%|████ | 4879/12188 [10:32:46<15:17:06, 7.53s/it] 40%|████ | 4880/12188 [10:32:54<15:37:43, 7.70s/it] {'loss': 0.3262, 'grad_norm': 0.6508217351597321, 'learning_rate': 6.815096555421492e-06, 'epoch': 0.4} + 40%|████ | 4880/12188 [10:32:54<15:37:43, 7.70s/it] 40%|████ | 4881/12188 [10:33:02<15:18:01, 7.54s/it] {'loss': 0.3608, 'grad_norm': 0.670913281212954, 'learning_rate': 6.813858427448921e-06, 'epoch': 0.4} + 40%|████ | 4881/12188 [10:33:02<15:18:01, 7.54s/it] 40%|████ | 4882/12188 [10:33:08<14:44:51, 7.27s/it] {'loss': 0.3262, 'grad_norm': 0.7059654924394448, 'learning_rate': 6.812620171384582e-06, 'epoch': 0.4} + 40%|████ | 4882/12188 [10:33:08<14:44:51, 7.27s/it] 40%|████ | 4883/12188 [10:33:17<15:21:44, 7.57s/it] {'loss': 0.3661, 'grad_norm': 0.6663320920632044, 'learning_rate': 6.811381787315919e-06, 'epoch': 0.4} + 40%|████ | 4883/12188 [10:33:17<15:21:44, 7.57s/it] 40%|████ | 4884/12188 [10:33:24<15:03:45, 7.42s/it] {'loss': 0.3308, 'grad_norm': 0.6265749732680048, 'learning_rate': 6.810143275330385e-06, 'epoch': 0.4} + 40%|████ | 4884/12188 [10:33:24<15:03:45, 7.42s/it] 40%|████ | 4885/12188 [10:33:30<14:40:28, 7.23s/it] {'loss': 0.3341, 'grad_norm': 0.5916058372343015, 'learning_rate': 6.80890463551544e-06, 'epoch': 0.4} + 40%|████ | 4885/12188 [10:33:30<14:40:28, 7.23s/it] 40%|████ | 4886/12188 [10:33:39<15:25:13, 7.60s/it] {'loss': 0.3298, 'grad_norm': 0.6847261485240617, 'learning_rate': 6.807665867958555e-06, 'epoch': 0.4} + 40%|████ | 4886/12188 [10:33:39<15:25:13, 7.60s/it] 40%|████ | 4887/12188 [10:33:47<15:39:44, 7.72s/it] {'loss': 0.3624, 'grad_norm': 0.6553643312483408, 'learning_rate': 6.806426972747212e-06, 'epoch': 0.4} + 40%|████ | 4887/12188 [10:33:47<15:39:44, 7.72s/it] 40%|████ | 4888/12188 [10:33:54<15:20:01, 7.56s/it] {'loss': 0.3608, 'grad_norm': 0.6949898098669737, 'learning_rate': 6.805187949968899e-06, 'epoch': 0.4} + 40%|████ | 4888/12188 [10:33:54<15:20:01, 7.56s/it] 40%|████ | 4889/12188 [10:34:01<14:51:55, 7.33s/it] {'loss': 0.3373, 'grad_norm': 0.62603060969543, 'learning_rate': 6.803948799711111e-06, 'epoch': 0.4} + 40%|████ | 4889/12188 [10:34:01<14:51:55, 7.33s/it] 40%|████ | 4890/12188 [10:34:08<14:31:45, 7.17s/it] {'loss': 0.3608, 'grad_norm': 0.6373425409426963, 'learning_rate': 6.80270952206136e-06, 'epoch': 0.4} + 40%|████ | 4890/12188 [10:34:08<14:31:45, 7.17s/it] 40%|████ | 4891/12188 [10:34:14<14:14:17, 7.02s/it] {'loss': 0.3556, 'grad_norm': 0.6592823659941973, 'learning_rate': 6.801470117107157e-06, 'epoch': 0.4} + 40%|████ | 4891/12188 [10:34:14<14:14:17, 7.02s/it] 40%|████ | 4892/12188 [10:34:21<14:08:59, 6.98s/it] {'loss': 0.3837, 'grad_norm': 0.6622851611266923, 'learning_rate': 6.8002305849360295e-06, 'epoch': 0.4} + 40%|████ | 4892/12188 [10:34:21<14:08:59, 6.98s/it] 40%|████ | 4893/12188 [10:34:28<14:04:52, 6.95s/it] {'loss': 0.3093, 'grad_norm': 0.6237076160689363, 'learning_rate': 6.798990925635509e-06, 'epoch': 0.4} + 40%|████ | 4893/12188 [10:34:28<14:04:52, 6.95s/it] 40%|████ | 4894/12188 [10:34:36<14:33:19, 7.18s/it] {'loss': 0.3547, 'grad_norm': 0.649112335353178, 'learning_rate': 6.79775113929314e-06, 'epoch': 0.4} + 40%|████ | 4894/12188 [10:34:36<14:33:19, 7.18s/it] 40%|████ | 4895/12188 [10:34:43<14:41:34, 7.25s/it] {'loss': 0.3594, 'grad_norm': 0.6644178723690366, 'learning_rate': 6.796511225996474e-06, 'epoch': 0.4} + 40%|████ | 4895/12188 [10:34:43<14:41:34, 7.25s/it] 40%|████ | 4896/12188 [10:34:50<14:35:39, 7.21s/it] {'loss': 0.3474, 'grad_norm': 0.6294704190966411, 'learning_rate': 6.795271185833072e-06, 'epoch': 0.4} + 40%|████ | 4896/12188 [10:34:50<14:35:39, 7.21s/it] 40%|████ | 4897/12188 [10:34:59<15:11:14, 7.50s/it] {'loss': 0.3695, 'grad_norm': 0.6685170968277875, 'learning_rate': 6.794031018890504e-06, 'epoch': 0.4} + 40%|████ | 4897/12188 [10:34:59<15:11:14, 7.50s/it] 40%|████ | 4898/12188 [10:35:05<14:46:31, 7.30s/it] {'loss': 0.4255, 'grad_norm': 0.7006229860418428, 'learning_rate': 6.792790725256347e-06, 'epoch': 0.4} + 40%|████ | 4898/12188 [10:35:05<14:46:31, 7.30s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1348, in _get_item + raise ValueError( +ValueError: Number of image tokens ['data/ant-design/tree/other_screenshot/original/CustomHierarchicalView_1741951688.260986.png'] does not match number of images None +[Try #0] Failed to fetch sample 1843732 in VC:s3://gui-agent/jedi/images/final_1.5m/final_1.5m_extracted/. Exception: Number of image tokens ['data/ant-design/tree/other_screenshot/original/CustomHierarchicalView_1741951688.260986.png'] does not match number of images None +Problematic sample: {'image': 'data/ant-design/tree/other_screenshot/original/CustomHierarchicalView_1741951688.260986.png', 'conversations': []} + 40%|████ | 4899/12188 [10:35:12<14:21:18, 7.09s/it] {'loss': 0.3622, 'grad_norm': 0.6581623093745641, 'learning_rate': 6.791550305018188e-06, 'epoch': 0.4} + 40%|████ | 4899/12188 [10:35:12<14:21:18, 7.09s/it] 40%|████ | 4900/12188 [10:35:20<14:54:51, 7.37s/it] {'loss': 0.3351, 'grad_norm': 0.6084159103938195, 'learning_rate': 6.7903097582636255e-06, 'epoch': 0.4} + 40%|████ | 4900/12188 [10:35:20<14:54:51, 7.37s/it] 40%|████ | 4901/12188 [10:35:27<14:31:39, 7.18s/it] {'loss': 0.3318, 'grad_norm': 0.6190081771103121, 'learning_rate': 6.789069085080265e-06, 'epoch': 0.4} + 40%|████ | 4901/12188 [10:35:27<14:31:39, 7.18s/it] 40%|████ | 4902/12188 [10:35:34<14:51:28, 7.34s/it] {'loss': 0.3512, 'grad_norm': 0.6191747396434278, 'learning_rate': 6.78782828555572e-06, 'epoch': 0.4} + 40%|████ | 4902/12188 [10:35:34<14:51:28, 7.34s/it] 40%|████ | 4903/12188 [10:35:42<14:51:30, 7.34s/it] {'loss': 0.3659, 'grad_norm': 0.6348349053785265, 'learning_rate': 6.786587359777612e-06, 'epoch': 0.4} + 40%|████ | 4903/12188 [10:35:42<14:51:30, 7.34s/it] 40%|████ | 4904/12188 [10:35:49<14:47:24, 7.31s/it] {'loss': 0.3836, 'grad_norm': 0.6677494050538845, 'learning_rate': 6.785346307833578e-06, 'epoch': 0.4} + 40%|████ | 4904/12188 [10:35:49<14:47:24, 7.31s/it] 40%|████ | 4905/12188 [10:35:56<14:50:34, 7.34s/it] {'loss': 0.3825, 'grad_norm': 0.6264477351309535, 'learning_rate': 6.7841051298112545e-06, 'epoch': 0.4} + 40%|████ | 4905/12188 [10:35:56<14:50:34, 7.34s/it] 40%|████ | 4906/12188 [10:36:05<15:34:22, 7.70s/it] {'loss': 0.366, 'grad_norm': 0.6635287889364051, 'learning_rate': 6.782863825798294e-06, 'epoch': 0.4} + 40%|████ | 4906/12188 [10:36:05<15:34:22, 7.70s/it] 40%|████ | 4907/12188 [10:36:12<15:13:25, 7.53s/it] {'loss': 0.3138, 'grad_norm': 0.7060649071338949, 'learning_rate': 6.781622395882352e-06, 'epoch': 0.4} + 40%|████ | 4907/12188 [10:36:12<15:13:25, 7.53s/it] 40%|████ | 4908/12188 [10:36:19<15:08:58, 7.49s/it] {'loss': 0.3406, 'grad_norm': 0.622463429764897, 'learning_rate': 6.780380840151101e-06, 'epoch': 0.4} + 40%|████ | 4908/12188 [10:36:19<15:08:58, 7.49s/it] 40%|████ | 4909/12188 [10:36:27<15:04:04, 7.45s/it] {'loss': 0.3277, 'grad_norm': 0.6267225274029479, 'learning_rate': 6.779139158692215e-06, 'epoch': 0.4} + 40%|████ | 4909/12188 [10:36:27<15:04:04, 7.45s/it] 40%|████ | 4910/12188 [10:36:34<15:01:06, 7.43s/it] {'loss': 0.3376, 'grad_norm': 0.6497938163112323, 'learning_rate': 6.77789735159338e-06, 'epoch': 0.4} + 40%|████ | 4910/12188 [10:36:34<15:01:06, 7.43s/it] 40%|████ | 4911/12188 [10:36:41<14:33:07, 7.20s/it] {'loss': 0.3137, 'grad_norm': 0.6626605057549633, 'learning_rate': 6.776655418942291e-06, 'epoch': 0.4} + 40%|████ | 4911/12188 [10:36:41<14:33:07, 7.20s/it] 40%|████ | 4912/12188 [10:36:48<14:38:28, 7.24s/it] {'loss': 0.3762, 'grad_norm': 0.6520541863309564, 'learning_rate': 6.7754133608266505e-06, 'epoch': 0.4} + 40%|████ | 4912/12188 [10:36:48<14:38:28, 7.24s/it] 40%|████ | 4913/12188 [10:36:56<15:05:34, 7.47s/it] {'loss': 0.3518, 'grad_norm': 0.6073489758230328, 'learning_rate': 6.77417117733417e-06, 'epoch': 0.4} + 40%|████ | 4913/12188 [10:36:56<15:05:34, 7.47s/it] 40%|████ | 4914/12188 [10:37:03<14:33:02, 7.20s/it] {'loss': 0.3527, 'grad_norm': 0.6675746167649793, 'learning_rate': 6.772928868552572e-06, 'epoch': 0.4} + 40%|████ | 4914/12188 [10:37:03<14:33:02, 7.20s/it] 40%|████ | 4915/12188 [10:37:10<14:28:04, 7.16s/it] {'loss': 0.3784, 'grad_norm': 0.6793227520611388, 'learning_rate': 6.771686434569587e-06, 'epoch': 0.4} + 40%|████ | 4915/12188 [10:37:10<14:28:04, 7.16s/it] 40%|████ | 4916/12188 [10:37:18<15:04:29, 7.46s/it] {'loss': 0.3506, 'grad_norm': 0.620309365696505, 'learning_rate': 6.77044387547295e-06, 'epoch': 0.4} + 40%|████ | 4916/12188 [10:37:18<15:04:29, 7.46s/it] 40%|████ | 4917/12188 [10:37:25<14:48:54, 7.34s/it] {'loss': 0.3133, 'grad_norm': 0.5700276757841729, 'learning_rate': 6.769201191350412e-06, 'epoch': 0.4} + 40%|████ | 4917/12188 [10:37:25<14:48:54, 7.34s/it] 40%|████ | 4918/12188 [10:37:33<14:55:43, 7.39s/it] {'loss': 0.3494, 'grad_norm': 0.6252086081463765, 'learning_rate': 6.767958382289729e-06, 'epoch': 0.4} + 40%|████ | 4918/12188 [10:37:33<14:55:43, 7.39s/it] 40%|████ | 4919/12188 [10:37:39<14:32:13, 7.20s/it] {'loss': 0.3346, 'grad_norm': 0.627933679215801, 'learning_rate': 6.7667154483786645e-06, 'epoch': 0.4} + 40%|████ | 4919/12188 [10:37:39<14:32:13, 7.20s/it] 40%|████ | 4920/12188 [10:37:46<14:21:42, 7.11s/it] {'loss': 0.3481, 'grad_norm': 0.6810044449668229, 'learning_rate': 6.765472389704994e-06, 'epoch': 0.4} + 40%|████ | 4920/12188 [10:37:46<14:21:42, 7.11s/it] 40%|████ | 4921/12188 [10:37:53<14:02:10, 6.95s/it] {'loss': 0.3563, 'grad_norm': 0.6484788462990652, 'learning_rate': 6.764229206356498e-06, 'epoch': 0.4} + 40%|████ | 4921/12188 [10:37:53<14:02:10, 6.95s/it] 40%|████ | 4922/12188 [10:38:00<14:00:43, 6.94s/it] {'loss': 0.3809, 'grad_norm': 0.6539835020266082, 'learning_rate': 6.762985898420973e-06, 'epoch': 0.4} + 40%|████ | 4922/12188 [10:38:00<14:00:43, 6.94s/it] 40%|████ | 4923/12188 [10:38:07<13:54:03, 6.89s/it] {'loss': 0.3464, 'grad_norm': 0.6122648808648451, 'learning_rate': 6.761742465986214e-06, 'epoch': 0.4} + 40%|████ | 4923/12188 [10:38:07<13:54:03, 6.89s/it] 40%|████ | 4924/12188 [10:38:13<13:44:58, 6.81s/it] {'loss': 0.352, 'grad_norm': 0.6382028844695699, 'learning_rate': 6.760498909140034e-06, 'epoch': 0.4} + 40%|████ | 4924/12188 [10:38:13<13:44:58, 6.81s/it] 40%|████ | 4925/12188 [10:38:20<13:37:29, 6.75s/it] {'loss': 0.3321, 'grad_norm': 0.6599365357192872, 'learning_rate': 6.759255227970248e-06, 'epoch': 0.4} + 40%|████ | 4925/12188 [10:38:20<13:37:29, 6.75s/it] 40%|████ | 4926/12188 [10:38:27<13:58:20, 6.93s/it] {'loss': 0.3214, 'grad_norm': 0.6080118933263182, 'learning_rate': 6.758011422564685e-06, 'epoch': 0.4} + 40%|████ | 4926/12188 [10:38:27<13:58:20, 6.93s/it] 40%|██���█ | 4927/12188 [10:38:34<13:55:51, 6.91s/it] {'loss': 0.3121, 'grad_norm': 0.597284155653588, 'learning_rate': 6.756767493011181e-06, 'epoch': 0.4} + 40%|████ | 4927/12188 [10:38:34<13:55:51, 6.91s/it] 40%|████ | 4928/12188 [10:38:42<14:45:38, 7.32s/it] {'loss': 0.3643, 'grad_norm': 0.6791713275425657, 'learning_rate': 6.755523439397578e-06, 'epoch': 0.4} + 40%|████ | 4928/12188 [10:38:42<14:45:38, 7.32s/it] 40%|████ | 4929/12188 [10:38:49<14:17:23, 7.09s/it] {'loss': 0.3281, 'grad_norm': 0.5831110253192245, 'learning_rate': 6.75427926181173e-06, 'epoch': 0.4} + 40%|████ | 4929/12188 [10:38:49<14:17:23, 7.09s/it] 40%|████ | 4930/12188 [10:38:58<15:17:17, 7.58s/it] {'loss': 0.3425, 'grad_norm': 0.6978067900902315, 'learning_rate': 6.753034960341498e-06, 'epoch': 0.4} + 40%|████ | 4930/12188 [10:38:58<15:17:17, 7.58s/it] 40%|████ | 4931/12188 [10:39:04<14:40:28, 7.28s/it] {'loss': 0.352, 'grad_norm': 0.6576029636761913, 'learning_rate': 6.7517905350747544e-06, 'epoch': 0.4} + 40%|████ | 4931/12188 [10:39:04<14:40:28, 7.28s/it] 40%|████ | 4932/12188 [10:39:12<14:50:02, 7.36s/it] {'loss': 0.3601, 'grad_norm': 0.6796491203545519, 'learning_rate': 6.7505459860993796e-06, 'epoch': 0.4} + 40%|████ | 4932/12188 [10:39:12<14:50:02, 7.36s/it] 40%|████ | 4933/12188 [10:39:19<14:43:53, 7.31s/it] {'loss': 0.3403, 'grad_norm': 0.6274827500540757, 'learning_rate': 6.749301313503258e-06, 'epoch': 0.4} + 40%|████ | 4933/12188 [10:39:19<14:43:53, 7.31s/it] 40%|████ | 4934/12188 [10:39:27<15:03:48, 7.48s/it] {'loss': 0.3555, 'grad_norm': 0.7194811571477671, 'learning_rate': 6.748056517374288e-06, 'epoch': 0.4} + 40%|████ | 4934/12188 [10:39:27<15:03:48, 7.48s/it] 40%|████ | 4935/12188 [10:39:34<14:42:28, 7.30s/it] {'loss': 0.3415, 'grad_norm': 0.6457035504708221, 'learning_rate': 6.746811597800376e-06, 'epoch': 0.4} + 40%|████ | 4935/12188 [10:39:34<14:42:28, 7.30s/it] 40%|████ | 4936/12188 [10:39:42<15:12:31, 7.55s/it] {'loss': 0.3328, 'grad_norm': 0.6430443794883016, 'learning_rate': 6.7455665548694334e-06, 'epoch': 0.4} + 40%|████ | 4936/12188 [10:39:42<15:12:31, 7.55s/it] 41%|████ | 4937/12188 [10:39:49<14:58:05, 7.43s/it] {'loss': 0.3417, 'grad_norm': 0.6355811629364211, 'learning_rate': 6.744321388669388e-06, 'epoch': 0.41} + 41%|████ | 4937/12188 [10:39:49<14:58:05, 7.43s/it] 41%|████ | 4938/12188 [10:39:56<14:31:56, 7.22s/it] {'loss': 0.3642, 'grad_norm': 0.6714736337505299, 'learning_rate': 6.743076099288167e-06, 'epoch': 0.41} + 41%|████ | 4938/12188 [10:39:56<14:31:56, 7.22s/it] 41%|████ | 4939/12188 [10:40:04<15:17:06, 7.59s/it] {'loss': 0.3517, 'grad_norm': 0.6015327803643041, 'learning_rate': 6.7418306868137125e-06, 'epoch': 0.41} + 41%|████ | 4939/12188 [10:40:04<15:17:06, 7.59s/it] 41%|████ | 4940/12188 [10:40:12<15:12:52, 7.56s/it] {'loss': 0.3309, 'grad_norm': 0.6167796926026554, 'learning_rate': 6.7405851513339736e-06, 'epoch': 0.41} + 41%|████ | 4940/12188 [10:40:12<15:12:52, 7.56s/it] 41%|████ | 4941/12188 [10:40:18<14:43:35, 7.32s/it] {'loss': 0.3334, 'grad_norm': 0.6452911913742196, 'learning_rate': 6.739339492936907e-06, 'epoch': 0.41} + 41%|████ | 4941/12188 [10:40:18<14:43:35, 7.32s/it] 41%|████ | 4942/12188 [10:40:25<14:33:34, 7.23s/it] {'loss': 0.3242, 'grad_norm': 0.6667441642782671, 'learning_rate': 6.7380937117104816e-06, 'epoch': 0.41} + 41%|████ | 4942/12188 [10:40:25<14:33:34, 7.23s/it] 41%|████ | 4943/12188 [10:40:32<14:19:17, 7.12s/it] {'loss': 0.3536, 'grad_norm': 0.6251854783080425, 'learning_rate': 6.736847807742668e-06, 'epoch': 0.41} + 41%|████ | 4943/12188 [10:40:32<14:19:17, 7.12s/it] 41%|████ | 4944/12188 [10:40:40<14:55:29, 7.42s/it] {'loss': 0.3572, 'grad_norm': 0.6386899039256704, 'learning_rate': 6.735601781121454e-06, 'epoch': 0.41} + 41%|████ | 4944/12188 [10:40:40<14:55:29, 7.42s/it] 41%|████ | 4945/12188 [10:40:49<15:51:57, 7.89s/it] {'loss': 0.3194, 'grad_norm': 0.6249290442377724, 'learning_rate': 6.734355631934832e-06, 'epoch': 0.41} + 41%|████ | 4945/12188 [10:40:49<15:51:57, 7.89s/it] 41%|████ | 4946/12188 [10:40:57<15:57:55, 7.94s/it] {'loss': 0.3299, 'grad_norm': 0.6591062480166139, 'learning_rate': 6.733109360270802e-06, 'epoch': 0.41} + 41%|████ | 4946/12188 [10:40:57<15:57:55, 7.94s/it] 41%|████ | 4947/12188 [10:41:05<15:51:49, 7.89s/it] {'loss': 0.3294, 'grad_norm': 0.6193065056615554, 'learning_rate': 6.731862966217372e-06, 'epoch': 0.41} + 41%|████ | 4947/12188 [10:41:05<15:51:49, 7.89s/it] 41%|████ | 4948/12188 [10:41:12<15:07:48, 7.52s/it] {'loss': 0.3869, 'grad_norm': 0.6898001411562877, 'learning_rate': 6.730616449862562e-06, 'epoch': 0.41} + 41%|████ | 4948/12188 [10:41:12<15:07:48, 7.52s/it] 41%|████ | 4949/12188 [10:41:19<15:09:06, 7.54s/it] {'loss': 0.3253, 'grad_norm': 0.6936790894746817, 'learning_rate': 6.7293698112943975e-06, 'epoch': 0.41} + 41%|████ | 4949/12188 [10:41:19<15:09:06, 7.54s/it] 41%|████ | 4950/12188 [10:41:27<15:20:07, 7.63s/it] {'loss': 0.2939, 'grad_norm': 0.6120317013108436, 'learning_rate': 6.728123050600917e-06, 'epoch': 0.41} + 41%|████ | 4950/12188 [10:41:27<15:20:07, 7.63s/it] 41%|████ | 4951/12188 [10:41:35<15:18:44, 7.62s/it] {'loss': 0.3795, 'grad_norm': 0.6872433011366046, 'learning_rate': 6.726876167870162e-06, 'epoch': 0.41} + 41%|████ | 4951/12188 [10:41:35<15:18:44, 7.62s/it] 41%|████ | 4952/12188 [10:41:42<15:04:36, 7.50s/it] {'loss': 0.3783, 'grad_norm': 0.6585342409400938, 'learning_rate': 6.725629163190188e-06, 'epoch': 0.41} + 41%|████ | 4952/12188 [10:41:42<15:04:36, 7.50s/it] 41%|████ | 4953/12188 [10:41:49<14:56:55, 7.44s/it] {'loss': 0.338, 'grad_norm': 0.6426335152317297, 'learning_rate': 6.724382036649054e-06, 'epoch': 0.41} + 41%|████ | 4953/12188 [10:41:49<14:56:55, 7.44s/it] 41%|████ | 4954/12188 [10:41:56<14:33:52, 7.25s/it] {'loss': 0.3709, 'grad_norm': 0.6992265907225961, 'learning_rate': 6.723134788334831e-06, 'epoch': 0.41} + 41%|████ | 4954/12188 [10:41:56<14:33:52, 7.25s/it] 41%|████ | 4955/12188 [10:42:03<14:16:52, 7.11s/it] {'loss': 0.307, 'grad_norm': 0.6826438677400899, 'learning_rate': 6.721887418335598e-06, 'epoch': 0.41} + 41%|████ | 4955/12188 [10:42:03<14:16:52, 7.11s/it] 41%|████ | 4956/12188 [10:42:11<14:36:26, 7.27s/it] {'loss': 0.3862, 'grad_norm': 0.6567698816605938, 'learning_rate': 6.720639926739442e-06, 'epoch': 0.41} + 41%|████ | 4956/12188 [10:42:11<14:36:26, 7.27s/it] 41%|████ | 4957/12188 [10:42:18<14:52:04, 7.40s/it] {'loss': 0.3511, 'grad_norm': 0.5949663256800234, 'learning_rate': 6.719392313634457e-06, 'epoch': 0.41} + 41%|████ | 4957/12188 [10:42:18<14:52:04, 7.40s/it] 41%|████ | 4958/12188 [10:42:25<14:27:13, 7.20s/it] {'loss': 0.342, 'grad_norm': 0.6143614122653783, 'learning_rate': 6.718144579108751e-06, 'epoch': 0.41} + 41%|████ | 4958/12188 [10:42:25<14:27:13, 7.20s/it] 41%|████ | 4959/12188 [10:42:32<14:06:41, 7.03s/it] {'loss': 0.3445, 'grad_norm': 0.6490121842429755, 'learning_rate': 6.716896723250435e-06, 'epoch': 0.41} + 41%|████ | 4959/12188 [10:42:32<14:06:41, 7.03s/it] 41%|████ | 4960/12188 [10:42:39<14:18:05, 7.12s/it] {'loss': 0.3343, 'grad_norm': 0.6077458181808136, 'learning_rate': 6.715648746147632e-06, 'epoch': 0.41} + 41%|████ | 4960/12188 [10:42:39<14:18:05, 7.12s/it] 41%|████ | 4961/12188 [10:42:47<14:39:15, 7.30s/it] {'loss': 0.3286, 'grad_norm': 0.6388800332236465, 'learning_rate': 6.714400647888468e-06, 'epoch': 0.41} + 41%|████ | 4961/12188 [10:42:47<14:39:15, 7.30s/it] 41%|████ | 4962/12188 [10:42:54<14:27:21, 7.20s/it] {'loss': 0.3201, 'grad_norm': 0.6001355746285076, 'learning_rate': 6.7131524285610875e-06, 'epoch': 0.41} + 41%|████ | 4962/12188 [10:42:54<14:27:21, 7.20s/it] 41%|████ | 4963/12188 [10:43:01<14:50:02, 7.39s/it] {'loss': 0.3063, 'grad_norm': 0.5746255611035749, 'learning_rate': 6.7119040882536335e-06, 'epoch': 0.41} + 41%|████ | 4963/12188 [10:43:01<14:50:02, 7.39s/it] 41%|████ | 4964/12188 [10:43:08<14:21:10, 7.15s/it] {'loss': 0.3646, 'grad_norm': 0.6608332809172109, 'learning_rate': 6.7106556270542635e-06, 'epoch': 0.41} + 41%|████ | 4964/12188 [10:43:08<14:21:10, 7.15s/it] 41%|████ | 4965/12188 [10:43:15<14:02:29, 7.00s/it] {'loss': 0.3538, 'grad_norm': 0.6506754507602338, 'learning_rate': 6.709407045051141e-06, 'epoch': 0.41} + 41%|████ | 4965/12188 [10:43:15<14:02:29, 7.00s/it] 41%|████ | 4966/12188 [10:43:22<14:04:39, 7.02s/it] {'loss': 0.382, 'grad_norm': 0.651193599222472, 'learning_rate': 6.70815834233244e-06, 'epoch': 0.41} + 41%|████ | 4966/12188 [10:43:22<14:04:39, 7.02s/it] 41%|████ | 4967/12188 [10:43:29<14:04:04, 7.01s/it] {'loss': 0.337, 'grad_norm': 0.6207900176991058, 'learning_rate': 6.706909518986341e-06, 'epoch': 0.41} + 41%|████ | 4967/12188 [10:43:29<14:04:04, 7.01s/it] 41%|████ | 4968/12188 [10:43:36<14:11:42, 7.08s/it] {'loss': 0.3199, 'grad_norm': 0.6355801083281849, 'learning_rate': 6.705660575101034e-06, 'epoch': 0.41} + 41%|████ | 4968/12188 [10:43:36<14:11:42, 7.08s/it] 41%|████ | 4969/12188 [10:43:43<13:59:18, 6.98s/it] {'loss': 0.3804, 'grad_norm': 0.6704655640104885, 'learning_rate': 6.704411510764718e-06, 'epoch': 0.41} + 41%|████ | 4969/12188 [10:43:43<13:59:18, 6.98s/it] 41%|████ | 4970/12188 [10:43:49<13:45:10, 6.86s/it] {'loss': 0.3711, 'grad_norm': 0.6744296722482246, 'learning_rate': 6.703162326065597e-06, 'epoch': 0.41} + 41%|████ | 4970/12188 [10:43:49<13:45:10, 6.86s/it] 41%|████ | 4971/12188 [10:43:57<14:07:21, 7.04s/it] {'loss': 0.3353, 'grad_norm': 0.6615010622717705, 'learning_rate': 6.701913021091891e-06, 'epoch': 0.41} + 41%|████ | 4971/12188 [10:43:57<14:07:21, 7.04s/it] 41%|████ | 4972/12188 [10:44:03<13:48:23, 6.89s/it] {'loss': 0.3358, 'grad_norm': 0.6648781605942418, 'learning_rate': 6.700663595931822e-06, 'epoch': 0.41} + 41%|████ | 4972/12188 [10:44:03<13:48:23, 6.89s/it] 41%|████ | 4973/12188 [10:44:11<14:01:30, 7.00s/it] {'loss': 0.3722, 'grad_norm': 0.6660883913773978, 'learning_rate': 6.699414050673623e-06, 'epoch': 0.41} + 41%|████ | 4973/12188 [10:44:11<14:01:30, 7.00s/it] 41%|████ | 4974/12188 [10:44:17<13:53:20, 6.93s/it] {'loss': 0.3409, 'grad_norm': 0.6683334757855927, 'learning_rate': 6.6981643854055335e-06, 'epoch': 0.41} + 41%|████ | 4974/12188 [10:44:17<13:53:20, 6.93s/it] 41%|████ | 4975/12188 [10:44:24<13:47:20, 6.88s/it] {'loss': 0.3192, 'grad_norm': 0.6360582508547494, 'learning_rate': 6.696914600215803e-06, 'epoch': 0.41} + 41%|████ | 4975/12188 [10:44:24<13:47:20, 6.88s/it] 41%|████ | 4976/12188 [10:44:31<13:32:56, 6.76s/it] {'loss': 0.3673, 'grad_norm': 0.642414970025541, 'learning_rate': 6.695664695192689e-06, 'epoch': 0.41} + 41%|████ | 4976/12188 [10:44:31<13:32:56, 6.76s/it] 41%|████ | 4977/12188 [10:44:38<13:58:37, 6.98s/it] {'loss': 0.3637, 'grad_norm': 0.6967030888198229, 'learning_rate': 6.694414670424458e-06, 'epoch': 0.41} + 41%|████ | 4977/12188 [10:44:38<13:58:37, 6.98s/it] 41%|████ | 4978/12188 [10:44:44<13:38:53, 6.81s/it] {'loss': 0.2912, 'grad_norm': 0.6573774065649983, 'learning_rate': 6.693164525999387e-06, 'epoch': 0.41} + 41%|████ | 4978/12188 [10:44:44<13:38:53, 6.81s/it] 41%|████ | 4979/12188 [10:44:51<13:35:08, 6.78s/it] {'loss': 0.3448, 'grad_norm': 0.651196500176776, 'learning_rate': 6.691914262005756e-06, 'epoch': 0.41} + 41%|████ | 4979/12188 [10:44:51<13:35:08, 6.78s/it] 41%|████ | 4980/12188 [10:44:59<13:59:19, 6.99s/it] {'loss': 0.3124, 'grad_norm': 0.6452126886409756, 'learning_rate': 6.69066387853186e-06, 'epoch': 0.41} + 41%|████ | 4980/12188 [10:44:59<13:59:19, 6.99s/it] 41%|████ | 4981/12188 [10:45:06<14:11:29, 7.09s/it] {'loss': 0.3063, 'grad_norm': 0.6640241667349844, 'learning_rate': 6.689413375665996e-06, 'epoch': 0.41} + 41%|████ | 4981/12188 [10:45:06<14:11:29, 7.09s/it] 41%|████ | 4982/12188 [10:45:13<14:04:22, 7.03s/it] {'loss': 0.3167, 'grad_norm': 0.6369922072037174, 'learning_rate': 6.6881627534964745e-06, 'epoch': 0.41} + 41%|████ | 4982/12188 [10:45:13<14:04:22, 7.03s/it] 41%|████ | 4983/12188 [10:45:20<14:09:51, 7.08s/it] {'loss': 0.3355, 'grad_norm': 0.6106326831269132, 'learning_rate': 6.68691201211161e-06, 'epoch': 0.41} + 41%|████ | 4983/12188 [10:45:20<14:09:51, 7.08s/it] 41%|████ | 4984/12188 [10:45:27<14:07:17, 7.06s/it] {'loss': 0.3474, 'grad_norm': 0.5821905421685835, 'learning_rate': 6.68566115159973e-06, 'epoch': 0.41} + 41%|████ | 4984/12188 [10:45:27<14:07:17, 7.06s/it] 41%|████ | 4985/12188 [10:45:33<13:40:15, 6.83s/it] {'loss': 0.3776, 'grad_norm': 0.6815429786556128, 'learning_rate': 6.6844101720491685e-06, 'epoch': 0.41} + 41%|████ | 4985/12188 [10:45:33<13:40:15, 6.83s/it] 41%|████ | 4986/12188 [10:45:40<13:45:07, 6.87s/it] {'loss': 0.3455, 'grad_norm': 0.6074691499205703, 'learning_rate': 6.6831590735482665e-06, 'epoch': 0.41} + 41%|████ | 4986/12188 [10:45:40<13:45:07, 6.87s/it] 41%|████ | 4987/12188 [10:45:48<14:04:40, 7.04s/it] {'loss': 0.3429, 'grad_norm': 0.680081132596233, 'learning_rate': 6.681907856185375e-06, 'epoch': 0.41} + 41%|████ | 4987/12188 [10:45:48<14:04:40, 7.04s/it] 41%|████ | 4988/12188 [10:45:54<13:44:43, 6.87s/it] {'loss': 0.3808, 'grad_norm': 0.6639730929872217, 'learning_rate': 6.680656520048852e-06, 'epoch': 0.41} + 41%|████ | 4988/12188 [10:45:54<13:44:43, 6.87s/it] 41%|████ | 4989/12188 [10:46:02<14:31:28, 7.26s/it] {'loss': 0.3411, 'grad_norm': 0.6237737987327995, 'learning_rate': 6.679405065227067e-06, 'epoch': 0.41} + 41%|████ | 4989/12188 [10:46:02<14:31:28, 7.26s/it] 41%|████ | 4990/12188 [10:46:09<14:07:09, 7.06s/it] {'loss': 0.3599, 'grad_norm': 0.6226136934287759, 'learning_rate': 6.678153491808394e-06, 'epoch': 0.41} + 41%|████ | 4990/12188 [10:46:09<14:07:09, 7.06s/it] 41%|████ | 4991/12188 [10:46:16<14:01:24, 7.01s/it] {'loss': 0.3316, 'grad_norm': 0.6661661717329688, 'learning_rate': 6.6769017998812165e-06, 'epoch': 0.41} + 41%|████ | 4991/12188 [10:46:16<14:01:24, 7.01s/it] 41%|████ | 4992/12188 [10:46:23<13:47:38, 6.90s/it] {'loss': 0.3457, 'grad_norm': 0.6748625486470552, 'learning_rate': 6.67564998953393e-06, 'epoch': 0.41} + 41%|████ | 4992/12188 [10:46:23<13:47:38, 6.90s/it] 41%|████ | 4993/12188 [10:46:30<13:54:23, 6.96s/it] {'loss': 0.4017, 'grad_norm': 0.6366100156841996, 'learning_rate': 6.674398060854931e-06, 'epoch': 0.41} + 41%|████ | 4993/12188 [10:46:30<13:54:23, 6.96s/it] 41%|████ | 4994/12188 [10:46:37<13:51:58, 6.94s/it] {'loss': 0.3469, 'grad_norm': 0.6742550067557929, 'learning_rate': 6.673146013932633e-06, 'epoch': 0.41} + 41%|████ | 4994/12188 [10:46:37<13:51:58, 6.94s/it] 41%|████ | 4995/12188 [10:46:43<13:42:33, 6.86s/it] {'loss': 0.3896, 'grad_norm': 0.6709160177844344, 'learning_rate': 6.671893848855451e-06, 'epoch': 0.41} + 41%|████ | 4995/12188 [10:46:43<13:42:33, 6.86s/it] 41%|████ | 4996/12188 [10:46:50<13:35:51, 6.81s/it] {'loss': 0.3275, 'grad_norm': 0.6649031515274719, 'learning_rate': 6.670641565711812e-06, 'epoch': 0.41} + 41%|████ | 4996/12188 [10:46:50<13:35:51, 6.81s/it] 41%|████ | 4997/12188 [10:46:57<13:33:12, 6.79s/it] {'loss': 0.3169, 'grad_norm': 0.6263667572897673, 'learning_rate': 6.6693891645901475e-06, 'epoch': 0.41} + 41%|████ | 4997/12188 [10:46:57<13:33:12, 6.79s/it] 41%|████ | 4998/12188 [10:47:04<13:44:08, 6.88s/it] {'loss': 0.3566, 'grad_norm': 0.6015805321313127, 'learning_rate': 6.668136645578904e-06, 'epoch': 0.41} + 41%|████ | 4998/12188 [10:47:04<13:44:08, 6.88s/it] 41%|████ | 4999/12188 [10:47:13<15:12:54, 7.62s/it] {'loss': 0.3246, 'grad_norm': 0.630318823135579, 'learning_rate': 6.666884008766532e-06, 'epoch': 0.41} + 41%|████ | 4999/12188 [10:47:13<15:12:54, 7.62s/it] 41%|████ | 5000/12188 [10:47:20<14:34:42, 7.30s/it] {'loss': 0.3493, 'grad_norm': 0.6277027701259991, 'learning_rate': 6.665631254241488e-06, 'epoch': 0.41} + 41%|████ | 5000/12188 [10:47:20<14:34:42, 7.30s/it]/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/utils/checkpoint.py:87: UserWarning: None of the inputs have requires_grad=True. Gradients will be None + warnings.warn( + 41%|████ | 5001/12188 [10:47:43<24:08:06, 12.09s/it] {'loss': 0.3632, 'grad_norm': 0.6388392988352803, 'learning_rate': 6.66437838209224e-06, 'epoch': 0.41} + 41%|████ | 5001/12188 [10:47:43<24:08:06, 12.09s/it] 41%|████ | 5002/12188 [10:47:52<22:07:10, 11.08s/it] {'loss': 0.396, 'grad_norm': 0.6497015136664338, 'learning_rate': 6.663125392407266e-06, 'epoch': 0.41} + 41%|████ | 5002/12188 [10:47:52<22:07:10, 11.08s/it] 41%|████ | 5003/12188 [10:47:59<19:37:58, 9.84s/it] {'loss': 0.3835, 'grad_norm': 0.7111299049647726, 'learning_rate': 6.661872285275048e-06, 'epoch': 0.41} + 41%|████ | 5003/12188 [10:47:59<19:37:58, 9.84s/it] 41%|████ | 5004/12188 [10:48:06<18:27:03, 9.25s/it] {'loss': 0.3417, 'grad_norm': 0.6585530007357562, 'learning_rate': 6.66061906078408e-06, 'epoch': 0.41} + 41%|████ | 5004/12188 [10:48:06<18:27:03, 9.25s/it] 41%|████ | 5005/12188 [10:48:14<17:25:46, 8.74s/it] {'loss': 0.3412, 'grad_norm': 0.6142745560125971, 'learning_rate': 6.659365719022862e-06, 'epoch': 0.41} + 41%|████ | 5005/12188 [10:48:14<17:25:46, 8.74s/it] 41%|████ | 5006/12188 [10:48:24<18:04:20, 9.06s/it] {'loss': 0.3561, 'grad_norm': 0.5960536422366214, 'learning_rate': 6.6581122600799005e-06, 'epoch': 0.41} + 41%|████ | 5006/12188 [10:48:24<18:04:20, 9.06s/it] 41%|████ | 5007/12188 [10:48:31<16:55:52, 8.49s/it] {'loss': 0.3006, 'grad_norm': 0.587578705635884, 'learning_rate': 6.656858684043718e-06, 'epoch': 0.41} + 41%|████ | 5007/12188 [10:48:31<16:55:52, 8.49s/it] 41%|████ | 5008/12188 [10:48:38<16:00:41, 8.03s/it] {'loss': 0.3511, 'grad_norm': 0.6779802107705379, 'learning_rate': 6.655604991002838e-06, 'epoch': 0.41} + 41%|████ | 5008/12188 [10:48:38<16:00:41, 8.03s/it] 41%|████ | 5009/12188 [10:48:45<15:27:21, 7.75s/it] {'loss': 0.3556, 'grad_norm': 0.7216523391797738, 'learning_rate': 6.654351181045791e-06, 'epoch': 0.41} + 41%|████ | 5009/12188 [10:48:45<15:27:21, 7.75s/it] 41%|████ | 5010/12188 [10:48:53<15:19:07, 7.68s/it] {'loss': 0.3757, 'grad_norm': 0.6486286644516935, 'learning_rate': 6.653097254261123e-06, 'epoch': 0.41} + 41%|████ | 5010/12188 [10:48:53<15:19:07, 7.68s/it] 41%|████ | 5011/12188 [10:48:59<14:42:49, 7.38s/it] {'loss': 0.3588, 'grad_norm': 0.677709925093962, 'learning_rate': 6.6518432107373805e-06, 'epoch': 0.41} + 41%|████ | 5011/12188 [10:48:59<14:42:49, 7.38s/it] 41%|████ | 5012/12188 [10:49:06<14:17:56, 7.17s/it] {'loss': 0.3364, 'grad_norm': 0.6544873266376132, 'learning_rate': 6.6505890505631255e-06, 'epoch': 0.41} + 41%|████ | 5012/12188 [10:49:06<14:17:56, 7.17s/it] 41%|████ | 5013/12188 [10:49:13<14:26:15, 7.24s/it] {'loss': 0.3445, 'grad_norm': 0.6174123392378338, 'learning_rate': 6.649334773826924e-06, 'epoch': 0.41} + 41%|████ | 5013/12188 [10:49:13<14:26:15, 7.24s/it] 41%|████ | 5014/12188 [10:49:20<14:11:52, 7.12s/it] {'loss': 0.3777, 'grad_norm': 0.6772261338339999, 'learning_rate': 6.648080380617351e-06, 'epoch': 0.41} + 41%|████ | 5014/12188 [10:49:20<14:11:52, 7.12s/it] 41%|████ | 5015/12188 [10:49:27<14:11:21, 7.12s/it] {'loss': 0.3612, 'grad_norm': 0.6383983032210293, 'learning_rate': 6.646825871022989e-06, 'epoch': 0.41} + 41%|████ | 5015/12188 [10:49:27<14:11:21, 7.12s/it] 41%|████ | 5016/12188 [10:49:34<14:14:13, 7.15s/it] {'loss': 0.3566, 'grad_norm': 0.7600111205851326, 'learning_rate': 6.6455712451324295e-06, 'epoch': 0.41} + 41%|████ | 5016/12188 [10:49:34<14:14:13, 7.15s/it] 41%|████ | 5017/12188 [10:49:42<14:24:02, 7.23s/it] {'loss': 0.2982, 'grad_norm': 0.5990190901779086, 'learning_rate': 6.644316503034274e-06, 'epoch': 0.41} + 41%|████ | 5017/12188 [10:49:42<14:24:02, 7.23s/it] 41%|████ | 5018/12188 [10:49:49<14:31:44, 7.29s/it] {'loss': 0.324, 'grad_norm': 0.6501365718313655, 'learning_rate': 6.643061644817127e-06, 'epoch': 0.41} + 41%|████ | 5018/12188 [10:49:49<14:31:44, 7.29s/it] 41%|████ | 5019/12188 [10:49:56<14:22:33, 7.22s/it] {'loss': 0.3626, 'grad_norm': 0.6286857421259688, 'learning_rate': 6.641806670569607e-06, 'epoch': 0.41} + 41%|████ | 5019/12188 [10:49:56<14:22:33, 7.22s/it] 41%|████ | 5020/12188 [10:50:03<14:05:27, 7.08s/it] {'loss': 0.3174, 'grad_norm': 0.6314497638014579, 'learning_rate': 6.640551580380337e-06, 'epoch': 0.41} + 41%|████ | 5020/12188 [10:50:03<14:05:27, 7.08s/it] 41%|████ | 5021/12188 [10:50:10<13:54:57, 6.99s/it] {'loss': 0.3286, 'grad_norm': 0.6422253268157427, 'learning_rate': 6.639296374337952e-06, 'epoch': 0.41} + 41%|████ | 5021/12188 [10:50:10<13:54:57, 6.99s/it] 41%|████ | 5022/12188 [10:50:17<14:15:24, 7.16s/it] {'loss': 0.3562, 'grad_norm': 0.6065883730198437, 'learning_rate': 6.63804105253109e-06, 'epoch': 0.41} + 41%|████ | 5022/12188 [10:50:18<14:15:24, 7.16s/it] 41%|████ | 5023/12188 [10:50:28<15:59:14, 8.03s/it] {'loss': 0.3163, 'grad_norm': 0.6899660047667886, 'learning_rate': 6.6367856150484e-06, 'epoch': 0.41} + 41%|████ | 5023/12188 [10:50:28<15:59:14, 8.03s/it] 41%|████ | 5024/12188 [10:50:34<15:18:16, 7.69s/it] {'loss': 0.3677, 'grad_norm': 0.6657463011718957, 'learning_rate': 6.635530061978539e-06, 'epoch': 0.41} + 41%|████ | 5024/12188 [10:50:34<15:18:16, 7.69s/it] 41%|████ | 5025/12188 [10:50:42<14:59:44, 7.54s/it] {'loss': 0.3708, 'grad_norm': 0.6462178390387865, 'learning_rate': 6.634274393410174e-06, 'epoch': 0.41} + 41%|████ | 5025/12188 [10:50:42<14:59:44, 7.54s/it] 41%|████ | 5026/12188 [10:50:48<14:27:33, 7.27s/it] {'loss': 0.3744, 'grad_norm': 0.6612016531120649, 'learning_rate': 6.633018609431976e-06, 'epoch': 0.41} + 41%|████ | 5026/12188 [10:50:48<14:27:33, 7.27s/it] 41%|████ | 5027/12188 [10:50:56<14:56:02, 7.51s/it] {'loss': 0.3386, 'grad_norm': 0.5999474416529031, 'learning_rate': 6.631762710132628e-06, 'epoch': 0.41} + 41%|████ | 5027/12188 [10:50:56<14:56:02, 7.51s/it] 41%|████▏ | 5028/12188 [10:51:05<15:32:01, 7.81s/it] {'loss': 0.3648, 'grad_norm': 0.6407497962099511, 'learning_rate': 6.630506695600819e-06, 'epoch': 0.41} + 41%|████▏ | 5028/12188 [10:51:05<15:32:01, 7.81s/it] 41%|████▏ | 5029/12188 [10:51:12<15:24:45, 7.75s/it] {'loss': 0.3323, 'grad_norm': 0.603437618416668, 'learning_rate': 6.6292505659252475e-06, 'epoch': 0.41} + 41%|████▏ | 5029/12188 [10:51:12<15:24:45, 7.75s/it] 41%|████▏ | 5030/12188 [10:51:19<14:48:33, 7.45s/it] {'loss': 0.3898, 'grad_norm': 0.657752716366616, 'learning_rate': 6.627994321194618e-06, 'epoch': 0.41} + 41%|████▏ | 5030/12188 [10:51:19<14:48:33, 7.45s/it] 41%|████▏ | 5031/12188 [10:51:26<14:09:24, 7.12s/it] {'loss': 0.316, 'grad_norm': 0.6296077181788612, 'learning_rate': 6.626737961497645e-06, 'epoch': 0.41} + 41%|████▏ | 5031/12188 [10:51:26<14:09:24, 7.12s/it] 41%|████▏ | 5032/12188 [10:51:33<14:17:22, 7.19s/it] {'loss': 0.3247, 'grad_norm': 0.6754773090990753, 'learning_rate': 6.62548148692305e-06, 'epoch': 0.41} + 41%|████▏ | 5032/12188 [10:51:33<14:17:22, 7.19s/it] 41%|████▏ | 5033/12188 [10:51:40<14:16:17, 7.18s/it] {'loss': 0.315, 'grad_norm': 0.6908093135626663, 'learning_rate': 6.6242248975595636e-06, 'epoch': 0.41} + 41%|████▏ | 5033/12188 [10:51:40<14:16:17, 7.18s/it] 41%|████▏ | 5034/12188 [10:51:47<13:57:38, 7.03s/it] {'loss': 0.3549, 'grad_norm': 0.6147823994390029, 'learning_rate': 6.622968193495925e-06, 'epoch': 0.41} + 41%|████▏ | 5034/12188 [10:51:47<13:57:38, 7.03s/it] 41%|████▏ | 5035/12188 [10:51:54<13:59:41, 7.04s/it] {'loss': 0.3135, 'grad_norm': 0.7033291754510901, 'learning_rate': 6.621711374820881e-06, 'epoch': 0.41} + 41%|████▏ | 5035/12188 [10:51:54<13:59:41, 7.04s/it] 41%|████▏ | 5036/12188 [10:52:01<14:05:02, 7.09s/it] {'loss': 0.3328, 'grad_norm': 0.6143944810929215, 'learning_rate': 6.6204544416231865e-06, 'epoch': 0.41} + 41%|████▏ | 5036/12188 [10:52:01<14:05:02, 7.09s/it] 41%|████▏ | 5037/12188 [10:52:08<14:02:13, 7.07s/it] {'loss': 0.3777, 'grad_norm': 0.6697546729965775, 'learning_rate': 6.619197393991601e-06, 'epoch': 0.41} + 41%|████▏ | 5037/12188 [10:52:08<14:02:13, 7.07s/it] 41%|████▏ | 5038/12188 [10:52:15<13:44:14, 6.92s/it] {'loss': 0.3746, 'grad_norm': 0.6410777126923382, 'learning_rate': 6.617940232014896e-06, 'epoch': 0.41} + 41%|████▏ | 5038/12188 [10:52:15<13:44:14, 6.92s/it] 41%|████▏ | 5039/12188 [10:52:23<14:36:17, 7.35s/it] {'loss': 0.334, 'grad_norm': 0.6843113135794406, 'learning_rate': 6.616682955781853e-06, 'epoch': 0.41} + 41%|████▏ | 5039/12188 [10:52:23<14:36:17, 7.35s/it] 41%|████▏ | 5040/12188 [10:52:30<14:16:00, 7.19s/it] {'loss': 0.339, 'grad_norm': 0.6478038706513792, 'learning_rate': 6.615425565381255e-06, 'epoch': 0.41} + 41%|████▏ | 5040/12188 [10:52:30<14:16:00, 7.19s/it] 41%|████▏ | 5041/12188 [10:52:37<14:23:17, 7.25s/it] {'loss': 0.3611, 'grad_norm': 0.6197252842322732, 'learning_rate': 6.614168060901901e-06, 'epoch': 0.41} + 41%|████▏ | 5041/12188 [10:52:37<14:23:17, 7.25s/it] 41%|████▏ | 5042/12188 [10:52:45<14:56:07, 7.52s/it] {'loss': 0.3298, 'grad_norm': 0.580822767388757, 'learning_rate': 6.612910442432592e-06, 'epoch': 0.41} + 41%|████▏ | 5042/12188 [10:52:45<14:56:07, 7.52s/it] 41%|████▏ | 5043/12188 [10:52:52<14:22:52, 7.25s/it] {'loss': 0.3833, 'grad_norm': 0.6841798946756973, 'learning_rate': 6.611652710062138e-06, 'epoch': 0.41} + 41%|████▏ | 5043/12188 [10:52:52<14:22:52, 7.25s/it] 41%|████▏ | 5044/12188 [10:52:59<14:07:49, 7.12s/it] {'loss': 0.3596, 'grad_norm': 0.6156749782527996, 'learning_rate': 6.610394863879358e-06, 'epoch': 0.41} + 41%|████▏ | 5044/12188 [10:52:59<14:07:49, 7.12s/it] 41%|████▏ | 5045/12188 [10:53:06<14:07:14, 7.12s/it] {'loss': 0.3503, 'grad_norm': 0.6755142132743711, 'learning_rate': 6.609136903973081e-06, 'epoch': 0.41} + 41%|████▏ | 5045/12188 [10:53:06<14:07:14, 7.12s/it] 41%|████▏ | 5046/12188 [10:53:13<14:08:41, 7.13s/it] {'loss': 0.3385, 'grad_norm': 0.6917881531617888, 'learning_rate': 6.6078788304321405e-06, 'epoch': 0.41} + 41%|████▏ | 5046/12188 [10:53:13<14:08:41, 7.13s/it] 41%|████▏ | 5047/12188 [10:53:20<13:52:08, 6.99s/it] {'loss': 0.3457, 'grad_norm': 0.618165841089808, 'learning_rate': 6.60662064334538e-06, 'epoch': 0.41} + 41%|████▏ | 5047/12188 [10:53:20<13:52:08, 6.99s/it] 41%|████▏ | 5048/12188 [10:53:27<13:54:43, 7.01s/it] {'loss': 0.3335, 'grad_norm': 0.6222639769583619, 'learning_rate': 6.6053623428016535e-06, 'epoch': 0.41} + 41%|████▏ | 5048/12188 [10:53:27<13:54:43, 7.01s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f3ddcf90fe0> +[Try #0] Failed to fetch sample 4587213 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f3ddcf90fe0> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Get shortened URL'"}, {'from': 'gpt', 'value': '\nclick(x=0.865, y=0.456)\n'}]} + 41%|████▏ | 5049/12188 [10:53:35<14:25:42, 7.28s/it] {'loss': 0.3955, 'grad_norm': 0.7582516056953096, 'learning_rate': 6.604103928889816e-06, 'epoch': 0.41} + 41%|████▏ | 5049/12188 [10:53:35<14:25:42, 7.28s/it] 41%|████▏ | 5050/12188 [10:53:42<14:13:44, 7.18s/it] {'loss': 0.3274, 'grad_norm': 0.6507232094527547, 'learning_rate': 6.602845401698736e-06, 'epoch': 0.41} + 41%|████▏ | 5050/12188 [10:53:42<14:13:44, 7.18s/it] 41%|████▏ | 5051/12188 [10:53:49<14:31:08, 7.32s/it] {'loss': 0.3589, 'grad_norm': 0.6236274576318941, 'learning_rate': 6.601586761317289e-06, 'epoch': 0.41} + 41%|████▏ | 5051/12188 [10:53:49<14:31:08, 7.32s/it] 41%|████▏ | 5052/12188 [10:53:56<14:23:56, 7.26s/it] {'loss': 0.3786, 'grad_norm': 0.7162303836735988, 'learning_rate': 6.600328007834358e-06, 'epoch': 0.41} + 41%|████▏ | 5052/12188 [10:53:56<14:23:56, 7.26s/it] 41%|████▏ | 5053/12188 [10:54:03<14:10:58, 7.16s/it] {'loss': 0.3226, 'grad_norm': 0.631251380470334, 'learning_rate': 6.599069141338834e-06, 'epoch': 0.41} + 41%|████▏ | 5053/12188 [10:54:03<14:10:58, 7.16s/it] 41%|████▏ | 5054/12188 [10:54:10<14:00:30, 7.07s/it] {'loss': 0.3167, 'grad_norm': 0.6179039658323782, 'learning_rate': 6.5978101619196165e-06, 'epoch': 0.41} + 41%|████▏ | 5054/12188 [10:54:10<14:00:30, 7.07s/it] 41%|████▏ | 5055/12188 [10:54:17<13:52:00, 7.00s/it] {'loss': 0.34, 'grad_norm': 0.6523356349842412, 'learning_rate': 6.596551069665611e-06, 'epoch': 0.41} + 41%|████▏ | 5055/12188 [10:54:17<13:52:00, 7.00s/it] 41%|████▏ | 5056/12188 [10:54:25<14:17:04, 7.21s/it] {'loss': 0.3442, 'grad_norm': 0.6976366814114234, 'learning_rate': 6.595291864665737e-06, 'epoch': 0.41} + 41%|████▏ | 5056/12188 [10:54:25<14:17:04, 7.21s/it] 41%|████▏ | 5057/12188 [10:54:33<14:52:40, 7.51s/it] {'loss': 0.3597, 'grad_norm': 0.6326465448514241, 'learning_rate': 6.594032547008913e-06, 'epoch': 0.41} + 41%|████▏ | 5057/12188 [10:54:33<14:52:40, 7.51s/it] 41%|████▏ | 5058/12188 [10:54:41<14:58:23, 7.56s/it] {'loss': 0.3465, 'grad_norm': 0.6511219360017153, 'learning_rate': 6.592773116784072e-06, 'epoch': 0.41} + 41%|████▏ | 5058/12188 [10:54:41<14:58:23, 7.56s/it] 42%|████▏ | 5059/12188 [10:54:48<14:55:58, 7.54s/it] {'loss': 0.3667, 'grad_norm': 0.611247221964164, 'learning_rate': 6.591513574080152e-06, 'epoch': 0.42} + 42%|████▏ | 5059/12188 [10:54:48<14:55:58, 7.54s/it] 42%|████▏ | 5060/12188 [10:54:56<15:03:09, 7.60s/it] {'loss': 0.3254, 'grad_norm': 0.6396968802643342, 'learning_rate': 6.590253918986099e-06, 'epoch': 0.42} + 42%|████▏ | 5060/12188 [10:54:56<15:03:09, 7.60s/it] 42%|████▏ | 5061/12188 [10:55:03<14:54:44, 7.53s/it] {'loss': 0.3091, 'grad_norm': 0.6376107730003008, 'learning_rate': 6.58899415159087e-06, 'epoch': 0.42} + 42%|████▏ | 5061/12188 [10:55:03<14:54:44, 7.53s/it] 42%|████▏ | 5062/12188 [10:55:12<15:38:01, 7.90s/it] {'loss': 0.3304, 'grad_norm': 0.6324296699526295, 'learning_rate': 6.587734271983429e-06, 'epoch': 0.42} + 42%|████▏ | 5062/12188 [10:55:12<15:38:01, 7.90s/it] 42%|████▏ | 5063/12188 [10:55:19<15:02:05, 7.60s/it] {'loss': 0.3677, 'grad_norm': 0.7193730777701783, 'learning_rate': 6.586474280252744e-06, 'epoch': 0.42} + 42%|████▏ | 5063/12188 [10:55:19<15:02:05, 7.60s/it] 42%|████▏ | 5064/12188 [10:55:27<15:31:54, 7.85s/it] {'loss': 0.3661, 'grad_norm': 0.6201864146979013, 'learning_rate': 6.585214176487793e-06, 'epoch': 0.42} + 42%|████▏ | 5064/12188 [10:55:27<15:31:54, 7.85s/it] 42%|████▏ | 5065/12188 [10:55:34<15:01:52, 7.60s/it] {'loss': 0.36, 'grad_norm': 0.651058540542821, 'learning_rate': 6.583953960777563e-06, 'epoch': 0.42} + 42%|████▏ | 5065/12188 [10:55:34<15:01:52, 7.60s/it] 42%|████▏ | 5066/12188 [10:55:42<14:51:52, 7.51s/it] {'loss': 0.3426, 'grad_norm': 0.6437902598990766, 'learning_rate': 6.58269363321105e-06, 'epoch': 0.42} + 42%|████▏ | 5066/12188 [10:55:42<14:51:52, 7.51s/it] 42%|████▏ | 5067/12188 [10:55:48<14:15:02, 7.20s/it] {'loss': 0.3053, 'grad_norm': 0.6506312834425303, 'learning_rate': 6.581433193877257e-06, 'epoch': 0.42} + 42%|████▏ | 5067/12188 [10:55:48<14:15:02, 7.20s/it] 42%|████▏ | 5068/12188 [10:55:55<14:10:17, 7.17s/it] {'loss': 0.3376, 'grad_norm': 0.6872231678645984, 'learning_rate': 6.5801726428651906e-06, 'epoch': 0.42} + 42%|████▏ | 5068/12188 [10:55:55<14:10:17, 7.17s/it] 42%|████▏ | 5069/12188 [10:56:02<13:48:42, 6.98s/it] {'loss': 0.3261, 'grad_norm': 0.6255709427305678, 'learning_rate': 6.578911980263872e-06, 'epoch': 0.42} + 42%|████▏ | 5069/12188 [10:56:02<13:48:42, 6.98s/it] 42%|████▏ | 5070/12188 [10:56:09<13:45:11, 6.96s/it] {'loss': 0.3184, 'grad_norm': 0.6313467790526498, 'learning_rate': 6.577651206162325e-06, 'epoch': 0.42} + 42%|████▏ | 5070/12188 [10:56:09<13:45:11, 6.96s/it] 42%|████▏ | 5071/12188 [10:56:16<13:50:58, 7.01s/it] {'loss': 0.3434, 'grad_norm': 0.6790914053549727, 'learning_rate': 6.576390320649586e-06, 'epoch': 0.42} + 42%|████▏ | 5071/12188 [10:56:16<13:50:58, 7.01s/it] 42%|████▏ | 5072/12188 [10:56:23<13:43:06, 6.94s/it] {'loss': 0.3344, 'grad_norm': 0.6375229931221533, 'learning_rate': 6.575129323814694e-06, 'epoch': 0.42} + 42%|████▏ | 5072/12188 [10:56:23<13:43:06, 6.94s/it] 42%|████▏ | 5073/12188 [10:56:30<14:15:57, 7.22s/it] {'loss': 0.3808, 'grad_norm': 0.6706996688206027, 'learning_rate': 6.5738682157467e-06, 'epoch': 0.42} + 42%|████▏ | 5073/12188 [10:56:30<14:15:57, 7.22s/it] 42%|████▏ | 5074/12188 [10:56:38<14:32:13, 7.36s/it] {'loss': 0.3403, 'grad_norm': 0.6148390229047223, 'learning_rate': 6.572606996534661e-06, 'epoch': 0.42} + 42%|████▏ | 5074/12188 [10:56:38<14:32:13, 7.36s/it] 42%|████▏ | 5075/12188 [10:56:47<15:34:30, 7.88s/it] {'loss': 0.3357, 'grad_norm': 0.6213202164534833, 'learning_rate': 6.5713456662676424e-06, 'epoch': 0.42} + 42%|████▏ | 5075/12188 [10:56:47<15:34:30, 7.88s/it] 42%|████▏ | 5076/12188 [10:56:54<14:50:50, 7.52s/it] {'loss': 0.339, 'grad_norm': 0.6127336167850783, 'learning_rate': 6.570084225034717e-06, 'epoch': 0.42} + 42%|████▏ | 5076/12188 [10:56:54<14:50:50, 7.52s/it] 42%|████▏ | 5077/12188 [10:57:01<14:39:20, 7.42s/it] {'loss': 0.3329, 'grad_norm': 0.6499561991437104, 'learning_rate': 6.568822672924968e-06, 'epoch': 0.42} + 42%|████▏ | 5077/12188 [10:57:01<14:39:20, 7.42s/it] 42%|████▏ | 5078/12188 [10:57:08<14:17:54, 7.24s/it] {'loss': 0.3441, 'grad_norm': 0.6159956260017914, 'learning_rate': 6.567561010027481e-06, 'epoch': 0.42} + 42%|████▏ | 5078/12188 [10:57:08<14:17:54, 7.24s/it] 42%|████▏ | 5079/12188 [10:57:14<13:51:39, 7.02s/it] {'loss': 0.3712, 'grad_norm': 0.6096562550524244, 'learning_rate': 6.566299236431355e-06, 'epoch': 0.42} + 42%|████▏ | 5079/12188 [10:57:14<13:51:39, 7.02s/it] 42%|████▏ | 5080/12188 [10:57:22<14:31:15, 7.35s/it] {'loss': 0.3151, 'grad_norm': 0.692489448834971, 'learning_rate': 6.565037352225692e-06, 'epoch': 0.42} + 42%|████▏ | 5080/12188 [10:57:22<14:31:15, 7.35s/it] 42%|████▏ | 5081/12188 [10:57:31<14:55:07, 7.56s/it] {'loss': 0.4025, 'grad_norm': 0.7263974375177631, 'learning_rate': 6.563775357499606e-06, 'epoch': 0.42} + 42%|████▏ | 5081/12188 [10:57:31<14:55:07, 7.56s/it] 42%|████▏ | 5082/12188 [10:57:38<14:35:05, 7.39s/it] {'loss': 0.3153, 'grad_norm': 0.6362986087024752, 'learning_rate': 6.562513252342216e-06, 'epoch': 0.42} + 42%|████▏ | 5082/12188 [10:57:38<14:35:05, 7.39s/it] 42%|████▏ | 5083/12188 [10:57:45<14:24:17, 7.30s/it] {'loss': 0.355, 'grad_norm': 0.706697048482703, 'learning_rate': 6.561251036842652e-06, 'epoch': 0.42} + 42%|████▏ | 5083/12188 [10:57:45<14:24:17, 7.30s/it] 42%|████▏ | 5084/12188 [10:57:51<14:06:13, 7.15s/it] {'loss': 0.3376, 'grad_norm': 0.6565062854086267, 'learning_rate': 6.559988711090048e-06, 'epoch': 0.42} + 42%|████▏ | 5084/12188 [10:57:51<14:06:13, 7.15s/it] 42%|████▏ | 5085/12188 [10:57:58<13:51:44, 7.03s/it] {'loss': 0.3452, 'grad_norm': 0.7246218485710798, 'learning_rate': 6.558726275173548e-06, 'epoch': 0.42} + 42%|████▏ | 5085/12188 [10:57:58<13:51:44, 7.03s/it] 42%|████▏ | 5086/12188 [10:58:05<13:52:41, 7.03s/it] {'loss': 0.3611, 'grad_norm': 0.6654911888190091, 'learning_rate': 6.5574637291823005e-06, 'epoch': 0.42} + 42%|████▏ | 5086/12188 [10:58:05<13:52:41, 7.03s/it] 42%|████▏ | 5087/12188 [10:58:12<13:37:40, 6.91s/it] {'loss': 0.3565, 'grad_norm': 0.6649737998123993, 'learning_rate': 6.556201073205468e-06, 'epoch': 0.42} + 42%|████▏ | 5087/12188 [10:58:12<13:37:40, 6.91s/it] 42%|████▏ | 5088/12188 [10:58:19<14:03:59, 7.13s/it] {'loss': 0.3523, 'grad_norm': 0.6448825415273444, 'learning_rate': 6.554938307332216e-06, 'epoch': 0.42} + 42%|████▏ | 5088/12188 [10:58:19<14:03:59, 7.13s/it] 42%|████▏ | 5089/12188 [10:58:27<14:01:15, 7.11s/it] {'loss': 0.3372, 'grad_norm': 0.8023174810400285, 'learning_rate': 6.553675431651718e-06, 'epoch': 0.42} + 42%|████▏ | 5089/12188 [10:58:27<14:01:15, 7.11s/it] 42%|████▏ | 5090/12188 [10:58:34<14:27:16, 7.33s/it] {'loss': 0.3904, 'grad_norm': 0.6361580947397938, 'learning_rate': 6.552412446253159e-06, 'epoch': 0.42} + 42%|████▏ | 5090/12188 [10:58:34<14:27:16, 7.33s/it] 42%|████▏ | 5091/12188 [10:58:42<14:32:18, 7.37s/it] {'loss': 0.3862, 'grad_norm': 0.6700893647650392, 'learning_rate': 6.5511493512257265e-06, 'epoch': 0.42} + 42%|████▏ | 5091/12188 [10:58:42<14:32:18, 7.37s/it] 42%|████▏ | 5092/12188 [10:58:50<15:05:32, 7.66s/it] {'loss': 0.3222, 'grad_norm': 0.9775117940306062, 'learning_rate': 6.549886146658618e-06, 'epoch': 0.42} + 42%|████▏ | 5092/12188 [10:58:50<15:05:32, 7.66s/it] 42%|████▏ | 5093/12188 [10:58:58<15:10:35, 7.70s/it] {'loss': 0.3338, 'grad_norm': 0.7059258508229287, 'learning_rate': 6.5486228326410406e-06, 'epoch': 0.42} + 42%|████▏ | 5093/12188 [10:58:58<15:10:35, 7.70s/it] 42%|████▏ | 5094/12188 [10:59:05<15:03:18, 7.64s/it] {'loss': 0.3261, 'grad_norm': 0.6431229113939733, 'learning_rate': 6.547359409262207e-06, 'epoch': 0.42} + 42%|████▏ | 5094/12188 [10:59:05<15:03:18, 7.64s/it] 42%|████▏ | 5095/12188 [10:59:12<14:33:54, 7.39s/it] {'loss': 0.3203, 'grad_norm': 0.6608087662238608, 'learning_rate': 6.546095876611338e-06, 'epoch': 0.42} + 42%|████▏ | 5095/12188 [10:59:12<14:33:54, 7.39s/it] 42%|████▏ | 5096/12188 [10:59:20<14:40:31, 7.45s/it] {'loss': 0.3525, 'grad_norm': 0.7981759953077381, 'learning_rate': 6.54483223477766e-06, 'epoch': 0.42} + 42%|████▏ | 5096/12188 [10:59:20<14:40:31, 7.45s/it] 42%|████▏ | 5097/12188 [10:59:28<14:55:15, 7.58s/it] {'loss': 0.3531, 'grad_norm': 0.6395807150998258, 'learning_rate': 6.543568483850414e-06, 'epoch': 0.42} + 42%|████▏ | 5097/12188 [10:59:28<14:55:15, 7.58s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f89e2d8fc90> +[Try #0] Failed to fetch sample 4759514 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f89e2d8fc90> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Developers'"}, {'from': 'gpt', 'value': '\nclick(x=0.951, y=0.558)\n'}]} + 42%|████▏ | 5098/12188 [10:59:35<14:36:45, 7.42s/it] {'loss': 0.3836, 'grad_norm': 0.6429427814445986, 'learning_rate': 6.542304623918841e-06, 'epoch': 0.42} + 42%|████▏ | 5098/12188 [10:59:35<14:36:45, 7.42s/it] 42%|████▏ | 5099/12188 [10:59:42<14:22:35, 7.30s/it] {'loss': 0.3467, 'grad_norm': 0.677750762060539, 'learning_rate': 6.541040655072195e-06, 'epoch': 0.42} + 42%|████▏ | 5099/12188 [10:59:42<14:22:35, 7.30s/it] 42%|████▏ | 5100/12188 [10:59:49<14:09:08, 7.19s/it] {'loss': 0.3002, 'grad_norm': 0.5798273680252334, 'learning_rate': 6.5397765773997315e-06, 'epoch': 0.42} + 42%|████▏ | 5100/12188 [10:59:49<14:09:08, 7.19s/it] 42%|████▏ | 5101/12188 [10:59:55<13:53:55, 7.06s/it] {'loss': 0.3816, 'grad_norm': 0.666127162760314, 'learning_rate': 6.53851239099072e-06, 'epoch': 0.42} + 42%|████▏ | 5101/12188 [10:59:55<13:53:55, 7.06s/it] 42%|████▏ | 5102/12188 [11:00:05<15:25:08, 7.83s/it] {'loss': 0.3108, 'grad_norm': 0.6350125306014183, 'learning_rate': 6.537248095934436e-06, 'epoch': 0.42} + 42%|████▏ | 5102/12188 [11:00:05<15:25:08, 7.83s/it] 42%|████▏ | 5103/12188 [11:00:12<14:46:09, 7.50s/it] {'loss': 0.3428, 'grad_norm': 0.6715250123708563, 'learning_rate': 6.535983692320161e-06, 'epoch': 0.42} + 42%|████▏ | 5103/12188 [11:00:12<14:46:09, 7.50s/it] 42%|████▏ | 5104/12188 [11:00:19<14:23:05, 7.31s/it] {'loss': 0.3455, 'grad_norm': 0.6551200243017691, 'learning_rate': 6.534719180237185e-06, 'epoch': 0.42} + 42%|████▏ | 5104/12188 [11:00:19<14:23:05, 7.31s/it] 42%|████▏ | 5105/12188 [11:00:26<14:09:18, 7.19s/it] {'loss': 0.3265, 'grad_norm': 0.622514392963851, 'learning_rate': 6.5334545597748075e-06, 'epoch': 0.42} + 42%|████▏ | 5105/12188 [11:00:26<14:09:18, 7.19s/it] 42%|████▏ | 5106/12188 [11:00:32<13:49:17, 7.03s/it] {'loss': 0.3632, 'grad_norm': 0.7169768199589607, 'learning_rate': 6.532189831022332e-06, 'epoch': 0.42} + 42%|████▏ | 5106/12188 [11:00:32<13:49:17, 7.03s/it] 42%|████▏ | 5107/12188 [11:00:39<13:35:17, 6.91s/it] {'loss': 0.3615, 'grad_norm': 0.6779559285649769, 'learning_rate': 6.530924994069072e-06, 'epoch': 0.42} + 42%|████▏ | 5107/12188 [11:00:39<13:35:17, 6.91s/it] 42%|████▏ | 5108/12188 [11:00:46<13:37:02, 6.92s/it] {'loss': 0.3844, 'grad_norm': 0.6429137550679064, 'learning_rate': 6.529660049004349e-06, 'epoch': 0.42} + 42%|████▏ | 5108/12188 [11:00:46<13:37:02, 6.92s/it] 42%|████▏ | 5109/12188 [11:00:53<13:40:09, 6.95s/it] {'loss': 0.3534, 'grad_norm': 0.62625198760087, 'learning_rate': 6.52839499591749e-06, 'epoch': 0.42} + 42%|████▏ | 5109/12188 [11:00:53<13:40:09, 6.95s/it] 42%|████▏ | 5110/12188 [11:01:01<14:13:50, 7.24s/it] {'loss': 0.3173, 'grad_norm': 0.6522556194565455, 'learning_rate': 6.527129834897833e-06, 'epoch': 0.42} + 42%|████▏ | 5110/12188 [11:01:01<14:13:50, 7.24s/it] 42%|████▏ | 5111/12188 [11:01:09<14:37:38, 7.44s/it] {'loss': 0.3848, 'grad_norm': 0.7877024825378831, 'learning_rate': 6.52586456603472e-06, 'epoch': 0.42} + 42%|████▏ | 5111/12188 [11:01:09<14:37:38, 7.44s/it] 42%|████▏ | 5112/12188 [11:01:16<14:35:27, 7.42s/it] {'loss': 0.3464, 'grad_norm': 0.646900802180497, 'learning_rate': 6.524599189417503e-06, 'epoch': 0.42} + 42%|████▏ | 5112/12188 [11:01:16<14:35:27, 7.42s/it] 42%|████▏ | 5113/12188 [11:01:25<15:33:19, 7.92s/it] {'loss': 0.3238, 'grad_norm': 0.6524993889663047, 'learning_rate': 6.523333705135542e-06, 'epoch': 0.42} + 42%|████▏ | 5113/12188 [11:01:25<15:33:19, 7.92s/it] 42%|████▏ | 5114/12188 [11:01:32<15:11:11, 7.73s/it] {'loss': 0.3497, 'grad_norm': 0.6584697092461032, 'learning_rate': 6.522068113278199e-06, 'epoch': 0.42} + 42%|████▏ | 5114/12188 [11:01:32<15:11:11, 7.73s/it] 42%|████▏ | 5115/12188 [11:01:41<15:23:11, 7.83s/it] {'loss': 0.3496, 'grad_norm': 0.6140846600838934, 'learning_rate': 6.520802413934855e-06, 'epoch': 0.42} + 42%|████▏ | 5115/12188 [11:01:41<15:23:11, 7.83s/it] 42%|████▏ | 5116/12188 [11:01:48<15:07:02, 7.70s/it] {'loss': 0.3352, 'grad_norm': 0.6866999449748009, 'learning_rate': 6.519536607194885e-06, 'epoch': 0.42} + 42%|████▏ | 5116/12188 [11:01:48<15:07:02, 7.70s/it] 42%|████▏ | 5117/12188 [11:01:55<14:28:12, 7.37s/it] {'loss': 0.3775, 'grad_norm': 0.6670290636997397, 'learning_rate': 6.518270693147682e-06, 'epoch': 0.42} + 42%|████▏ | 5117/12188 [11:01:55<14:28:12, 7.37s/it] 42%|████▏ | 5118/12188 [11:02:01<14:03:07, 7.16s/it] {'loss': 0.3735, 'grad_norm': 0.6682563661796914, 'learning_rate': 6.517004671882643e-06, 'epoch': 0.42} + 42%|████▏ | 5118/12188 [11:02:01<14:03:07, 7.16s/it] 42%|████▏ | 5119/12188 [11:02:11<15:33:00, 7.92s/it] {'loss': 0.3201, 'grad_norm': 0.6304420809434466, 'learning_rate': 6.51573854348917e-06, 'epoch': 0.42} + 42%|████▏ | 5119/12188 [11:02:11<15:33:00, 7.92s/it] 42%|████▏ | 5120/12188 [11:02:18<14:51:43, 7.57s/it] {'loss': 0.3347, 'grad_norm': 0.6003627795122776, 'learning_rate': 6.514472308056677e-06, 'epoch': 0.42} + 42%|████▏ | 5120/12188 [11:02:18<14:51:43, 7.57s/it] 42%|████▏ | 5121/12188 [11:02:26<15:22:16, 7.83s/it] {'loss': 0.3292, 'grad_norm': 0.6834723478763514, 'learning_rate': 6.513205965674583e-06, 'epoch': 0.42} + 42%|████▏ | 5121/12188 [11:02:26<15:22:16, 7.83s/it] 42%|████▏ | 5122/12188 [11:02:34<15:10:00, 7.73s/it] {'loss': 0.3207, 'grad_norm': 0.5904014297528934, 'learning_rate': 6.511939516432313e-06, 'epoch': 0.42} + 42%|████▏ | 5122/12188 [11:02:34<15:10:00, 7.73s/it] 42%|████▏ | 5123/12188 [11:02:41<14:54:32, 7.60s/it] {'loss': 0.3202, 'grad_norm': 0.6241085154423279, 'learning_rate': 6.5106729604193045e-06, 'epoch': 0.42} + 42%|████▏ | 5123/12188 [11:02:41<14:54:32, 7.60s/it] 42%|████▏ | 5124/12188 [11:02:48<14:23:54, 7.34s/it] {'loss': 0.4185, 'grad_norm': 0.6858295611488902, 'learning_rate': 6.5094062977249985e-06, 'epoch': 0.42} + 42%|████▏ | 5124/12188 [11:02:48<14:23:54, 7.34s/it] 42%|████▏ | 5125/12188 [11:02:55<14:35:12, 7.43s/it] {'loss': 0.3608, 'grad_norm': 0.6722858420586281, 'learning_rate': 6.508139528438846e-06, 'epoch': 0.42} + 42%|████▏ | 5125/12188 [11:02:55<14:35:12, 7.43s/it] 42%|████▏ | 5126/12188 [11:03:02<14:15:26, 7.27s/it] {'loss': 0.339, 'grad_norm': 0.6686253691235785, 'learning_rate': 6.5068726526503026e-06, 'epoch': 0.42} + 42%|████▏ | 5126/12188 [11:03:02<14:15:26, 7.27s/it] 42%|████▏ | 5127/12188 [11:03:09<14:06:56, 7.20s/it] {'loss': 0.3603, 'grad_norm': 0.6490139173847522, 'learning_rate': 6.505605670448832e-06, 'epoch': 0.42} + 42%|████▏ | 5127/12188 [11:03:09<14:06:56, 7.20s/it] 42%|████▏ | 5128/12188 [11:03:16<13:51:05, 7.06s/it] {'loss': 0.3696, 'grad_norm': 0.6919949925551347, 'learning_rate': 6.5043385819239095e-06, 'epoch': 0.42} + 42%|████▏ | 5128/12188 [11:03:16<13:51:05, 7.06s/it] 42%|████▏ | 5129/12188 [11:03:23<13:58:06, 7.12s/it] {'loss': 0.3218, 'grad_norm': 0.6706837027821417, 'learning_rate': 6.503071387165012e-06, 'epoch': 0.42} + 42%|████▏ | 5129/12188 [11:03:23<13:58:06, 7.12s/it] 42%|████▏ | 5130/12188 [11:03:31<14:07:29, 7.20s/it] {'loss': 0.3382, 'grad_norm': 0.6421837180712053, 'learning_rate': 6.50180408626163e-06, 'epoch': 0.42} + 42%|████▏ | 5130/12188 [11:03:31<14:07:29, 7.20s/it] 42%|████▏ | 5131/12188 [11:03:39<14:53:02, 7.59s/it] {'loss': 0.333, 'grad_norm': 0.6425590303012557, 'learning_rate': 6.500536679303254e-06, 'epoch': 0.42} + 42%|████▏ | 5131/12188 [11:03:39<14:53:02, 7.59s/it] 42%|████▏ | 5132/12188 [11:03:46<14:41:12, 7.49s/it] {'loss': 0.3298, 'grad_norm': 0.7879399601202434, 'learning_rate': 6.499269166379389e-06, 'epoch': 0.42} + 42%|████▏ | 5132/12188 [11:03:46<14:41:12, 7.49s/it] 42%|████▏ | 5133/12188 [11:03:53<14:21:01, 7.32s/it] {'loss': 0.3613, 'grad_norm': 0.6388955790303262, 'learning_rate': 6.498001547579545e-06, 'epoch': 0.42} + 42%|████▏ | 5133/12188 [11:03:53<14:21:01, 7.32s/it] 42%|████▏ | 5134/12188 [11:04:01<14:19:26, 7.31s/it] {'loss': 0.3557, 'grad_norm': 0.6727981746768384, 'learning_rate': 6.4967338229932385e-06, 'epoch': 0.42} + 42%|████▏ | 5134/12188 [11:04:01<14:19:26, 7.31s/it] 42%|████▏ | 5135/12188 [11:04:08<14:09:24, 7.23s/it] {'loss': 0.312, 'grad_norm': 0.5999735811434077, 'learning_rate': 6.495465992709994e-06, 'epoch': 0.42} + 42%|████▏ | 5135/12188 [11:04:08<14:09:24, 7.23s/it] 42%|████▏ | 5136/12188 [11:04:14<13:47:12, 7.04s/it] {'loss': 0.3185, 'grad_norm': 0.6235710902549307, 'learning_rate': 6.494198056819343e-06, 'epoch': 0.42} + 42%|████▏ | 5136/12188 [11:04:14<13:47:12, 7.04s/it] 42%|████▏ | 5137/12188 [11:04:21<13:49:19, 7.06s/it] {'loss': 0.3368, 'grad_norm': 0.6217402296585255, 'learning_rate': 6.492930015410826e-06, 'epoch': 0.42} + 42%|████▏ | 5137/12188 [11:04:21<13:49:19, 7.06s/it] 42%|████▏ | 5138/12188 [11:04:29<14:05:50, 7.20s/it] {'loss': 0.3484, 'grad_norm': 0.6526706913681306, 'learning_rate': 6.49166186857399e-06, 'epoch': 0.42} + 42%|████▏ | 5138/12188 [11:04:29<14:05:50, 7.20s/it] 42%|████▏ | 5139/12188 [11:04:37<14:36:55, 7.46s/it] {'loss': 0.3651, 'grad_norm': 0.6168835642621397, 'learning_rate': 6.4903936163983895e-06, 'epoch': 0.42} + 42%|████▏ | 5139/12188 [11:04:37<14:36:55, 7.46s/it] 42%|████▏ | 5140/12188 [11:04:46<15:28:47, 7.91s/it] {'loss': 0.308, 'grad_norm': 0.6231982961736996, 'learning_rate': 6.4891252589735875e-06, 'epoch': 0.42} + 42%|████▏ | 5140/12188 [11:04:46<15:28:47, 7.91s/it] 42%|████▏ | 5141/12188 [11:04:53<14:52:33, 7.60s/it] {'loss': 0.3389, 'grad_norm': 0.6414335877794801, 'learning_rate': 6.48785679638915e-06, 'epoch': 0.42} + 42%|████▏ | 5141/12188 [11:04:53<14:52:33, 7.60s/it] 42%|████▏ | 5142/12188 [11:05:00<14:56:37, 7.64s/it] {'loss': 0.3737, 'grad_norm': 0.6389372010337849, 'learning_rate': 6.486588228734658e-06, 'epoch': 0.42} + 42%|████▏ | 5142/12188 [11:05:00<14:56:37, 7.64s/it] 42%|████▏ | 5143/12188 [11:05:08<15:00:30, 7.67s/it] {'loss': 0.3407, 'grad_norm': 0.6157694245536742, 'learning_rate': 6.485319556099693e-06, 'epoch': 0.42} + 42%|████▏ | 5143/12188 [11:05:08<15:00:30, 7.67s/it] 42%|████▏ | 5144/12188 [11:05:15<14:39:48, 7.49s/it] {'loss': 0.3109, 'grad_norm': 0.6538066859436552, 'learning_rate': 6.484050778573847e-06, 'epoch': 0.42} + 42%|████▏ | 5144/12188 [11:05:15<14:39:48, 7.49s/it] 42%|████▏ | 5145/12188 [11:05:23<14:34:52, 7.45s/it] {'loss': 0.3179, 'grad_norm': 0.5982155101437681, 'learning_rate': 6.482781896246718e-06, 'epoch': 0.42} + 42%|████▏ | 5145/12188 [11:05:23<14:34:52, 7.45s/it] 42%|████▏ | 5146/12188 [11:05:29<14:12:59, 7.27s/it] {'loss': 0.353, 'grad_norm': 0.6409813849173083, 'learning_rate': 6.481512909207915e-06, 'epoch': 0.42} + 42%|████▏ | 5146/12188 [11:05:29<14:12:59, 7.27s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f056d343790> +[Try #0] Failed to fetch sample 4426205 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f056d343790> +Problematic sample: {'image': '20240822_131046_before_screenshot_sub0.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Integral'"}, {'from': 'gpt', 'value': '\nclick(x=0.3355, y=0.139)\n'}]} + 42%|████▏ | 5147/12188 [11:05:37<14:12:28, 7.26s/it] {'loss': 0.3309, 'grad_norm': 0.6606571796837007, 'learning_rate': 6.480243817547051e-06, 'epoch': 0.42} + 42%|████▏ | 5147/12188 [11:05:37<14:12:28, 7.26s/it] 42%|████▏ | 5148/12188 [11:05:46<15:21:50, 7.86s/it] {'loss': 0.3258, 'grad_norm': 0.6050975223031237, 'learning_rate': 6.4789746213537475e-06, 'epoch': 0.42} + 42%|████▏ | 5148/12188 [11:05:46<15:21:50, 7.86s/it] 42%|████▏ | 5149/12188 [11:05:55<15:48:43, 8.09s/it] {'loss': 0.3382, 'grad_norm': 0.6336891024507462, 'learning_rate': 6.477705320717631e-06, 'epoch': 0.42} + 42%|████▏ | 5149/12188 [11:05:55<15:48:43, 8.09s/it] 42%|████▏ | 5150/12188 [11:06:01<15:00:35, 7.68s/it] {'loss': 0.375, 'grad_norm': 0.6128775665976234, 'learning_rate': 6.4764359157283385e-06, 'epoch': 0.42} + 42%|████▏ | 5150/12188 [11:06:01<15:00:35, 7.68s/it] 42%|████▏ | 5151/12188 [11:06:09<14:52:47, 7.61s/it] {'loss': 0.3185, 'grad_norm': 0.63272285733197, 'learning_rate': 6.475166406475515e-06, 'epoch': 0.42} + 42%|████▏ | 5151/12188 [11:06:09<14:52:47, 7.61s/it] 42%|████▏ | 5152/12188 [11:06:15<14:22:55, 7.36s/it] {'loss': 0.3299, 'grad_norm': 0.745781090368366, 'learning_rate': 6.473896793048808e-06, 'epoch': 0.42} + 42%|████▏ | 5152/12188 [11:06:16<14:22:55, 7.36s/it] 42%|████▏ | 5153/12188 [11:06:24<15:11:16, 7.77s/it] {'loss': 0.38, 'grad_norm': 0.6163465611759131, 'learning_rate': 6.472627075537879e-06, 'epoch': 0.42} + 42%|████▏ | 5153/12188 [11:06:24<15:11:16, 7.77s/it] 42%|████▏ | 5154/12188 [11:06:32<14:58:43, 7.67s/it] {'loss': 0.343, 'grad_norm': 0.6115683748262448, 'learning_rate': 6.471357254032394e-06, 'epoch': 0.42} + 42%|████▏ | 5154/12188 [11:06:32<14:58:43, 7.67s/it] 42%|████▏ | 5155/12188 [11:06:39<15:00:02, 7.68s/it] {'loss': 0.3002, 'grad_norm': 0.5769321836362016, 'learning_rate': 6.47008732862202e-06, 'epoch': 0.42} + 42%|████▏ | 5155/12188 [11:06:39<15:00:02, 7.68s/it] 42%|████▏ | 5156/12188 [11:06:47<14:54:43, 7.63s/it] {'loss': 0.3307, 'grad_norm': 0.693161448636482, 'learning_rate': 6.468817299396442e-06, 'epoch': 0.42} + 42%|████▏ | 5156/12188 [11:06:47<14:54:43, 7.63s/it] 42%|████▏ | 5157/12188 [11:06:54<14:41:26, 7.52s/it] {'loss': 0.3691, 'grad_norm': 0.7334249379087152, 'learning_rate': 6.467547166445347e-06, 'epoch': 0.42} + 42%|████▏ | 5157/12188 [11:06:54<14:41:26, 7.52s/it] 42%|████▏ | 5158/12188 [11:07:01<14:29:03, 7.42s/it] {'loss': 0.3268, 'grad_norm': 0.6765202913300221, 'learning_rate': 6.466276929858428e-06, 'epoch': 0.42} + 42%|████▏ | 5158/12188 [11:07:01<14:29:03, 7.42s/it] 42%|████▏ | 5159/12188 [11:07:08<14:11:33, 7.27s/it] {'loss': 0.3849, 'grad_norm': 0.6789276754008495, 'learning_rate': 6.465006589725386e-06, 'epoch': 0.42} + 42%|████▏ | 5159/12188 [11:07:08<14:11:33, 7.27s/it] 42%|████▏ | 5160/12188 [11:07:15<14:10:47, 7.26s/it] {'loss': 0.3176, 'grad_norm': 0.6364127168570713, 'learning_rate': 6.463736146135935e-06, 'epoch': 0.42} + 42%|████▏ | 5160/12188 [11:07:16<14:10:47, 7.26s/it] 42%|████▏ | 5161/12188 [11:07:24<14:59:49, 7.68s/it] {'loss': 0.3129, 'grad_norm': 0.6616168869162704, 'learning_rate': 6.46246559917979e-06, 'epoch': 0.42} + 42%|████▏ | 5161/12188 [11:07:24<14:59:49, 7.68s/it] 42%|████▏ | 5162/12188 [11:07:31<14:42:04, 7.53s/it] {'loss': 0.3736, 'grad_norm': 0.7708904148942255, 'learning_rate': 6.461194948946672e-06, 'epoch': 0.42} + 42%|████▏ | 5162/12188 [11:07:31<14:42:04, 7.53s/it] 42%|████▏ | 5163/12188 [11:07:39<14:38:04, 7.50s/it] {'loss': 0.3678, 'grad_norm': 0.6767272494905269, 'learning_rate': 6.4599241955263145e-06, 'epoch': 0.42} + 42%|████▏ | 5163/12188 [11:07:39<14:38:04, 7.50s/it] 42%|████▏ | 5164/12188 [11:07:46<14:42:52, 7.54s/it] {'loss': 0.3258, 'grad_norm': 0.6596296536584656, 'learning_rate': 6.458653339008456e-06, 'epoch': 0.42} + 42%|████▏ | 5164/12188 [11:07:46<14:42:52, 7.54s/it] 42%|████▏ | 5165/12188 [11:07:53<14:13:55, 7.30s/it] {'loss': 0.3672, 'grad_norm': 0.6686029693003641, 'learning_rate': 6.457382379482842e-06, 'epoch': 0.42} + 42%|████▏ | 5165/12188 [11:07:53<14:13:55, 7.30s/it] 42%|████▏ | 5166/12188 [11:08:01<14:34:04, 7.47s/it] {'loss': 0.3326, 'grad_norm': 0.6871668801814365, 'learning_rate': 6.456111317039226e-06, 'epoch': 0.42} + 42%|████▏ | 5166/12188 [11:08:01<14:34:04, 7.47s/it] 42%|████▏ | 5167/12188 [11:08:08<14:32:47, 7.46s/it] {'loss': 0.339, 'grad_norm': 0.6224213201921864, 'learning_rate': 6.454840151767369e-06, 'epoch': 0.42} + 42%|████▏ | 5167/12188 [11:08:08<14:32:47, 7.46s/it] 42%|████▏ | 5168/12188 [11:08:15<14:14:41, 7.31s/it] {'loss': 0.3913, 'grad_norm': 0.6426333242406136, 'learning_rate': 6.453568883757038e-06, 'epoch': 0.42} + 42%|████▏ | 5168/12188 [11:08:15<14:14:41, 7.31s/it] 42%|████▏ | 5169/12188 [11:08:22<14:04:17, 7.22s/it] {'loss': 0.3366, 'grad_norm': 0.7206447243653405, 'learning_rate': 6.452297513098006e-06, 'epoch': 0.42} + 42%|████▏ | 5169/12188 [11:08:22<14:04:17, 7.22s/it] 42%|████▏ | 5170/12188 [11:08:29<13:51:36, 7.11s/it] {'loss': 0.3342, 'grad_norm': 0.6780156884610461, 'learning_rate': 6.451026039880059e-06, 'epoch': 0.42} + 42%|████▏ | 5170/12188 [11:08:29<13:51:36, 7.11s/it] 42%|████▏ | 5171/12188 [11:08:40<16:08:45, 8.28s/it] {'loss': 0.3214, 'grad_norm': 0.6392570460908237, 'learning_rate': 6.449754464192983e-06, 'epoch': 0.42} + 42%|████▏ | 5171/12188 [11:08:40<16:08:45, 8.28s/it] 42%|████▏ | 5172/12188 [11:08:48<15:46:58, 8.10s/it] {'loss': 0.3014, 'grad_norm': 0.700224876897939, 'learning_rate': 6.448482786126575e-06, 'epoch': 0.42} + 42%|████▏ | 5172/12188 [11:08:48<15:46:58, 8.10s/it] 42%|████▏ | 5173/12188 [11:08:56<15:30:12, 7.96s/it] {'loss': 0.333, 'grad_norm': 0.633758341427693, 'learning_rate': 6.447211005770641e-06, 'epoch': 0.42} + 42%|████▏ | 5173/12188 [11:08:56<15:30:12, 7.96s/it] 42%|████▏ | 5174/12188 [11:09:03<14:57:17, 7.68s/it] {'loss': 0.3676, 'grad_norm': 0.702374111251984, 'learning_rate': 6.445939123214991e-06, 'epoch': 0.42} + 42%|████▏ | 5174/12188 [11:09:03<14:57:17, 7.68s/it] 42%|████▏ | 5175/12188 [11:09:09<14:23:17, 7.39s/it] {'loss': 0.3216, 'grad_norm': 0.6512145731144774, 'learning_rate': 6.444667138549444e-06, 'epoch': 0.42} + 42%|████▏ | 5175/12188 [11:09:09<14:23:17, 7.39s/it] 42%|████▏ | 5176/12188 [11:09:17<14:44:02, 7.56s/it] {'loss': 0.319, 'grad_norm': 0.625002260547807, 'learning_rate': 6.443395051863824e-06, 'epoch': 0.42} + 42%|████▏ | 5176/12188 [11:09:17<14:44:02, 7.56s/it] 42%|████▏ | 5177/12188 [11:09:24<14:31:19, 7.46s/it] {'loss': 0.3188, 'grad_norm': 0.676068702594816, 'learning_rate': 6.442122863247962e-06, 'epoch': 0.42} + 42%|████▏ | 5177/12188 [11:09:24<14:31:19, 7.46s/it] 42%|████▏ | 5178/12188 [11:09:32<14:18:18, 7.35s/it] {'loss': 0.3539, 'grad_norm': 0.654211660610666, 'learning_rate': 6.440850572791703e-06, 'epoch': 0.42} + 42%|████▏ | 5178/12188 [11:09:32<14:18:18, 7.35s/it] 42%|████▏ | 5179/12188 [11:09:39<14:35:29, 7.49s/it] {'loss': 0.3434, 'grad_norm': 0.7322512914799778, 'learning_rate': 6.439578180584891e-06, 'epoch': 0.42} + 42%|████▏ | 5179/12188 [11:09:39<14:35:29, 7.49s/it] 43%|████▎ | 5180/12188 [11:09:46<14:02:05, 7.21s/it] {'loss': 0.3609, 'grad_norm': 0.6878009154107956, 'learning_rate': 6.438305686717381e-06, 'epoch': 0.42} + 43%|████▎ | 5180/12188 [11:09:46<14:02:05, 7.21s/it] 43%|████▎ | 5181/12188 [11:09:53<14:06:58, 7.25s/it] {'loss': 0.3369, 'grad_norm': 0.6275083242932019, 'learning_rate': 6.437033091279035e-06, 'epoch': 0.43} + 43%|████▎ | 5181/12188 [11:09:53<14:06:58, 7.25s/it] 43%|████▎ | 5182/12188 [11:10:00<13:44:56, 7.06s/it] {'loss': 0.3525, 'grad_norm': 0.6498015079937223, 'learning_rate': 6.435760394359719e-06, 'epoch': 0.43} + 43%|████▎ | 5182/12188 [11:10:00<13:44:56, 7.06s/it] 43%|████▎ | 5183/12188 [11:10:07<13:33:43, 6.97s/it] {'loss': 0.3524, 'grad_norm': 0.647568894711256, 'learning_rate': 6.434487596049313e-06, 'epoch': 0.43} + 43%|████▎ | 5183/12188 [11:10:07<13:33:43, 6.97s/it] 43%|████▎ | 5184/12188 [11:10:14<13:54:15, 7.15s/it] {'loss': 0.3179, 'grad_norm': 0.6024460731169333, 'learning_rate': 6.433214696437695e-06, 'epoch': 0.43} + 43%|████▎ | 5184/12188 [11:10:14<13:54:15, 7.15s/it] 43%|████▎ | 5185/12188 [11:10:22<14:02:49, 7.22s/it] {'loss': 0.3426, 'grad_norm': 0.6468147835518474, 'learning_rate': 6.43194169561476e-06, 'epoch': 0.43} + 43%|████▎ | 5185/12188 [11:10:22<14:02:49, 7.22s/it] 43%|████▎ | 5186/12188 [11:10:29<14:13:13, 7.31s/it] {'loss': 0.3155, 'grad_norm': 0.6444681840094403, 'learning_rate': 6.4306685936704e-06, 'epoch': 0.43} + 43%|████▎ | 5186/12188 [11:10:29<14:13:13, 7.31s/it] 43%|████▎ | 5187/12188 [11:10:36<14:09:08, 7.28s/it] {'loss': 0.3206, 'grad_norm': 0.6125312370168582, 'learning_rate': 6.429395390694525e-06, 'epoch': 0.43} + 43%|████▎ | 5187/12188 [11:10:36<14:09:08, 7.28s/it] 43%|████▎ | 5188/12188 [11:10:43<14:00:23, 7.20s/it] {'loss': 0.3694, 'grad_norm': 0.6719795895467638, 'learning_rate': 6.428122086777044e-06, 'epoch': 0.43} + 43%|████▎ | 5188/12188 [11:10:43<14:00:23, 7.20s/it] 43%|████▎ | 5189/12188 [11:10:50<13:45:20, 7.08s/it] {'loss': 0.3365, 'grad_norm': 0.7072844437884663, 'learning_rate': 6.426848682007876e-06, 'epoch': 0.43} + 43%|████▎ | 5189/12188 [11:10:50<13:45:20, 7.08s/it] 43%|████▎ | 5190/12188 [11:11:00<15:08:51, 7.79s/it] {'loss': 0.3437, 'grad_norm': 0.7203318760489152, 'learning_rate': 6.425575176476944e-06, 'epoch': 0.43} + 43%|████▎ | 5190/12188 [11:11:00<15:08:51, 7.79s/it] 43%|████▎ | 5191/12188 [11:11:07<14:38:56, 7.54s/it] {'loss': 0.3792, 'grad_norm': 0.6255127061873847, 'learning_rate': 6.4243015702741854e-06, 'epoch': 0.43} + 43%|████▎ | 5191/12188 [11:11:07<14:38:56, 7.54s/it] 43%|████▎ | 5192/12188 [11:11:13<14:06:15, 7.26s/it] {'loss': 0.3282, 'grad_norm': 0.641770814738569, 'learning_rate': 6.423027863489539e-06, 'epoch': 0.43} + 43%|████▎ | 5192/12188 [11:11:13<14:06:15, 7.26s/it] 43%|████▎ | 5193/12188 [11:11:20<13:55:36, 7.17s/it] {'loss': 0.3463, 'grad_norm': 0.718779275273746, 'learning_rate': 6.421754056212949e-06, 'epoch': 0.43} + 43%|████▎ | 5193/12188 [11:11:20<13:55:36, 7.17s/it] 43%|████▎ | 5194/12188 [11:11:27<13:35:39, 7.00s/it] {'loss': 0.3853, 'grad_norm': 0.701082155676886, 'learning_rate': 6.420480148534373e-06, 'epoch': 0.43} + 43%|████▎ | 5194/12188 [11:11:27<13:35:39, 7.00s/it] 43%|████▎ | 5195/12188 [11:11:35<14:13:03, 7.32s/it] {'loss': 0.3361, 'grad_norm': 0.5919362274482954, 'learning_rate': 6.41920614054377e-06, 'epoch': 0.43} + 43%|████▎ | 5195/12188 [11:11:35<14:13:03, 7.32s/it] 43%|████▎ | 5196/12188 [11:11:42<13:51:30, 7.14s/it] {'loss': 0.3252, 'grad_norm': 0.7038698476091733, 'learning_rate': 6.417932032331111e-06, 'epoch': 0.43} + 43%|████▎ | 5196/12188 [11:11:42<13:51:30, 7.14s/it] 43%|████▎ | 5197/12188 [11:11:49<13:49:17, 7.12s/it] {'loss': 0.3302, 'grad_norm': 0.6273004494594092, 'learning_rate': 6.41665782398637e-06, 'epoch': 0.43} + 43%|████▎ | 5197/12188 [11:11:49<13:49:17, 7.12s/it] 43%|████▎ | 5198/12188 [11:11:57<14:23:51, 7.42s/it] {'loss': 0.3222, 'grad_norm': 0.6305240533975135, 'learning_rate': 6.415383515599528e-06, 'epoch': 0.43} + 43%|████▎ | 5198/12188 [11:11:57<14:23:51, 7.42s/it] 43%|████▎ | 5199/12188 [11:12:04<14:10:50, 7.30s/it] {'loss': 0.3294, 'grad_norm': 0.6324784597258244, 'learning_rate': 6.414109107260576e-06, 'epoch': 0.43} + 43%|████▎ | 5199/12188 [11:12:04<14:10:50, 7.30s/it] 43%|████▎ | 5200/12188 [11:12:11<13:59:52, 7.21s/it] {'loss': 0.2929, 'grad_norm': 0.6603074343241788, 'learning_rate': 6.412834599059512e-06, 'epoch': 0.43} + 43%|████▎ | 5200/12188 [11:12:11<13:59:52, 7.21s/it] 43%|████▎ | 5201/12188 [11:12:18<14:06:59, 7.27s/it] {'loss': 0.3752, 'grad_norm': 0.6752060611461368, 'learning_rate': 6.411559991086338e-06, 'epoch': 0.43} + 43%|████▎ | 5201/12188 [11:12:18<14:06:59, 7.27s/it] 43%|████▎ | 5202/12188 [11:12:25<13:42:32, 7.06s/it] {'loss': 0.3427, 'grad_norm': 0.6881531338194561, 'learning_rate': 6.410285283431066e-06, 'epoch': 0.43} + 43%|████▎ | 5202/12188 [11:12:25<13:42:32, 7.06s/it] 43%|████▎ | 5203/12188 [11:12:32<13:36:50, 7.02s/it] {'loss': 0.3226, 'grad_norm': 0.7001727066164868, 'learning_rate': 6.409010476183713e-06, 'epoch': 0.43} + 43%|████▎ | 5203/12188 [11:12:32<13:36:50, 7.02s/it] 43%|████▎ | 5204/12188 [11:12:38<13:30:31, 6.96s/it] {'loss': 0.3159, 'grad_norm': 0.6160462829846428, 'learning_rate': 6.407735569434304e-06, 'epoch': 0.43} + 43%|████▎ | 5204/12188 [11:12:38<13:30:31, 6.96s/it] 43%|████▎ | 5205/12188 [11:12:46<14:00:03, 7.22s/it] {'loss': 0.3503, 'grad_norm': 0.6241510678399531, 'learning_rate': 6.40646056327287e-06, 'epoch': 0.43} + 43%|████▎ | 5205/12188 [11:12:46<14:00:03, 7.22s/it] 43%|████▎ | 5206/12188 [11:12:53<13:51:34, 7.15s/it] {'loss': 0.3115, 'grad_norm': 0.6557599440524775, 'learning_rate': 6.40518545778945e-06, 'epoch': 0.43} + 43%|████▎ | 5206/12188 [11:12:53<13:51:34, 7.15s/it] 43%|████▎ | 5207/12188 [11:13:01<14:00:44, 7.23s/it] {'loss': 0.3305, 'grad_norm': 0.683439408575065, 'learning_rate': 6.403910253074091e-06, 'epoch': 0.43} + 43%|████▎ | 5207/12188 [11:13:01<14:00:44, 7.23s/it] 43%|████▎ | 5208/12188 [11:13:07<13:43:18, 7.08s/it] {'loss': 0.352, 'grad_norm': 0.6602701443845505, 'learning_rate': 6.402634949216846e-06, 'epoch': 0.43} + 43%|████▎ | 5208/12188 [11:13:07<13:43:18, 7.08s/it] 43%|████▎ | 5209/12188 [11:13:14<13:43:37, 7.08s/it] {'loss': 0.3538, 'grad_norm': 0.6793466778938498, 'learning_rate': 6.401359546307775e-06, 'epoch': 0.43} + 43%|████▎ | 5209/12188 [11:13:15<13:43:37, 7.08s/it] 43%|████▎ | 5210/12188 [11:13:22<13:44:14, 7.09s/it] {'loss': 0.3032, 'grad_norm': 0.5902600077754572, 'learning_rate': 6.400084044436944e-06, 'epoch': 0.43} + 43%|████▎ | 5210/12188 [11:13:22<13:44:14, 7.09s/it] 43%|████▎ | 5211/12188 [11:13:29<13:42:38, 7.07s/it] {'loss': 0.3491, 'grad_norm': 0.6239050078336231, 'learning_rate': 6.398808443694426e-06, 'epoch': 0.43} + 43%|████▎ | 5211/12188 [11:13:29<13:42:38, 7.07s/it] 43%|████▎ | 5212/12188 [11:13:35<13:23:24, 6.91s/it] {'loss': 0.3431, 'grad_norm': 0.6897131744938616, 'learning_rate': 6.397532744170305e-06, 'epoch': 0.43} + 43%|████▎ | 5212/12188 [11:13:35<13:23:24, 6.91s/it] 43%|████▎ | 5213/12188 [11:13:43<13:43:32, 7.08s/it] {'loss': 0.3699, 'grad_norm': 0.6630845692764604, 'learning_rate': 6.3962569459546655e-06, 'epoch': 0.43} + 43%|████▎ | 5213/12188 [11:13:43<13:43:32, 7.08s/it] 43%|████▎ | 5214/12188 [11:13:50<14:08:23, 7.30s/it] {'loss': 0.3188, 'grad_norm': 0.6256370385863597, 'learning_rate': 6.394981049137603e-06, 'epoch': 0.43} + 43%|████▎ | 5214/12188 [11:13:50<14:08:23, 7.30s/it] 43%|████▎ | 5215/12188 [11:13:58<14:06:33, 7.28s/it] {'loss': 0.3347, 'grad_norm': 0.7294911274223399, 'learning_rate': 6.393705053809221e-06, 'epoch': 0.43} + 43%|████▎ | 5215/12188 [11:13:58<14:06:33, 7.28s/it] 43%|████▎ | 5216/12188 [11:14:05<13:52:07, 7.16s/it] {'loss': 0.3857, 'grad_norm': 0.6852816425336488, 'learning_rate': 6.392428960059628e-06, 'epoch': 0.43} + 43%|████▎ | 5216/12188 [11:14:05<13:52:07, 7.16s/it] 43%|████▎ | 5217/12188 [11:14:12<14:12:55, 7.34s/it] {'loss': 0.3518, 'grad_norm': 0.6149539419000086, 'learning_rate': 6.391152767978939e-06, 'epoch': 0.43} + 43%|████▎ | 5217/12188 [11:14:12<14:12:55, 7.34s/it] 43%|████▎ | 5218/12188 [11:14:20<14:19:07, 7.40s/it] {'loss': 0.3342, 'grad_norm': 0.6854941505993525, 'learning_rate': 6.389876477657275e-06, 'epoch': 0.43} + 43%|████▎ | 5218/12188 [11:14:20<14:19:07, 7.40s/it] 43%|████▎ | 5219/12188 [11:14:27<13:59:17, 7.23s/it] {'loss': 0.3526, 'grad_norm': 0.625936470953801, 'learning_rate': 6.388600089184769e-06, 'epoch': 0.43} + 43%|████▎ | 5219/12188 [11:14:27<13:59:17, 7.23s/it] 43%|████▎ | 5220/12188 [11:14:34<13:47:24, 7.12s/it] {'loss': 0.3551, 'grad_norm': 0.6232577211406911, 'learning_rate': 6.387323602651554e-06, 'epoch': 0.43} + 43%|████▎ | 5220/12188 [11:14:34<13:47:24, 7.12s/it] 43%|████▎ | 5221/12188 [11:14:40<13:33:37, 7.01s/it] {'loss': 0.3686, 'grad_norm': 0.6811953951797977, 'learning_rate': 6.386047018147776e-06, 'epoch': 0.43} + 43%|████▎ | 5221/12188 [11:14:40<13:33:37, 7.01s/it] 43%|████▎ | 5222/12188 [11:14:47<13:26:54, 6.95s/it] {'loss': 0.3562, 'grad_norm': 0.6883477694345871, 'learning_rate': 6.384770335763584e-06, 'epoch': 0.43} + 43%|████▎ | 5222/12188 [11:14:47<13:26:54, 6.95s/it] 43%|████▎ | 5223/12188 [11:14:55<13:49:29, 7.15s/it] {'loss': 0.3524, 'grad_norm': 0.6093799108535922, 'learning_rate': 6.383493555589136e-06, 'epoch': 0.43} + 43%|████▎ | 5223/12188 [11:14:55<13:49:29, 7.15s/it] 43%|████▎ | 5224/12188 [11:15:02<14:08:23, 7.31s/it] {'loss': 0.3644, 'grad_norm': 0.613873978620886, 'learning_rate': 6.382216677714597e-06, 'epoch': 0.43} + 43%|████▎ | 5224/12188 [11:15:02<14:08:23, 7.31s/it] 43%|████▎ | 5225/12188 [11:15:09<13:52:22, 7.17s/it] {'loss': 0.3734, 'grad_norm': 0.6743667402912125, 'learning_rate': 6.380939702230136e-06, 'epoch': 0.43} + 43%|████▎ | 5225/12188 [11:15:09<13:52:22, 7.17s/it] 43%|████▎ | 5226/12188 [11:15:16<13:26:50, 6.95s/it] {'loss': 0.3528, 'grad_norm': 0.6420071399206235, 'learning_rate': 6.37966262922593e-06, 'epoch': 0.43} + 43%|████▎ | 5226/12188 [11:15:16<13:26:50, 6.95s/it] 43%|████▎ | 5227/12188 [11:15:23<13:26:07, 6.95s/it] {'loss': 0.36, 'grad_norm': 0.6370514626860275, 'learning_rate': 6.3783854587921666e-06, 'epoch': 0.43} + 43%|████▎ | 5227/12188 [11:15:23<13:26:07, 6.95s/it] 43%|████▎ | 5228/12188 [11:15:30<13:40:11, 7.07s/it] {'loss': 0.3167, 'grad_norm': 0.6053231780139077, 'learning_rate': 6.377108191019038e-06, 'epoch': 0.43} + 43%|████▎ | 5228/12188 [11:15:30<13:40:11, 7.07s/it] 43%|████▎ | 5229/12188 [11:15:37<13:36:38, 7.04s/it] {'loss': 0.3151, 'grad_norm': 0.6346567151370308, 'learning_rate': 6.3758308259967404e-06, 'epoch': 0.43} + 43%|████▎ | 5229/12188 [11:15:37<13:36:38, 7.04s/it] 43%|████▎ | 5230/12188 [11:15:44<13:28:37, 6.97s/it] {'loss': 0.3243, 'grad_norm': 0.6578174319008205, 'learning_rate': 6.374553363815481e-06, 'epoch': 0.43} + 43%|████▎ | 5230/12188 [11:15:44<13:28:37, 6.97s/it] 43%|████▎ | 5231/12188 [11:15:51<13:26:27, 6.96s/it] {'loss': 0.3484, 'grad_norm': 0.6905181950605965, 'learning_rate': 6.373275804565471e-06, 'epoch': 0.43} + 43%|████▎ | 5231/12188 [11:15:51<13:26:27, 6.96s/it] 43%|████▎ | 5232/12188 [11:15:58<13:24:33, 6.94s/it] {'loss': 0.3443, 'grad_norm': 0.6332042465236195, 'learning_rate': 6.371998148336931e-06, 'epoch': 0.43} + 43%|████▎ | 5232/12188 [11:15:58<13:24:33, 6.94s/it] 43%|████▎ | 5233/12188 [11:16:06<14:00:11, 7.25s/it] {'loss': 0.3554, 'grad_norm': 0.6408808795961548, 'learning_rate': 6.370720395220085e-06, 'epoch': 0.43} + 43%|████▎ | 5233/12188 [11:16:06<14:00:11, 7.25s/it] 43%|████▎ | 5234/12188 [11:16:12<13:42:32, 7.10s/it] {'loss': 0.3516, 'grad_norm': 0.6695121960595787, 'learning_rate': 6.3694425453051665e-06, 'epoch': 0.43} + 43%|████▎ | 5234/12188 [11:16:12<13:42:32, 7.10s/it] 43%|████▎ | 5235/12188 [11:16:20<13:52:26, 7.18s/it] {'loss': 0.3729, 'grad_norm': 0.6653009485268677, 'learning_rate': 6.3681645986824155e-06, 'epoch': 0.43} + 43%|████▎ | 5235/12188 [11:16:20<13:52:26, 7.18s/it] 43%|████▎ | 5236/12188 [11:16:30<15:48:23, 8.19s/it] {'loss': 0.3292, 'grad_norm': 0.6750830139740812, 'learning_rate': 6.36688655544208e-06, 'epoch': 0.43} + 43%|████▎ | 5236/12188 [11:16:30<15:48:23, 8.19s/it] 43%|████▎ | 5237/12188 [11:16:38<15:17:24, 7.92s/it] {'loss': 0.3401, 'grad_norm': 0.6450712443154315, 'learning_rate': 6.365608415674412e-06, 'epoch': 0.43} + 43%|████▎ | 5237/12188 [11:16:38<15:17:24, 7.92s/it] 43%|████▎ | 5238/12188 [11:16:44<14:39:00, 7.59s/it] {'loss': 0.3192, 'grad_norm': 0.655978074450412, 'learning_rate': 6.364330179469671e-06, 'epoch': 0.43} + 43%|████▎ | 5238/12188 [11:16:44<14:39:00, 7.59s/it] 43%|████▎ | 5239/12188 [11:16:51<14:20:59, 7.43s/it] {'loss': 0.3406, 'grad_norm': 0.6342942639745143, 'learning_rate': 6.3630518469181246e-06, 'epoch': 0.43} + 43%|████▎ | 5239/12188 [11:16:51<14:20:59, 7.43s/it] 43%|████▎ | 5240/12188 [11:16:58<13:59:02, 7.25s/it] {'loss': 0.3603, 'grad_norm': 0.6067398283571768, 'learning_rate': 6.361773418110046e-06, 'epoch': 0.43} + 43%|████▎ | 5240/12188 [11:16:58<13:59:02, 7.25s/it] 43%|████▎ | 5241/12188 [11:17:06<14:00:55, 7.26s/it] {'loss': 0.3078, 'grad_norm': 0.6253228209171883, 'learning_rate': 6.360494893135718e-06, 'epoch': 0.43} + 43%|████▎ | 5241/12188 [11:17:06<14:00:55, 7.26s/it] 43%|████▎ | 5242/12188 [11:17:13<13:57:51, 7.24s/it] {'loss': 0.3243, 'grad_norm': 0.6563721232056683, 'learning_rate': 6.359216272085426e-06, 'epoch': 0.43} + 43%|████▎ | 5242/12188 [11:17:13<13:57:51, 7.24s/it] 43%|████▎ | 5243/12188 [11:17:21<14:23:51, 7.46s/it] {'loss': 0.3427, 'grad_norm': 0.6335055832961313, 'learning_rate': 6.357937555049465e-06, 'epoch': 0.43} + 43%|████▎ | 5243/12188 [11:17:21<14:23:51, 7.46s/it] 43%|████▎ | 5244/12188 [11:17:30<15:15:34, 7.91s/it] {'loss': 0.3433, 'grad_norm': 0.6821082798169849, 'learning_rate': 6.356658742118135e-06, 'epoch': 0.43} + 43%|████▎ | 5244/12188 [11:17:30<15:15:34, 7.91s/it] 43%|████▎ | 5245/12188 [11:17:37<14:56:52, 7.75s/it] {'loss': 0.3238, 'grad_norm': 0.6256579622817607, 'learning_rate': 6.355379833381744e-06, 'epoch': 0.43} + 43%|████▎ | 5245/12188 [11:17:37<14:56:52, 7.75s/it] 43%|████▎ | 5246/12188 [11:17:45<14:58:24, 7.77s/it] {'loss': 0.2994, 'grad_norm': 0.6024407717261073, 'learning_rate': 6.354100828930607e-06, 'epoch': 0.43} + 43%|████▎ | 5246/12188 [11:17:45<14:58:24, 7.77s/it] 43%|████▎ | 5247/12188 [11:17:52<14:35:59, 7.57s/it] {'loss': 0.3601, 'grad_norm': 0.6819714793577115, 'learning_rate': 6.3528217288550455e-06, 'epoch': 0.43} + 43%|████▎ | 5247/12188 [11:17:52<14:35:59, 7.57s/it] 43%|████▎ | 5248/12188 [11:17:59<14:31:00, 7.53s/it] {'loss': 0.3581, 'grad_norm': 0.5916509097785213, 'learning_rate': 6.351542533245385e-06, 'epoch': 0.43} + 43%|████▎ | 5248/12188 [11:17:59<14:31:00, 7.53s/it] 43%|████▎ | 5249/12188 [11:18:06<13:52:04, 7.19s/it] {'loss': 0.2981, 'grad_norm': 0.6486083863506376, 'learning_rate': 6.350263242191963e-06, 'epoch': 0.43} + 43%|████▎ | 5249/12188 [11:18:06<13:52:04, 7.19s/it] 43%|████▎ | 5250/12188 [11:18:13<13:45:09, 7.14s/it] {'loss': 0.3507, 'grad_norm': 0.6399318135844196, 'learning_rate': 6.348983855785122e-06, 'epoch': 0.43} + 43%|████▎ | 5250/12188 [11:18:13<13:45:09, 7.14s/it] 43%|████▎ | 5251/12188 [11:18:19<13:26:23, 6.97s/it] {'loss': 0.2958, 'grad_norm': 0.6341026956448524, 'learning_rate': 6.347704374115208e-06, 'epoch': 0.43} + 43%|████▎ | 5251/12188 [11:18:19<13:26:23, 6.97s/it] 43%|████▎ | 5252/12188 [11:18:27<13:49:15, 7.17s/it] {'loss': 0.3689, 'grad_norm': 0.6167459001987715, 'learning_rate': 6.346424797272576e-06, 'epoch': 0.43} + 43%|████▎ | 5252/12188 [11:18:27<13:49:15, 7.17s/it] 43%|████▎ | 5253/12188 [11:18:35<14:27:39, 7.51s/it] {'loss': 0.3271, 'grad_norm': 0.5881058918400779, 'learning_rate': 6.345145125347587e-06, 'epoch': 0.43} + 43%|████▎ | 5253/12188 [11:18:35<14:27:39, 7.51s/it] 43%|████▎ | 5254/12188 [11:18:44<15:02:04, 7.81s/it] {'loss': 0.3167, 'grad_norm': 0.635261452801603, 'learning_rate': 6.343865358430611e-06, 'epoch': 0.43} + 43%|████▎ | 5254/12188 [11:18:44<15:02:04, 7.81s/it] 43%|████▎ | 5255/12188 [11:18:51<14:39:38, 7.61s/it] {'loss': 0.2825, 'grad_norm': 0.6750927587839273, 'learning_rate': 6.342585496612021e-06, 'epoch': 0.43} + 43%|████▎ | 5255/12188 [11:18:51<14:39:38, 7.61s/it] 43%|████▎ | 5256/12188 [11:18:58<14:12:21, 7.38s/it] {'loss': 0.362, 'grad_norm': 0.6997605604316285, 'learning_rate': 6.341305539982203e-06, 'epoch': 0.43} + 43%|████▎ | 5256/12188 [11:18:58<14:12:21, 7.38s/it] 43%|████▎ | 5257/12188 [11:19:05<14:06:33, 7.33s/it] {'loss': 0.2927, 'grad_norm': 0.5874013812022385, 'learning_rate': 6.340025488631541e-06, 'epoch': 0.43} + 43%|████▎ | 5257/12188 [11:19:05<14:06:33, 7.33s/it] 43%|████▎ | 5258/12188 [11:19:12<13:47:56, 7.17s/it] {'loss': 0.3588, 'grad_norm': 0.6542020108547706, 'learning_rate': 6.338745342650433e-06, 'epoch': 0.43} + 43%|████▎ | 5258/12188 [11:19:12<13:47:56, 7.17s/it] 43%|████▎ | 5259/12188 [11:19:19<13:41:15, 7.11s/it] {'loss': 0.322, 'grad_norm': 0.6666251142393067, 'learning_rate': 6.337465102129279e-06, 'epoch': 0.43} + 43%|████▎ | 5259/12188 [11:19:19<13:41:15, 7.11s/it] 43%|████▎ | 5260/12188 [11:19:26<13:32:05, 7.03s/it] {'loss': 0.3807, 'grad_norm': 0.732614064284012, 'learning_rate': 6.336184767158488e-06, 'epoch': 0.43} + 43%|████▎ | 5260/12188 [11:19:26<13:32:05, 7.03s/it] 43%|████▎ | 5261/12188 [11:19:33<13:41:12, 7.11s/it] {'loss': 0.3334, 'grad_norm': 0.6070683078518448, 'learning_rate': 6.334904337828477e-06, 'epoch': 0.43} + 43%|████▎ | 5261/12188 [11:19:33<13:41:12, 7.11s/it] 43%|████▎ | 5262/12188 [11:19:43<15:14:57, 7.93s/it] {'loss': 0.3647, 'grad_norm': 0.6579420488954592, 'learning_rate': 6.3336238142296645e-06, 'epoch': 0.43} + 43%|████▎ | 5262/12188 [11:19:43<15:14:57, 7.93s/it] 43%|████▎ | 5263/12188 [11:19:50<14:48:36, 7.70s/it] {'loss': 0.3433, 'grad_norm': 0.6768781855207855, 'learning_rate': 6.3323431964524815e-06, 'epoch': 0.43} + 43%|████▎ | 5263/12188 [11:19:50<14:48:36, 7.70s/it] 43%|████▎ | 5264/12188 [11:19:57<14:25:22, 7.50s/it] {'loss': 0.3189, 'grad_norm': 0.6514030112478398, 'learning_rate': 6.331062484587362e-06, 'epoch': 0.43} + 43%|████▎ | 5264/12188 [11:19:57<14:25:22, 7.50s/it] 43%|████▎ | 5265/12188 [11:20:04<14:06:04, 7.33s/it] {'loss': 0.3528, 'grad_norm': 0.6781344724585752, 'learning_rate': 6.329781678724752e-06, 'epoch': 0.43} + 43%|████▎ | 5265/12188 [11:20:04<14:06:04, 7.33s/it] 43%|████▎ | 5266/12188 [11:20:11<14:13:59, 7.40s/it] {'loss': 0.3363, 'grad_norm': 0.6584637535953564, 'learning_rate': 6.328500778955091e-06, 'epoch': 0.43} + 43%|████▎ | 5266/12188 [11:20:11<14:13:59, 7.40s/it] 43%|████▎ | 5267/12188 [11:20:19<14:16:18, 7.42s/it] {'loss': 0.3715, 'grad_norm': 0.7171365596956452, 'learning_rate': 6.3272197853688425e-06, 'epoch': 0.43} + 43%|████▎ | 5267/12188 [11:20:19<14:16:18, 7.42s/it] 43%|████▎ | 5268/12188 [11:20:27<14:31:16, 7.55s/it] {'loss': 0.3302, 'grad_norm': 0.6402552462497081, 'learning_rate': 6.325938698056464e-06, 'epoch': 0.43} + 43%|████▎ | 5268/12188 [11:20:27<14:31:16, 7.55s/it] 43%|████▎ | 5269/12188 [11:20:35<14:50:55, 7.73s/it] {'loss': 0.3178, 'grad_norm': 0.616058905299238, 'learning_rate': 6.324657517108426e-06, 'epoch': 0.43} + 43%|████▎ | 5269/12188 [11:20:35<14:50:55, 7.73s/it] 43%|████▎ | 5270/12188 [11:20:42<14:34:01, 7.58s/it] {'loss': 0.2976, 'grad_norm': 0.574571618644265, 'learning_rate': 6.323376242615201e-06, 'epoch': 0.43} + 43%|████▎ | 5270/12188 [11:20:42<14:34:01, 7.58s/it] 43%|████▎ | 5271/12188 [11:20:50<14:38:08, 7.62s/it] {'loss': 0.302, 'grad_norm': 0.6546633250482502, 'learning_rate': 6.322094874667273e-06, 'epoch': 0.43} + 43%|████▎ | 5271/12188 [11:20:50<14:38:08, 7.62s/it] 43%|████▎ | 5272/12188 [11:20:58<14:56:13, 7.78s/it] {'loss': 0.3333, 'grad_norm': 0.6229092716736692, 'learning_rate': 6.320813413355129e-06, 'epoch': 0.43} + 43%|████▎ | 5272/12188 [11:20:58<14:56:13, 7.78s/it] 43%|████▎ | 5273/12188 [11:21:05<14:29:39, 7.55s/it] {'loss': 0.368, 'grad_norm': 0.6908233474323751, 'learning_rate': 6.319531858769263e-06, 'epoch': 0.43} + 43%|████▎ | 5273/12188 [11:21:05<14:29:39, 7.55s/it] 43%|████▎ | 5274/12188 [11:21:12<14:14:16, 7.41s/it] {'loss': 0.3384, 'grad_norm': 0.6268276713180859, 'learning_rate': 6.318250211000179e-06, 'epoch': 0.43} + 43%|████▎ | 5274/12188 [11:21:12<14:14:16, 7.41s/it] 43%|████▎ | 5275/12188 [11:21:22<15:45:01, 8.20s/it] {'loss': 0.3693, 'grad_norm': 0.7456314594038563, 'learning_rate': 6.316968470138379e-06, 'epoch': 0.43} + 43%|████▎ | 5275/12188 [11:21:22<15:45:01, 8.20s/it] 43%|████▎ | 5276/12188 [11:21:30<15:35:56, 8.12s/it] {'loss': 0.3489, 'grad_norm': 0.6370785744866168, 'learning_rate': 6.315686636274385e-06, 'epoch': 0.43} + 43%|████▎ | 5276/12188 [11:21:30<15:35:56, 8.12s/it] 43%|████▎ | 5277/12188 [11:21:37<14:54:51, 7.77s/it] {'loss': 0.319, 'grad_norm': 0.6631063861949101, 'learning_rate': 6.314404709498714e-06, 'epoch': 0.43} + 43%|████▎ | 5277/12188 [11:21:37<14:54:51, 7.77s/it] 43%|████▎ | 5278/12188 [11:21:46<15:36:11, 8.13s/it] {'loss': 0.3217, 'grad_norm': 0.6447495565504858, 'learning_rate': 6.313122689901894e-06, 'epoch': 0.43} + 43%|████▎ | 5278/12188 [11:21:46<15:36:11, 8.13s/it] 43%|████▎ | 5279/12188 [11:21:54<15:18:44, 7.98s/it] {'loss': 0.3062, 'grad_norm': 0.6555301576265714, 'learning_rate': 6.31184057757446e-06, 'epoch': 0.43} + 43%|████▎ | 5279/12188 [11:21:54<15:18:44, 7.98s/it] 43%|████▎ | 5280/12188 [11:22:01<14:55:47, 7.78s/it] {'loss': 0.3201, 'grad_norm': 0.6250268482552348, 'learning_rate': 6.3105583726069495e-06, 'epoch': 0.43} + 43%|████▎ | 5280/12188 [11:22:01<14:55:47, 7.78s/it] 43%|████▎ | 5281/12188 [11:22:08<14:26:16, 7.53s/it] {'loss': 0.3413, 'grad_norm': 0.6099854655690351, 'learning_rate': 6.309276075089914e-06, 'epoch': 0.43} + 43%|████▎ | 5281/12188 [11:22:08<14:26:16, 7.53s/it] 43%|████▎ | 5282/12188 [11:22:15<14:23:47, 7.50s/it] {'loss': 0.3151, 'grad_norm': 0.7599188757773451, 'learning_rate': 6.307993685113907e-06, 'epoch': 0.43} + 43%|████▎ | 5282/12188 [11:22:15<14:23:47, 7.50s/it] 43%|████▎ | 5283/12188 [11:22:22<14:05:20, 7.35s/it] {'loss': 0.3119, 'grad_norm': 0.6068302141474277, 'learning_rate': 6.306711202769485e-06, 'epoch': 0.43} + 43%|████▎ | 5283/12188 [11:22:22<14:05:20, 7.35s/it] 43%|████▎ | 5284/12188 [11:22:31<14:58:57, 7.81s/it] {'loss': 0.3686, 'grad_norm': 0.6377773356715855, 'learning_rate': 6.305428628147218e-06, 'epoch': 0.43} + 43%|████▎ | 5284/12188 [11:22:31<14:58:57, 7.81s/it] 43%|████▎ | 5285/12188 [11:22:38<14:35:26, 7.61s/it] {'loss': 0.3139, 'grad_norm': 0.655997009829089, 'learning_rate': 6.3041459613376795e-06, 'epoch': 0.43} + 43%|████▎ | 5285/12188 [11:22:38<14:35:26, 7.61s/it] 43%|████▎ | 5286/12188 [11:22:47<15:03:54, 7.86s/it] {'loss': 0.329, 'grad_norm': 0.7499527684634661, 'learning_rate': 6.3028632024314485e-06, 'epoch': 0.43} + 43%|████▎ | 5286/12188 [11:22:47<15:03:54, 7.86s/it] 43%|████▎ | 5287/12188 [11:22:54<14:35:33, 7.61s/it] {'loss': 0.34, 'grad_norm': 0.621286575168743, 'learning_rate': 6.3015803515191096e-06, 'epoch': 0.43} + 43%|████▎ | 5287/12188 [11:22:54<14:35:33, 7.61s/it] 43%|████▎ | 5288/12188 [11:23:01<14:23:15, 7.51s/it] {'loss': 0.3451, 'grad_norm': 0.7517773430299831, 'learning_rate': 6.30029740869126e-06, 'epoch': 0.43} + 43%|████▎ | 5288/12188 [11:23:01<14:23:15, 7.51s/it] 43%|████▎ | 5289/12188 [11:23:09<14:21:10, 7.49s/it] {'loss': 0.3163, 'grad_norm': 0.6129273578187738, 'learning_rate': 6.299014374038493e-06, 'epoch': 0.43} + 43%|████▎ | 5289/12188 [11:23:09<14:21:10, 7.49s/it] 43%|████▎ | 5290/12188 [11:23:16<14:27:21, 7.54s/it] {'loss': 0.3384, 'grad_norm': 0.6470323353587519, 'learning_rate': 6.297731247651419e-06, 'epoch': 0.43} + 43%|████▎ | 5290/12188 [11:23:16<14:27:21, 7.54s/it] 43%|████▎ | 5291/12188 [11:23:23<13:54:27, 7.26s/it] {'loss': 0.3352, 'grad_norm': 0.6573035819230301, 'learning_rate': 6.296448029620652e-06, 'epoch': 0.43} + 43%|████▎ | 5291/12188 [11:23:23<13:54:27, 7.26s/it] 43%|████▎ | 5292/12188 [11:23:31<14:20:10, 7.48s/it] {'loss': 0.3438, 'grad_norm': 0.6727398589516524, 'learning_rate': 6.295164720036806e-06, 'epoch': 0.43} + 43%|████▎ | 5292/12188 [11:23:31<14:20:10, 7.48s/it] 43%|████▎ | 5293/12188 [11:23:39<14:57:33, 7.81s/it] {'loss': 0.358, 'grad_norm': 0.655232979788338, 'learning_rate': 6.293881318990509e-06, 'epoch': 0.43} + 43%|████▎ | 5293/12188 [11:23:39<14:57:33, 7.81s/it] 43%|████▎ | 5294/12188 [11:23:46<14:28:22, 7.56s/it] {'loss': 0.3627, 'grad_norm': 0.6838524687406651, 'learning_rate': 6.292597826572391e-06, 'epoch': 0.43} + 43%|████▎ | 5294/12188 [11:23:46<14:28:22, 7.56s/it] 43%|████▎ | 5295/12188 [11:23:54<14:29:58, 7.57s/it] {'loss': 0.36, 'grad_norm': 0.6519562107258645, 'learning_rate': 6.291314242873092e-06, 'epoch': 0.43} + 43%|████▎ | 5295/12188 [11:23:54<14:29:58, 7.57s/it] 43%|████▎ | 5296/12188 [11:24:03<15:03:48, 7.87s/it] {'loss': 0.3533, 'grad_norm': 0.6901950835938988, 'learning_rate': 6.2900305679832565e-06, 'epoch': 0.43} + 43%|████▎ | 5296/12188 [11:24:03<15:03:48, 7.87s/it] 43%|████▎ | 5297/12188 [11:24:09<14:15:17, 7.45s/it] {'loss': 0.3305, 'grad_norm': 0.6102133299352966, 'learning_rate': 6.288746801993533e-06, 'epoch': 0.43} + 43%|████▎ | 5297/12188 [11:24:09<14:15:17, 7.45s/it] 43%|████▎ | 5298/12188 [11:24:16<13:58:56, 7.31s/it] {'loss': 0.3245, 'grad_norm': 0.6412059354167786, 'learning_rate': 6.287462944994582e-06, 'epoch': 0.43} + 43%|████▎ | 5298/12188 [11:24:16<13:58:56, 7.31s/it] 43%|████▎ | 5299/12188 [11:24:23<13:46:05, 7.19s/it] {'loss': 0.342, 'grad_norm': 0.6150182740332532, 'learning_rate': 6.286178997077066e-06, 'epoch': 0.43} + 43%|████▎ | 5299/12188 [11:24:23<13:46:05, 7.19s/it] 43%|████▎ | 5300/12188 [11:24:31<14:06:17, 7.37s/it] {'loss': 0.3341, 'grad_norm': 0.6251794313137558, 'learning_rate': 6.284894958331657e-06, 'epoch': 0.43} + 43%|████▎ | 5300/12188 [11:24:31<14:06:17, 7.37s/it] 43%|████▎ | 5301/12188 [11:24:38<14:03:00, 7.34s/it] {'loss': 0.3193, 'grad_norm': 0.5997117985570801, 'learning_rate': 6.283610828849028e-06, 'epoch': 0.43} + 43%|████▎ | 5301/12188 [11:24:38<14:03:00, 7.34s/it] 44%|████▎ | 5302/12188 [11:24:45<13:40:56, 7.15s/it] {'loss': 0.3607, 'grad_norm': 0.6738481040632917, 'learning_rate': 6.282326608719865e-06, 'epoch': 0.44} + 44%|████▎ | 5302/12188 [11:24:45<13:40:56, 7.15s/it] 44%|████▎ | 5303/12188 [11:24:55<15:34:06, 8.14s/it] {'loss': 0.3524, 'grad_norm': 0.714878430444649, 'learning_rate': 6.281042298034858e-06, 'epoch': 0.44} + 44%|████▎ | 5303/12188 [11:24:55<15:34:06, 8.14s/it] 44%|████▎ | 5304/12188 [11:25:03<15:31:28, 8.12s/it] {'loss': 0.3304, 'grad_norm': 0.6673566456773349, 'learning_rate': 6.2797578968847015e-06, 'epoch': 0.44} + 44%|████▎ | 5304/12188 [11:25:03<15:31:28, 8.12s/it] 44%|████▎ | 5305/12188 [11:25:10<14:45:30, 7.72s/it] {'loss': 0.3837, 'grad_norm': 0.6948467177238463, 'learning_rate': 6.278473405360099e-06, 'epoch': 0.44} + 44%|████▎ | 5305/12188 [11:25:10<14:45:30, 7.72s/it] 44%|████▎ | 5306/12188 [11:25:17<14:16:56, 7.47s/it] {'loss': 0.3641, 'grad_norm': 0.6676519832474107, 'learning_rate': 6.2771888235517586e-06, 'epoch': 0.44} + 44%|████▎ | 5306/12188 [11:25:17<14:16:56, 7.47s/it] 44%|████▎ | 5307/12188 [11:25:24<14:19:25, 7.49s/it] {'loss': 0.3239, 'grad_norm': 0.5897993247575718, 'learning_rate': 6.275904151550396e-06, 'epoch': 0.44} + 44%|████▎ | 5307/12188 [11:25:24<14:19:25, 7.49s/it] 44%|████▎ | 5308/12188 [11:25:31<13:55:27, 7.29s/it] {'loss': 0.3515, 'grad_norm': 0.6379250598981987, 'learning_rate': 6.274619389446731e-06, 'epoch': 0.44} + 44%|████▎ | 5308/12188 [11:25:31<13:55:27, 7.29s/it] 44%|████▎ | 5309/12188 [11:25:38<13:41:08, 7.16s/it] {'loss': 0.3436, 'grad_norm': 0.6398214982428917, 'learning_rate': 6.273334537331494e-06, 'epoch': 0.44} + 44%|████▎ | 5309/12188 [11:25:38<13:41:08, 7.16s/it] 44%|████▎ | 5310/12188 [11:25:45<13:36:58, 7.13s/it] {'loss': 0.3174, 'grad_norm': 0.7026309962756463, 'learning_rate': 6.272049595295416e-06, 'epoch': 0.44} + 44%|████▎ | 5310/12188 [11:25:45<13:36:58, 7.13s/it] 44%|████▎ | 5311/12188 [11:25:53<13:48:25, 7.23s/it] {'loss': 0.3017, 'grad_norm': 0.6775524466114633, 'learning_rate': 6.2707645634292395e-06, 'epoch': 0.44} + 44%|████▎ | 5311/12188 [11:25:53<13:48:25, 7.23s/it] 44%|████▎ | 5312/12188 [11:26:00<13:48:13, 7.23s/it] {'loss': 0.3588, 'grad_norm': 0.6719736914165287, 'learning_rate': 6.269479441823712e-06, 'epoch': 0.44} + 44%|████▎ | 5312/12188 [11:26:00<13:48:13, 7.23s/it] 44%|████▎ | 5313/12188 [11:26:07<13:31:38, 7.08s/it] {'loss': 0.313, 'grad_norm': 0.6371205173884819, 'learning_rate': 6.2681942305695865e-06, 'epoch': 0.44} + 44%|████▎ | 5313/12188 [11:26:07<13:31:38, 7.08s/it] 44%|████▎ | 5314/12188 [11:26:13<13:17:51, 6.96s/it] {'loss': 0.3552, 'grad_norm': 0.6553271533414169, 'learning_rate': 6.26690892975762e-06, 'epoch': 0.44} + 44%|████▎ | 5314/12188 [11:26:13<13:17:51, 6.96s/it] 44%|████▎ | 5315/12188 [11:26:21<13:32:35, 7.09s/it] {'loss': 0.3326, 'grad_norm': 0.6545600423530018, 'learning_rate': 6.265623539478583e-06, 'epoch': 0.44} + 44%|████▎ | 5315/12188 [11:26:21<13:32:35, 7.09s/it] 44%|████▎ | 5316/12188 [11:26:28<13:27:23, 7.05s/it] {'loss': 0.3481, 'grad_norm': 0.6598676411412718, 'learning_rate': 6.264338059823242e-06, 'epoch': 0.44} + 44%|████▎ | 5316/12188 [11:26:28<13:27:23, 7.05s/it] 44%|████▎ | 5317/12188 [11:26:35<13:47:49, 7.23s/it] {'loss': 0.3289, 'grad_norm': 0.5962628065136057, 'learning_rate': 6.26305249088238e-06, 'epoch': 0.44} + 44%|████▎ | 5317/12188 [11:26:35<13:47:49, 7.23s/it] 44%|████▎ | 5318/12188 [11:26:42<13:34:03, 7.11s/it] {'loss': 0.3662, 'grad_norm': 0.6710013027884152, 'learning_rate': 6.26176683274678e-06, 'epoch': 0.44} + 44%|████▎ | 5318/12188 [11:26:42<13:34:03, 7.11s/it] 44%|████▎ | 5319/12188 [11:26:50<14:10:22, 7.43s/it] {'loss': 0.314, 'grad_norm': 0.6034649334824211, 'learning_rate': 6.2604810855072325e-06, 'epoch': 0.44} + 44%|████▎ | 5319/12188 [11:26:50<14:10:22, 7.43s/it] 44%|████▎ | 5320/12188 [11:26:57<13:45:32, 7.21s/it] {'loss': 0.3052, 'grad_norm': 0.6717090793088603, 'learning_rate': 6.259195249254539e-06, 'epoch': 0.44} + 44%|████▎ | 5320/12188 [11:26:57<13:45:32, 7.21s/it] 44%|████▎ | 5321/12188 [11:27:04<13:45:08, 7.21s/it] {'loss': 0.3569, 'grad_norm': 0.6637976030011434, 'learning_rate': 6.257909324079498e-06, 'epoch': 0.44} + 44%|████▎ | 5321/12188 [11:27:04<13:45:08, 7.21s/it] 44%|████▎ | 5322/12188 [11:27:12<14:01:44, 7.36s/it] {'loss': 0.3665, 'grad_norm': 0.6703190297065894, 'learning_rate': 6.25662331007292e-06, 'epoch': 0.44} + 44%|████▎ | 5322/12188 [11:27:12<14:01:44, 7.36s/it] 44%|████▎ | 5323/12188 [11:27:19<13:48:17, 7.24s/it] {'loss': 0.3779, 'grad_norm': 0.6521997040013581, 'learning_rate': 6.255337207325625e-06, 'epoch': 0.44} + 44%|████▎ | 5323/12188 [11:27:19<13:48:17, 7.24s/it] 44%|████▎ | 5324/12188 [11:27:26<13:36:47, 7.14s/it] {'loss': 0.3324, 'grad_norm': 0.6736351670757265, 'learning_rate': 6.254051015928433e-06, 'epoch': 0.44} + 44%|████▎ | 5324/12188 [11:27:26<13:36:47, 7.14s/it] 44%|████▎ | 5325/12188 [11:27:33<13:41:24, 7.18s/it] {'loss': 0.3336, 'grad_norm': 0.6614256394923991, 'learning_rate': 6.252764735972172e-06, 'epoch': 0.44} + 44%|████▎ | 5325/12188 [11:27:33<13:41:24, 7.18s/it] 44%|████▎ | 5326/12188 [11:27:40<13:38:33, 7.16s/it] {'loss': 0.3253, 'grad_norm': 0.6690544532802983, 'learning_rate': 6.251478367547679e-06, 'epoch': 0.44} + 44%|████▎ | 5326/12188 [11:27:40<13:38:33, 7.16s/it] 44%|████▎ | 5327/12188 [11:27:47<13:32:26, 7.10s/it] {'loss': 0.2928, 'grad_norm': 0.6272554073108739, 'learning_rate': 6.250191910745794e-06, 'epoch': 0.44} + 44%|████▎ | 5327/12188 [11:27:47<13:32:26, 7.10s/it] 44%|████▎ | 5328/12188 [11:27:55<13:57:05, 7.32s/it] {'loss': 0.3073, 'grad_norm': 0.7031796625952058, 'learning_rate': 6.248905365657365e-06, 'epoch': 0.44} + 44%|████▎ | 5328/12188 [11:27:55<13:57:05, 7.32s/it] 44%|████▎ | 5329/12188 [11:28:04<14:42:51, 7.72s/it] {'loss': 0.3561, 'grad_norm': 0.6672313095533291, 'learning_rate': 6.247618732373245e-06, 'epoch': 0.44} + 44%|████▎ | 5329/12188 [11:28:04<14:42:51, 7.72s/it] 44%|████▎ | 5330/12188 [11:28:11<14:20:58, 7.53s/it] {'loss': 0.3488, 'grad_norm': 0.6558791794554417, 'learning_rate': 6.246332010984295e-06, 'epoch': 0.44} + 44%|████▎ | 5330/12188 [11:28:11<14:20:58, 7.53s/it] 44%|████▎ | 5331/12188 [11:28:17<13:48:40, 7.25s/it] {'loss': 0.3661, 'grad_norm': 0.6361873991588707, 'learning_rate': 6.245045201581381e-06, 'epoch': 0.44} + 44%|████▎ | 5331/12188 [11:28:17<13:48:40, 7.25s/it] 44%|████▎ | 5332/12188 [11:28:25<14:19:59, 7.53s/it] {'loss': 0.3361, 'grad_norm': 0.7005031721817526, 'learning_rate': 6.243758304255375e-06, 'epoch': 0.44} + 44%|████▎ | 5332/12188 [11:28:25<14:19:59, 7.53s/it] 44%|████▍ | 5333/12188 [11:28:32<14:03:09, 7.38s/it] {'loss': 0.3223, 'grad_norm': 0.6000547211030636, 'learning_rate': 6.242471319097156e-06, 'epoch': 0.44} + 44%|████▍ | 5333/12188 [11:28:33<14:03:09, 7.38s/it] 44%|████▍ | 5334/12188 [11:28:40<14:00:08, 7.35s/it] {'loss': 0.327, 'grad_norm': 0.6240332426985268, 'learning_rate': 6.241184246197609e-06, 'epoch': 0.44} + 44%|████▍ | 5334/12188 [11:28:40<14:00:08, 7.35s/it] 44%|████▍ | 5335/12188 [11:28:47<14:01:34, 7.37s/it] {'loss': 0.3261, 'grad_norm': 0.6993525984398733, 'learning_rate': 6.239897085647624e-06, 'epoch': 0.44} + 44%|████▍ | 5335/12188 [11:28:47<14:01:34, 7.37s/it] 44%|████▍ | 5336/12188 [11:28:54<13:48:43, 7.26s/it] {'loss': 0.3707, 'grad_norm': 0.7350183782995722, 'learning_rate': 6.238609837538099e-06, 'epoch': 0.44} + 44%|████▍ | 5336/12188 [11:28:54<13:48:43, 7.26s/it] 44%|████▍ | 5337/12188 [11:29:01<13:24:28, 7.05s/it] {'loss': 0.3561, 'grad_norm': 0.7592780315708043, 'learning_rate': 6.237322501959937e-06, 'epoch': 0.44} + 44%|████▍ | 5337/12188 [11:29:01<13:24:28, 7.05s/it] 44%|████▍ | 5338/12188 [11:29:08<13:26:09, 7.06s/it] {'loss': 0.3356, 'grad_norm': 0.6872676705366995, 'learning_rate': 6.236035079004047e-06, 'epoch': 0.44} + 44%|████▍ | 5338/12188 [11:29:08<13:26:09, 7.06s/it] 44%|████▍ | 5339/12188 [11:29:14<13:11:35, 6.93s/it] {'loss': 0.3419, 'grad_norm': 0.6475948826660268, 'learning_rate': 6.234747568761347e-06, 'epoch': 0.44} + 44%|████▍ | 5339/12188 [11:29:14<13:11:35, 6.93s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 90, in pil_loader + return img.convert("RGB") + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 993, in convert + self.load() + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/ImageFile.py", line 319, in load + raise _get_oserror(err_code, encoder=False) +OSError: unrecognized data stream contents when reading image file +[Try #0] Failed to fetch sample 6015277 in VC:s3://gui-agent/data_20250623/windows_augment/images. Exception: unrecognized data stream contents when reading image file +Problematic sample: {'image': 'inventor/20250511_134843_1/images/before_screenshot_1_id_149_internvl_position_crop_0_grounding_instructions_point_o_paste.png', 'conversations': [{'from': 'human', 'value': "\nDetermine the bounding box from: Located in the top toolbar of Autodesk Inventor Professional 2019, positioned in the 'Modify' section. It sits between the 'Combine' button to its left and 'Shell' button to its right, in the second row of the application's ribbon interface."}, {'from': 'gpt', 'value': "Located in the top toolbar of Autodesk Inventor Professional 2019, positioned in the 'Modify' section. It sits between the 'Combine' button to its left and 'Shell' button to its right, in the second row of the application's ribbon interface.[[605, 521, 621, 530]]"}], 'width': 3600, 'height': 2338} + 44%|████▍ | 5340/12188 [11:29:22<13:19:47, 7.01s/it] {'loss': 0.3427, 'grad_norm': 0.6508953879648159, 'learning_rate': 6.233459971322757e-06, 'epoch': 0.44} + 44%|████▍ | 5340/12188 [11:29:22<13:19:47, 7.01s/it] 44%|████▍ | 5341/12188 [11:29:28<13:09:07, 6.92s/it] {'loss': 0.3476, 'grad_norm': 0.6594518732305046, 'learning_rate': 6.232172286779207e-06, 'epoch': 0.44} + 44%|████▍ | 5341/12188 [11:29:28<13:09:07, 6.92s/it] 44%|████▍ | 5342/12188 [11:29:36<13:18:53, 7.00s/it] {'loss': 0.3932, 'grad_norm': 0.6715762156548872, 'learning_rate': 6.230884515221628e-06, 'epoch': 0.44} + 44%|████▍ | 5342/12188 [11:29:36<13:18:53, 7.00s/it] 44%|████▍ | 5343/12188 [11:29:42<13:08:24, 6.91s/it] {'loss': 0.3401, 'grad_norm': 0.5937519173037982, 'learning_rate': 6.229596656740962e-06, 'epoch': 0.44} + 44%|████▍ | 5343/12188 [11:29:42<13:08:24, 6.91s/it] 44%|████▍ | 5344/12188 [11:29:50<13:29:38, 7.10s/it] {'loss': 0.3442, 'grad_norm': 0.6036184991162397, 'learning_rate': 6.228308711428156e-06, 'epoch': 0.44} + 44%|████▍ | 5344/12188 [11:29:50<13:29:38, 7.10s/it] 44%|████▍ | 5345/12188 [11:29:58<13:50:37, 7.28s/it] {'loss': 0.3576, 'grad_norm': 0.634207534747967, 'learning_rate': 6.227020679374162e-06, 'epoch': 0.44} + 44%|████▍ | 5345/12188 [11:29:58<13:50:37, 7.28s/it] 44%|████▍ | 5346/12188 [11:30:05<13:52:25, 7.30s/it] {'loss': 0.351, 'grad_norm': 0.70698607066324, 'learning_rate': 6.2257325606699394e-06, 'epoch': 0.44} + 44%|████▍ | 5346/12188 [11:30:05<13:52:25, 7.30s/it] 44%|████▍ | 5347/12188 [11:30:13<14:07:24, 7.43s/it] {'loss': 0.3271, 'grad_norm': 0.6321356520616177, 'learning_rate': 6.224444355406452e-06, 'epoch': 0.44} + 44%|████▍ | 5347/12188 [11:30:13<14:07:24, 7.43s/it] 44%|████▍ | 5348/12188 [11:30:20<14:10:28, 7.46s/it] {'loss': 0.3268, 'grad_norm': 0.6668518541302388, 'learning_rate': 6.223156063674672e-06, 'epoch': 0.44} + 44%|████▍ | 5348/12188 [11:30:20<14:10:28, 7.46s/it] 44%|████▍ | 5349/12188 [11:30:28<14:13:18, 7.49s/it] {'loss': 0.3382, 'grad_norm': 0.6513172028904911, 'learning_rate': 6.221867685565577e-06, 'epoch': 0.44} + 44%|████▍ | 5349/12188 [11:30:28<14:13:18, 7.49s/it]Invalidate trace cache @ step 2: expected module 1, but got module 364 + 44%|████▍ | 5350/12188 [11:30:34<13:43:28, 7.23s/it] {'loss': 0.6937, 'grad_norm': 0.6554140386226012, 'learning_rate': 6.220579221170148e-06, 'epoch': 0.44} + 44%|████▍ | 5350/12188 [11:30:34<13:43:28, 7.23s/it] 44%|████▍ | 5351/12188 [11:30:45<15:29:38, 8.16s/it] {'loss': 0.3069, 'grad_norm': 0.6285572082764198, 'learning_rate': 6.219290670579376e-06, 'epoch': 0.44} + 44%|████▍ | 5351/12188 [11:30:45<15:29:38, 8.16s/it] 44%|████▍ | 5352/12188 [11:30:52<15:03:50, 7.93s/it] {'loss': 0.3218, 'grad_norm': 0.6492774289287315, 'learning_rate': 6.218002033884254e-06, 'epoch': 0.44} + 44%|████▍ | 5352/12188 [11:30:52<15:03:50, 7.93s/it] 44%|████▍ | 5353/12188 [11:31:00<15:12:31, 8.01s/it] {'loss': 0.331, 'grad_norm': 0.6931704557852036, 'learning_rate': 6.216713311175786e-06, 'epoch': 0.44} + 44%|████▍ | 5353/12188 [11:31:00<15:12:31, 8.01s/it] 44%|████▍ | 5354/12188 [11:31:08<15:21:45, 8.09s/it] {'loss': 0.3133, 'grad_norm': 0.6532870118840011, 'learning_rate': 6.21542450254498e-06, 'epoch': 0.44} + 44%|████▍ | 5354/12188 [11:31:09<15:21:45, 8.09s/it] 44%|████▍ | 5355/12188 [11:31:16<15:02:06, 7.92s/it] {'loss': 0.3723, 'grad_norm': 0.6782259028936538, 'learning_rate': 6.214135608082848e-06, 'epoch': 0.44} + 44%|████▍ | 5355/12188 [11:31:16<15:02:06, 7.92s/it] 44%|████▍ | 5356/12188 [11:31:23<14:21:54, 7.57s/it] {'loss': 0.3301, 'grad_norm': 0.6921848804338662, 'learning_rate': 6.212846627880409e-06, 'epoch': 0.44} + 44%|████▍ | 5356/12188 [11:31:23<14:21:54, 7.57s/it] 44%|████▍ | 5357/12188 [11:31:30<14:08:25, 7.45s/it] {'loss': 0.3313, 'grad_norm': 0.7392834677133558, 'learning_rate': 6.211557562028691e-06, 'epoch': 0.44} + 44%|████▍ | 5357/12188 [11:31:30<14:08:25, 7.45s/it] 44%|████▍ | 5358/12188 [11:31:37<13:55:53, 7.34s/it] {'loss': 0.3286, 'grad_norm': 0.6345976340888854, 'learning_rate': 6.210268410618723e-06, 'epoch': 0.44} + 44%|████▍ | 5358/12188 [11:31:37<13:55:53, 7.34s/it] 44%|████▍ | 5359/12188 [11:31:45<14:05:53, 7.43s/it] {'loss': 0.3254, 'grad_norm': 0.8178809391581554, 'learning_rate': 6.208979173741545e-06, 'epoch': 0.44} + 44%|████▍ | 5359/12188 [11:31:45<14:05:53, 7.43s/it] 44%|████▍ | 5360/12188 [11:31:52<14:02:58, 7.41s/it] {'loss': 0.3434, 'grad_norm': 0.611873512663777, 'learning_rate': 6.2076898514882e-06, 'epoch': 0.44} + 44%|████▍ | 5360/12188 [11:31:52<14:02:58, 7.41s/it] 44%|████▍ | 5361/12188 [11:32:01<14:41:57, 7.75s/it] {'loss': 0.3747, 'grad_norm': 0.6874242602483885, 'learning_rate': 6.206400443949739e-06, 'epoch': 0.44} + 44%|████▍ | 5361/12188 [11:32:01<14:41:57, 7.75s/it] 44%|████▍ | 5362/12188 [11:32:09<15:09:47, 8.00s/it] {'loss': 0.3635, 'grad_norm': 0.6396422364427905, 'learning_rate': 6.205110951217216e-06, 'epoch': 0.44} + 44%|████▍ | 5362/12188 [11:32:09<15:09:47, 8.00s/it] 44%|████▍ | 5363/12188 [11:32:16<14:34:32, 7.69s/it] {'loss': 0.3339, 'grad_norm': 0.6430157115814371, 'learning_rate': 6.2038213733816935e-06, 'epoch': 0.44} + 44%|████▍ | 5363/12188 [11:32:16<14:34:32, 7.69s/it] 44%|████▍ | 5364/12188 [11:32:23<14:17:06, 7.54s/it] {'loss': 0.3469, 'grad_norm': 0.6083142396681176, 'learning_rate': 6.202531710534241e-06, 'epoch': 0.44} + 44%|████▍ | 5364/12188 [11:32:23<14:17:06, 7.54s/it] 44%|████▍ | 5365/12188 [11:32:30<14:04:03, 7.42s/it] {'loss': 0.3578, 'grad_norm': 0.67922160678454, 'learning_rate': 6.201241962765928e-06, 'epoch': 0.44} + 44%|████▍ | 5365/12188 [11:32:30<14:04:03, 7.42s/it] 44%|████▍ | 5366/12188 [11:32:37<13:49:20, 7.29s/it] {'loss': 0.3451, 'grad_norm': 0.6564507068943601, 'learning_rate': 6.19995213016784e-06, 'epoch': 0.44} + 44%|████▍ | 5366/12188 [11:32:37<13:49:20, 7.29s/it] 44%|████▍ | 5367/12188 [11:32:45<13:43:06, 7.24s/it] {'loss': 0.3225, 'grad_norm': 0.6100774607205361, 'learning_rate': 6.198662212831061e-06, 'epoch': 0.44} + 44%|████▍ | 5367/12188 [11:32:45<13:43:06, 7.24s/it] 44%|████▍ | 5368/12188 [11:32:52<13:39:58, 7.21s/it] {'loss': 0.3409, 'grad_norm': 0.5742598233271644, 'learning_rate': 6.197372210846681e-06, 'epoch': 0.44} + 44%|████▍ | 5368/12188 [11:32:52<13:39:58, 7.21s/it] 44%|████▍ | 5369/12188 [11:32:59<13:44:48, 7.26s/it] {'loss': 0.3735, 'grad_norm': 0.7064212911105872, 'learning_rate': 6.196082124305801e-06, 'epoch': 0.44} + 44%|████▍ | 5369/12188 [11:32:59<13:44:48, 7.26s/it] 44%|████▍ | 5370/12188 [11:33:07<14:06:32, 7.45s/it] {'loss': 0.3386, 'grad_norm': 0.6742039074284577, 'learning_rate': 6.1947919532995205e-06, 'epoch': 0.44} + 44%|████▍ | 5370/12188 [11:33:07<14:06:32, 7.45s/it] 44%|████▍ | 5371/12188 [11:33:14<14:06:38, 7.45s/it] {'loss': 0.3543, 'grad_norm': 0.6566202615542254, 'learning_rate': 6.1935016979189534e-06, 'epoch': 0.44} + 44%|████▍ | 5371/12188 [11:33:14<14:06:38, 7.45s/it] 44%|████▍ | 5372/12188 [11:33:22<13:54:41, 7.35s/it] {'loss': 0.3411, 'grad_norm': 0.675050608112663, 'learning_rate': 6.192211358255213e-06, 'epoch': 0.44} + 44%|████▍ | 5372/12188 [11:33:22<13:54:41, 7.35s/it] 44%|████▍ | 5373/12188 [11:33:28<13:34:48, 7.17s/it] {'loss': 0.336, 'grad_norm': 0.6442812659564909, 'learning_rate': 6.190920934399423e-06, 'epoch': 0.44} + 44%|████▍ | 5373/12188 [11:33:28<13:34:48, 7.17s/it] 44%|████▍ | 5374/12188 [11:33:35<13:15:35, 7.01s/it] {'loss': 0.3703, 'grad_norm': 0.7323866584048604, 'learning_rate': 6.189630426442709e-06, 'epoch': 0.44} + 44%|████▍ | 5374/12188 [11:33:35<13:15:35, 7.01s/it] 44%|████▍ | 5375/12188 [11:33:44<14:13:32, 7.52s/it] {'loss': 0.3361, 'grad_norm': 0.6389016759967228, 'learning_rate': 6.188339834476207e-06, 'epoch': 0.44} + 44%|████▍ | 5375/12188 [11:33:44<14:13:32, 7.52s/it] 44%|████▍ | 5376/12188 [11:33:51<13:54:19, 7.35s/it] {'loss': 0.3378, 'grad_norm': 0.6495036343423697, 'learning_rate': 6.1870491585910545e-06, 'epoch': 0.44} + 44%|████▍ | 5376/12188 [11:33:51<13:54:19, 7.35s/it] 44%|████▍ | 5377/12188 [11:33:58<13:56:51, 7.37s/it] {'loss': 0.3379, 'grad_norm': 0.6938208198054447, 'learning_rate': 6.185758398878396e-06, 'epoch': 0.44} + 44%|████▍ | 5377/12188 [11:33:58<13:56:51, 7.37s/it] 44%|████▍ | 5378/12188 [11:34:05<13:47:19, 7.29s/it] {'loss': 0.3179, 'grad_norm': 0.6063668198884578, 'learning_rate': 6.184467555429386e-06, 'epoch': 0.44} + 44%|████▍ | 5378/12188 [11:34:05<13:47:19, 7.29s/it] 44%|████▍ | 5379/12188 [11:34:12<13:50:36, 7.32s/it] {'loss': 0.3554, 'grad_norm': 0.6661746800269878, 'learning_rate': 6.183176628335179e-06, 'epoch': 0.44} + 44%|████▍ | 5379/12188 [11:34:13<13:50:36, 7.32s/it] 44%|████▍ | 5380/12188 [11:34:19<13:33:22, 7.17s/it] {'loss': 0.3389, 'grad_norm': 0.6352408718499332, 'learning_rate': 6.18188561768694e-06, 'epoch': 0.44} + 44%|████▍ | 5380/12188 [11:34:19<13:33:22, 7.17s/it] 44%|████▍ | 5381/12188 [11:34:27<13:49:42, 7.31s/it] {'loss': 0.3384, 'grad_norm': 0.6357264294956044, 'learning_rate': 6.180594523575838e-06, 'epoch': 0.44} + 44%|████▍ | 5381/12188 [11:34:27<13:49:42, 7.31s/it] 44%|████▍ | 5382/12188 [11:34:34<13:40:16, 7.23s/it] {'loss': 0.3256, 'grad_norm': 0.6396849797399508, 'learning_rate': 6.179303346093048e-06, 'epoch': 0.44} + 44%|████▍ | 5382/12188 [11:34:34<13:40:16, 7.23s/it] 44%|████▍ | 5383/12188 [11:34:42<14:00:25, 7.41s/it] {'loss': 0.3614, 'grad_norm': 0.6211383089939199, 'learning_rate': 6.17801208532975e-06, 'epoch': 0.44} + 44%|████▍ | 5383/12188 [11:34:42<14:00:25, 7.41s/it] 44%|████▍ | 5384/12188 [11:34:49<13:44:53, 7.27s/it] {'loss': 0.343, 'grad_norm': 0.7194091869629602, 'learning_rate': 6.17672074137713e-06, 'epoch': 0.44} + 44%|████▍ | 5384/12188 [11:34:49<13:44:53, 7.27s/it] 44%|████▍ | 5385/12188 [11:34:56<13:35:34, 7.19s/it] {'loss': 0.3445, 'grad_norm': 0.6548287812933139, 'learning_rate': 6.175429314326383e-06, 'epoch': 0.44} + 44%|████▍ | 5385/12188 [11:34:56<13:35:34, 7.19s/it] 44%|████▍ | 5386/12188 [11:35:02<13:13:37, 7.00s/it] {'loss': 0.3635, 'grad_norm': 0.729251482815104, 'learning_rate': 6.174137804268706e-06, 'epoch': 0.44} + 44%|████▍ | 5386/12188 [11:35:02<13:13:37, 7.00s/it] 44%|████▍ | 5387/12188 [11:35:09<13:10:37, 6.98s/it] {'loss': 0.3328, 'grad_norm': 0.6912858075840108, 'learning_rate': 6.172846211295303e-06, 'epoch': 0.44} + 44%|████▍ | 5387/12188 [11:35:09<13:10:37, 6.98s/it] 44%|████▍ | 5388/12188 [11:35:16<12:57:07, 6.86s/it] {'loss': 0.3629, 'grad_norm': 0.6744932942351691, 'learning_rate': 6.171554535497385e-06, 'epoch': 0.44} + 44%|████▍ | 5388/12188 [11:35:16<12:57:07, 6.86s/it] 44%|████▍ | 5389/12188 [11:35:23<12:53:54, 6.83s/it] {'loss': 0.3569, 'grad_norm': 0.6744976915004357, 'learning_rate': 6.1702627769661695e-06, 'epoch': 0.44} + 44%|████▍ | 5389/12188 [11:35:23<12:53:54, 6.83s/it] 44%|████▍ | 5390/12188 [11:35:30<13:08:26, 6.96s/it] {'loss': 0.3328, 'grad_norm': 0.6441835271963754, 'learning_rate': 6.168970935792876e-06, 'epoch': 0.44} + 44%|████▍ | 5390/12188 [11:35:30<13:08:26, 6.96s/it] 44%|████▍ | 5391/12188 [11:35:37<12:59:16, 6.88s/it] {'loss': 0.3183, 'grad_norm': 0.62192963929581, 'learning_rate': 6.167679012068732e-06, 'epoch': 0.44} + 44%|████▍ | 5391/12188 [11:35:37<12:59:16, 6.88s/it] 44%|████▍ | 5392/12188 [11:35:44<13:27:08, 7.13s/it] {'loss': 0.3238, 'grad_norm': 0.5923058733546206, 'learning_rate': 6.1663870058849725e-06, 'epoch': 0.44} + 44%|████▍ | 5392/12188 [11:35:44<13:27:08, 7.13s/it] 44%|████▍ | 5393/12188 [11:35:51<13:28:28, 7.14s/it] {'loss': 0.3218, 'grad_norm': 0.6994600127018021, 'learning_rate': 6.165094917332838e-06, 'epoch': 0.44} + 44%|████▍ | 5393/12188 [11:35:51<13:28:28, 7.14s/it] 44%|████▍ | 5394/12188 [11:35:59<13:40:54, 7.25s/it] {'loss': 0.3566, 'grad_norm': 0.6849920904922439, 'learning_rate': 6.163802746503571e-06, 'epoch': 0.44} + 44%|████▍ | 5394/12188 [11:35:59<13:40:54, 7.25s/it] 44%|████▍ | 5395/12188 [11:36:06<13:27:17, 7.13s/it] {'loss': 0.338, 'grad_norm': 0.6742896100282577, 'learning_rate': 6.162510493488425e-06, 'epoch': 0.44} + 44%|████▍ | 5395/12188 [11:36:06<13:27:17, 7.13s/it]Invalidate trace cache @ step 2: expected module 1, but got module 364 + 44%|████▍ | 5396/12188 [11:36:13<13:16:11, 7.03s/it] {'loss': 0.6943, 'grad_norm': 0.654505191935681, 'learning_rate': 6.161218158378655e-06, 'epoch': 0.44} + 44%|████▍ | 5396/12188 [11:36:13<13:16:11, 7.03s/it] 44%|████▍ | 5397/12188 [11:36:20<13:23:55, 7.10s/it] {'loss': 0.3174, 'grad_norm': 0.6302788695247225, 'learning_rate': 6.159925741265525e-06, 'epoch': 0.44} + 44%|████▍ | 5397/12188 [11:36:20<13:23:55, 7.10s/it] 44%|████▍ | 5398/12188 [11:36:27<13:17:01, 7.04s/it] {'loss': 0.3397, 'grad_norm': 0.6445960262525062, 'learning_rate': 6.158633242240301e-06, 'epoch': 0.44} + 44%|████▍ | 5398/12188 [11:36:27<13:17:01, 7.04s/it] 44%|████▍ | 5399/12188 [11:36:35<14:13:19, 7.54s/it] {'loss': 0.3253, 'grad_norm': 0.6230789221401076, 'learning_rate': 6.15734066139426e-06, 'epoch': 0.44} + 44%|████▍ | 5399/12188 [11:36:35<14:13:19, 7.54s/it] 44%|████▍ | 5400/12188 [11:36:44<14:49:18, 7.86s/it] {'loss': 0.3251, 'grad_norm': 0.6254752156079448, 'learning_rate': 6.156047998818681e-06, 'epoch': 0.44} + 44%|████▍ | 5400/12188 [11:36:44<14:49:18, 7.86s/it] 44%|████▍ | 5401/12188 [11:36:51<14:04:09, 7.46s/it] {'loss': 0.3625, 'grad_norm': 0.6310018888023665, 'learning_rate': 6.154755254604848e-06, 'epoch': 0.44} + 44%|████▍ | 5401/12188 [11:36:51<14:04:09, 7.46s/it] 44%|████▍ | 5402/12188 [11:36:57<13:37:32, 7.23s/it] {'loss': 0.3492, 'grad_norm': 0.6421411667108851, 'learning_rate': 6.153462428844056e-06, 'epoch': 0.44} + 44%|████▍ | 5402/12188 [11:36:57<13:37:32, 7.23s/it] 44%|████▍ | 5403/12188 [11:37:05<13:49:23, 7.33s/it] {'loss': 0.3727, 'grad_norm': 0.6639064492878384, 'learning_rate': 6.1521695216276015e-06, 'epoch': 0.44} + 44%|████▍ | 5403/12188 [11:37:05<13:49:23, 7.33s/it] 44%|████▍ | 5404/12188 [11:37:12<13:52:57, 7.37s/it] {'loss': 0.3447, 'grad_norm': 0.6873662441543628, 'learning_rate': 6.150876533046784e-06, 'epoch': 0.44} + 44%|████��� | 5404/12188 [11:37:12<13:52:57, 7.37s/it] 44%|████▍ | 5405/12188 [11:37:19<13:44:28, 7.29s/it] {'loss': 0.369, 'grad_norm': 0.6619976750739308, 'learning_rate': 6.149583463192916e-06, 'epoch': 0.44} + 44%|████▍ | 5405/12188 [11:37:19<13:44:28, 7.29s/it] 44%|████▍ | 5406/12188 [11:37:27<13:51:48, 7.36s/it] {'loss': 0.3469, 'grad_norm': 0.6833161685079882, 'learning_rate': 6.14829031215731e-06, 'epoch': 0.44} + 44%|████▍ | 5406/12188 [11:37:27<13:51:48, 7.36s/it] 44%|████▍ | 5407/12188 [11:37:34<13:31:48, 7.18s/it] {'loss': 0.355, 'grad_norm': 0.8072335641257796, 'learning_rate': 6.146997080031289e-06, 'epoch': 0.44} + 44%|████▍ | 5407/12188 [11:37:34<13:31:48, 7.18s/it] 44%|████▍ | 5408/12188 [11:37:41<13:25:53, 7.13s/it] {'loss': 0.3181, 'grad_norm': 0.6771199350216374, 'learning_rate': 6.145703766906175e-06, 'epoch': 0.44} + 44%|████▍ | 5408/12188 [11:37:41<13:25:53, 7.13s/it] 44%|████▍ | 5409/12188 [11:37:48<13:25:53, 7.13s/it] {'loss': 0.3259, 'grad_norm': 0.6122944274195639, 'learning_rate': 6.144410372873302e-06, 'epoch': 0.44} + 44%|████▍ | 5409/12188 [11:37:48<13:25:53, 7.13s/it] 44%|████▍ | 5410/12188 [11:37:55<13:28:06, 7.15s/it] {'loss': 0.3418, 'grad_norm': 0.6369071005966971, 'learning_rate': 6.143116898024007e-06, 'epoch': 0.44} + 44%|████▍ | 5410/12188 [11:37:55<13:28:06, 7.15s/it] 44%|████▍ | 5411/12188 [11:38:02<13:10:11, 7.00s/it] {'loss': 0.3086, 'grad_norm': 0.7273487089284443, 'learning_rate': 6.141823342449634e-06, 'epoch': 0.44} + 44%|████▍ | 5411/12188 [11:38:02<13:10:11, 7.00s/it] 44%|████▍ | 5412/12188 [11:38:09<13:05:55, 6.96s/it] {'loss': 0.3596, 'grad_norm': 0.6331981987262111, 'learning_rate': 6.140529706241531e-06, 'epoch': 0.44} + 44%|████▍ | 5412/12188 [11:38:09<13:05:55, 6.96s/it] 44%|████▍ | 5413/12188 [11:38:17<14:03:05, 7.47s/it] {'loss': 0.3482, 'grad_norm': 0.6697665422086079, 'learning_rate': 6.1392359894910526e-06, 'epoch': 0.44} + 44%|████▍ | 5413/12188 [11:38:17<14:03:05, 7.47s/it] 44%|████▍ | 5414/12188 [11:38:24<13:54:00, 7.39s/it] {'loss': 0.3457, 'grad_norm': 0.6560347256514527, 'learning_rate': 6.137942192289559e-06, 'epoch': 0.44} + 44%|████▍ | 5414/12188 [11:38:24<13:54:00, 7.39s/it] 44%|████▍ | 5415/12188 [11:38:32<13:47:12, 7.33s/it] {'loss': 0.3357, 'grad_norm': 0.6824475554018098, 'learning_rate': 6.136648314728414e-06, 'epoch': 0.44} + 44%|████▍ | 5415/12188 [11:38:32<13:47:12, 7.33s/it] 44%|████▍ | 5416/12188 [11:38:39<14:03:28, 7.47s/it] {'loss': 0.319, 'grad_norm': 0.7012592175959917, 'learning_rate': 6.135354356898992e-06, 'epoch': 0.44} + 44%|████▍ | 5416/12188 [11:38:39<14:03:28, 7.47s/it] 44%|████▍ | 5417/12188 [11:38:46<13:48:52, 7.34s/it] {'loss': 0.3341, 'grad_norm': 0.5875426265377571, 'learning_rate': 6.134060318892672e-06, 'epoch': 0.44} + 44%|████▍ | 5417/12188 [11:38:46<13:48:52, 7.34s/it] 44%|████▍ | 5418/12188 [11:38:53<13:26:18, 7.15s/it] {'loss': 0.3381, 'grad_norm': 0.649209060668112, 'learning_rate': 6.1327662008008326e-06, 'epoch': 0.44} + 44%|████▍ | 5418/12188 [11:38:53<13:26:18, 7.15s/it] 44%|████▍ | 5419/12188 [11:39:00<13:31:31, 7.19s/it] {'loss': 0.3455, 'grad_norm': 0.666071561830033, 'learning_rate': 6.131472002714863e-06, 'epoch': 0.44} + 44%|████▍ | 5419/12188 [11:39:00<13:31:31, 7.19s/it] 44%|████▍ | 5420/12188 [11:39:08<13:30:44, 7.19s/it] {'loss': 0.3122, 'grad_norm': 0.6720492815756898, 'learning_rate': 6.13017772472616e-06, 'epoch': 0.44} + 44%|████▍ | 5420/12188 [11:39:08<13:30:44, 7.19s/it] 44%|████▍ | 5421/12188 [11:39:15<13:34:19, 7.22s/it] {'loss': 0.3542, 'grad_norm': 0.6318987706302928, 'learning_rate': 6.12888336692612e-06, 'epoch': 0.44} + 44%|████▍ | 5421/12188 [11:39:15<13:34:19, 7.22s/it] 44%|████▍ | 5422/12188 [11:39:22<13:20:24, 7.10s/it] {'loss': 0.3127, 'grad_norm': 0.6599169190255967, 'learning_rate': 6.127588929406152e-06, 'epoch': 0.44} + 44%|████▍ | 5422/12188 [11:39:22<13:20:24, 7.10s/it] 44%|████▍ | 5423/12188 [11:39:28<13:04:47, 6.96s/it] {'loss': 0.3431, 'grad_norm': 1.0434883365326493, 'learning_rate': 6.1262944122576655e-06, 'epoch': 0.44} + 44%|████▍ | 5423/12188 [11:39:28<13:04:47, 6.96s/it] 45%|████▍ | 5424/12188 [11:39:35<12:56:02, 6.88s/it] {'loss': 0.3456, 'grad_norm': 0.6541377258581761, 'learning_rate': 6.124999815572077e-06, 'epoch': 0.45} + 45%|████▍ | 5424/12188 [11:39:35<12:56:02, 6.88s/it] 45%|████▍ | 5425/12188 [11:39:43<13:33:53, 7.22s/it] {'loss': 0.325, 'grad_norm': 0.6178473486251103, 'learning_rate': 6.123705139440807e-06, 'epoch': 0.45} + 45%|████▍ | 5425/12188 [11:39:43<13:33:53, 7.22s/it] 45%|████▍ | 5426/12188 [11:39:50<13:35:20, 7.23s/it] {'loss': 0.3356, 'grad_norm': 0.6498704162262888, 'learning_rate': 6.122410383955288e-06, 'epoch': 0.45} + 45%|████▍ | 5426/12188 [11:39:50<13:35:20, 7.23s/it] 45%|████▍ | 5427/12188 [11:39:57<13:15:11, 7.06s/it] {'loss': 0.2974, 'grad_norm': 0.639599276148696, 'learning_rate': 6.12111554920695e-06, 'epoch': 0.45} + 45%|████▍ | 5427/12188 [11:39:57<13:15:11, 7.06s/it] 45%|████▍ | 5428/12188 [11:40:04<12:57:11, 6.90s/it] {'loss': 0.3546, 'grad_norm': 0.6102688000203615, 'learning_rate': 6.119820635287231e-06, 'epoch': 0.45} + 45%|████▍ | 5428/12188 [11:40:04<12:57:11, 6.90s/it] 45%|████▍ | 5429/12188 [11:40:10<12:55:13, 6.88s/it] {'loss': 0.3726, 'grad_norm': 0.6216565122007832, 'learning_rate': 6.118525642287581e-06, 'epoch': 0.45} + 45%|████▍ | 5429/12188 [11:40:10<12:55:13, 6.88s/it] 45%|████▍ | 5430/12188 [11:40:17<12:40:59, 6.76s/it] {'loss': 0.3145, 'grad_norm': 0.6597454353936569, 'learning_rate': 6.117230570299446e-06, 'epoch': 0.45} + 45%|████▍ | 5430/12188 [11:40:17<12:40:59, 6.76s/it] 45%|████▍ | 5431/12188 [11:40:25<13:28:46, 7.18s/it] {'loss': 0.2929, 'grad_norm': 0.6212861501415113, 'learning_rate': 6.115935419414284e-06, 'epoch': 0.45} + 45%|████▍ | 5431/12188 [11:40:25<13:28:46, 7.18s/it] 45%|████▍ | 5432/12188 [11:40:32<13:13:04, 7.04s/it] {'loss': 0.3031, 'grad_norm': 0.6250602318848004, 'learning_rate': 6.114640189723554e-06, 'epoch': 0.45} + 45%|████▍ | 5432/12188 [11:40:32<13:13:04, 7.04s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f3368ddf5b0> +[Try #0] Failed to fetch sample 4466302 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f3368ddf5b0> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'create an account'"}, {'from': 'gpt', 'value': '\nclick(x=0.9455, y=0.384)\n'}]} + 45%|████▍ | 5433/12188 [11:40:40<13:53:40, 7.40s/it] {'loss': 0.3666, 'grad_norm': 0.6612160527293325, 'learning_rate': 6.113344881318726e-06, 'epoch': 0.45} + 45%|████▍ | 5433/12188 [11:40:40<13:53:40, 7.40s/it] 45%|████▍ | 5434/12188 [11:40:48<14:00:34, 7.47s/it] {'loss': 0.3936, 'grad_norm': 0.6437606733392837, 'learning_rate': 6.11204949429127e-06, 'epoch': 0.45} + 45%|████▍ | 5434/12188 [11:40:48<14:00:34, 7.47s/it] 45%|████▍ | 5435/12188 [11:40:55<13:44:35, 7.33s/it] {'loss': 0.3737, 'grad_norm': 0.6634903613112948, 'learning_rate': 6.110754028732666e-06, 'epoch': 0.45} + 45%|████▍ | 5435/12188 [11:40:55<13:44:35, 7.33s/it] 45%|████▍ | 5436/12188 [11:41:02<13:41:34, 7.30s/it] {'loss': 0.3448, 'grad_norm': 0.613275901211568, 'learning_rate': 6.109458484734397e-06, 'epoch': 0.45} + 45%|████▍ | 5436/12188 [11:41:02<13:41:34, 7.30s/it] 45%|████▍ | 5437/12188 [11:41:09<13:48:04, 7.36s/it] {'loss': 0.353, 'grad_norm': 0.6585234191371513, 'learning_rate': 6.108162862387952e-06, 'epoch': 0.45} + 45%|████▍ | 5437/12188 [11:41:09<13:48:04, 7.36s/it] 45%|████▍ | 5438/12188 [11:41:16<13:22:36, 7.13s/it] {'loss': 0.3503, 'grad_norm': 0.6988412191583517, 'learning_rate': 6.1068671617848266e-06, 'epoch': 0.45} + 45%|████▍ | 5438/12188 [11:41:16<13:22:36, 7.13s/it] 45%|████▍ | 5439/12188 [11:41:23<13:30:02, 7.20s/it] {'loss': 0.3477, 'grad_norm': 0.647452712607638, 'learning_rate': 6.105571383016521e-06, 'epoch': 0.45} + 45%|████▍ | 5439/12188 [11:41:23<13:30:02, 7.20s/it] 45%|████▍ | 5440/12188 [11:41:30<13:13:21, 7.05s/it] {'loss': 0.334, 'grad_norm': 0.7047629677777232, 'learning_rate': 6.104275526174539e-06, 'epoch': 0.45} + 45%|████▍ | 5440/12188 [11:41:30<13:13:21, 7.05s/it] 45%|████▍ | 5441/12188 [11:41:38<13:56:15, 7.44s/it] {'loss': 0.3157, 'grad_norm': 0.6175094214760531, 'learning_rate': 6.102979591350393e-06, 'epoch': 0.45} + 45%|████▍ | 5441/12188 [11:41:38<13:56:15, 7.44s/it] 45%|████▍ | 5442/12188 [11:41:47<14:31:19, 7.75s/it] {'loss': 0.3212, 'grad_norm': 0.6316573000828313, 'learning_rate': 6.101683578635602e-06, 'epoch': 0.45} + 45%|████▍ | 5442/12188 [11:41:47<14:31:19, 7.75s/it] 45%|████▍ | 5443/12188 [11:41:54<14:17:16, 7.63s/it] {'loss': 0.3241, 'grad_norm': 0.6276254095547698, 'learning_rate': 6.100387488121686e-06, 'epoch': 0.45} + 45%|████▍ | 5443/12188 [11:41:54<14:17:16, 7.63s/it] 45%|████▍ | 5444/12188 [11:42:03<14:51:39, 7.93s/it] {'loss': 0.3488, 'grad_norm': 0.6532827843811483, 'learning_rate': 6.099091319900174e-06, 'epoch': 0.45} + 45%|████▍ | 5444/12188 [11:42:03<14:51:39, 7.93s/it] 45%|████▍ | 5445/12188 [11:42:10<14:12:15, 7.58s/it] {'loss': 0.3516, 'grad_norm': 0.6597437889245041, 'learning_rate': 6.0977950740626e-06, 'epoch': 0.45} + 45%|████▍ | 5445/12188 [11:42:10<14:12:15, 7.58s/it] 45%|████▍ | 5446/12188 [11:42:18<14:35:28, 7.79s/it] {'loss': 0.3677, 'grad_norm': 0.673465416955106, 'learning_rate': 6.0964987507004995e-06, 'epoch': 0.45} + 45%|████▍ | 5446/12188 [11:42:18<14:35:28, 7.79s/it] 45%|████▍ | 5447/12188 [11:42:25<14:12:28, 7.59s/it] {'loss': 0.328, 'grad_norm': 0.6929750532437164, 'learning_rate': 6.09520234990542e-06, 'epoch': 0.45} + 45%|████▍ | 5447/12188 [11:42:25<14:12:28, 7.59s/it] 45%|████▍ | 5448/12188 [11:42:32<14:08:52, 7.56s/it] {'loss': 0.3201, 'grad_norm': 0.6602300092819677, 'learning_rate': 6.09390587176891e-06, 'epoch': 0.45} + 45%|████▍ | 5448/12188 [11:42:32<14:08:52, 7.56s/it] 45%|████▍ | 5449/12188 [11:42:39<13:43:51, 7.34s/it] {'loss': 0.3889, 'grad_norm': 0.6363767547081828, 'learning_rate': 6.092609316382524e-06, 'epoch': 0.45} + 45%|████▍ | 5449/12188 [11:42:39<13:43:51, 7.34s/it] 45%|████▍ | 5450/12188 [11:42:47<13:41:27, 7.31s/it] {'loss': 0.3335, 'grad_norm': 0.6799568811921192, 'learning_rate': 6.091312683837823e-06, 'epoch': 0.45} + 45%|████▍ | 5450/12188 [11:42:47<13:41:27, 7.31s/it] 45%|████▍ | 5451/12188 [11:42:54<13:47:15, 7.37s/it] {'loss': 0.3101, 'grad_norm': 0.6139865195769476, 'learning_rate': 6.090015974226376e-06, 'epoch': 0.45} + 45%|████▍ | 5451/12188 [11:42:54<13:47:15, 7.37s/it] 45%|████▍ | 5452/12188 [11:43:02<13:53:02, 7.42s/it] {'loss': 0.3574, 'grad_norm': 0.7330366935895841, 'learning_rate': 6.088719187639751e-06, 'epoch': 0.45} + 45%|████▍ | 5452/12188 [11:43:02<13:53:02, 7.42s/it] 45%|████▍ | 5453/12188 [11:43:09<14:02:07, 7.50s/it] {'loss': 0.3307, 'grad_norm': 0.7326798797040889, 'learning_rate': 6.0874223241695265e-06, 'epoch': 0.45} + 45%|████▍ | 5453/12188 [11:43:09<14:02:07, 7.50s/it] 45%|████▍ | 5454/12188 [11:43:17<13:59:02, 7.48s/it] {'loss': 0.4055, 'grad_norm': 0.6421716446731747, 'learning_rate': 6.0861253839072844e-06, 'epoch': 0.45} + 45%|████▍ | 5454/12188 [11:43:17<13:59:02, 7.48s/it] 45%|████▍ | 5455/12188 [11:43:24<13:43:55, 7.34s/it] {'loss': 0.3494, 'grad_norm': 0.6470539648463657, 'learning_rate': 6.084828366944611e-06, 'epoch': 0.45} + 45%|████▍ | 5455/12188 [11:43:24<13:43:55, 7.34s/it] 45%|████▍ | 5456/12188 [11:43:31<13:51:57, 7.41s/it] {'loss': 0.3372, 'grad_norm': 0.6684281477838185, 'learning_rate': 6.0835312733731025e-06, 'epoch': 0.45} + 45%|████▍ | 5456/12188 [11:43:31<13:51:57, 7.41s/it] 45%|████▍ | 5457/12188 [11:43:38<13:27:46, 7.20s/it] {'loss': 0.3504, 'grad_norm': 0.687067021851575, 'learning_rate': 6.0822341032843554e-06, 'epoch': 0.45} + 45%|████▍ | 5457/12188 [11:43:38<13:27:46, 7.20s/it] 45%|████▍ | 5458/12188 [11:43:46<13:54:26, 7.44s/it] {'loss': 0.3251, 'grad_norm': 0.7155176179976332, 'learning_rate': 6.080936856769977e-06, 'epoch': 0.45} + 45%|████▍ | 5458/12188 [11:43:46<13:54:26, 7.44s/it] 45%|████▍ | 5459/12188 [11:43:53<13:50:31, 7.41s/it] {'loss': 0.3273, 'grad_norm': 0.6532729819069926, 'learning_rate': 6.079639533921573e-06, 'epoch': 0.45} + 45%|████▍ | 5459/12188 [11:43:53<13:50:31, 7.41s/it] 45%|████▍ | 5460/12188 [11:44:00<13:26:50, 7.20s/it] {'loss': 0.339, 'grad_norm': 0.6634569288662667, 'learning_rate': 6.078342134830759e-06, 'epoch': 0.45} + 45%|████▍ | 5460/12188 [11:44:00<13:26:50, 7.20s/it] 45%|████▍ | 5461/12188 [11:44:07<13:23:35, 7.17s/it] {'loss': 0.3306, 'grad_norm': 0.6400452476836332, 'learning_rate': 6.077044659589157e-06, 'epoch': 0.45} + 45%|████▍ | 5461/12188 [11:44:07<13:23:35, 7.17s/it] 45%|████▍ | 5462/12188 [11:44:15<13:43:36, 7.35s/it] {'loss': 0.3254, 'grad_norm': 0.6785973744575335, 'learning_rate': 6.075747108288391e-06, 'epoch': 0.45} + 45%|████▍ | 5462/12188 [11:44:15<13:43:36, 7.35s/it] 45%|████▍ | 5463/12188 [11:44:22<13:47:55, 7.39s/it] {'loss': 0.3627, 'grad_norm': 0.6695437828303891, 'learning_rate': 6.0744494810200925e-06, 'epoch': 0.45} + 45%|████▍ | 5463/12188 [11:44:22<13:47:55, 7.39s/it] 45%|████▍ | 5464/12188 [11:44:29<13:25:40, 7.19s/it] {'loss': 0.3386, 'grad_norm': 0.6005774429460095, 'learning_rate': 6.073151777875898e-06, 'epoch': 0.45} + 45%|████▍ | 5464/12188 [11:44:29<13:25:40, 7.19s/it] 45%|████▍ | 5465/12188 [11:44:36<13:14:56, 7.09s/it] {'loss': 0.3207, 'grad_norm': 0.6887394624298431, 'learning_rate': 6.07185399894745e-06, 'epoch': 0.45} + 45%|████▍ | 5465/12188 [11:44:36<13:14:56, 7.09s/it] 45%|████▍ | 5466/12188 [11:44:43<13:05:46, 7.01s/it] {'loss': 0.3432, 'grad_norm': 0.6782027417523246, 'learning_rate': 6.070556144326394e-06, 'epoch': 0.45} + 45%|████▍ | 5466/12188 [11:44:43<13:05:46, 7.01s/it] 45%|████▍ | 5467/12188 [11:44:50<13:12:36, 7.08s/it] {'loss': 0.3349, 'grad_norm': 0.6624356347499887, 'learning_rate': 6.069258214104383e-06, 'epoch': 0.45} + 45%|████▍ | 5467/12188 [11:44:50<13:12:36, 7.08s/it] 45%|████▍ | 5468/12188 [11:44:56<12:49:54, 6.87s/it] {'loss': 0.3596, 'grad_norm': 0.6940690266691288, 'learning_rate': 6.067960208373074e-06, 'epoch': 0.45} + 45%|████▍ | 5468/12188 [11:44:56<12:49:54, 6.87s/it] 45%|████▍ | 5469/12188 [11:45:03<12:56:46, 6.94s/it] {'loss': 0.3138, 'grad_norm': 0.6894500474263449, 'learning_rate': 6.066662127224131e-06, 'epoch': 0.45} + 45%|████▍ | 5469/12188 [11:45:04<12:56:46, 6.94s/it] 45%|████▍ | 5470/12188 [11:45:12<13:45:04, 7.37s/it] {'loss': 0.325, 'grad_norm': 0.6504563324575957, 'learning_rate': 6.065363970749223e-06, 'epoch': 0.45} + 45%|████▍ | 5470/12188 [11:45:12<13:45:04, 7.37s/it] 45%|████▍ | 5471/12188 [11:45:19<13:20:18, 7.15s/it] {'loss': 0.3411, 'grad_norm': 0.740866555438256, 'learning_rate': 6.064065739040024e-06, 'epoch': 0.45} + 45%|████▍ | 5471/12188 [11:45:19<13:20:18, 7.15s/it] 45%|████▍ | 5472/12188 [11:45:27<13:57:26, 7.48s/it] {'loss': 0.3306, 'grad_norm': 0.8863520606927088, 'learning_rate': 6.06276743218821e-06, 'epoch': 0.45} + 45%|████▍ | 5472/12188 [11:45:27<13:57:26, 7.48s/it] 45%|████▍ | 5473/12188 [11:45:35<14:38:14, 7.85s/it] {'loss': 0.3427, 'grad_norm': 0.6621580863688217, 'learning_rate': 6.061469050285469e-06, 'epoch': 0.45} + 45%|████▍ | 5473/12188 [11:45:35<14:38:14, 7.85s/it] 45%|████▍ | 5474/12188 [11:45:43<14:32:21, 7.80s/it] {'loss': 0.3613, 'grad_norm': 0.701148129491801, 'learning_rate': 6.060170593423488e-06, 'epoch': 0.45} + 45%|████▍ | 5474/12188 [11:45:43<14:32:21, 7.80s/it] 45%|████▍ | 5475/12188 [11:45:50<14:08:06, 7.58s/it] {'loss': 0.3568, 'grad_norm': 0.6384967786335821, 'learning_rate': 6.058872061693963e-06, 'epoch': 0.45} + 45%|████▍ | 5475/12188 [11:45:50<14:08:06, 7.58s/it] 45%|████▍ | 5476/12188 [11:45:58<14:24:25, 7.73s/it] {'loss': 0.3393, 'grad_norm': 0.6421604082407371, 'learning_rate': 6.0575734551885955e-06, 'epoch': 0.45} + 45%|████▍ | 5476/12188 [11:45:58<14:24:25, 7.73s/it] 45%|████▍ | 5477/12188 [11:46:07<14:45:57, 7.92s/it] {'loss': 0.3557, 'grad_norm': 0.7392197567798597, 'learning_rate': 6.056274773999088e-06, 'epoch': 0.45} + 45%|████▍ | 5477/12188 [11:46:07<14:45:57, 7.92s/it] 45%|████▍ | 5478/12188 [11:46:14<14:29:04, 7.77s/it] {'loss': 0.3534, 'grad_norm': 0.6439159539738014, 'learning_rate': 6.0549760182171535e-06, 'epoch': 0.45} + 45%|████▍ | 5478/12188 [11:46:14<14:29:04, 7.77s/it] 45%|████▍ | 5479/12188 [11:46:21<13:59:29, 7.51s/it] {'loss': 0.3498, 'grad_norm': 0.6426759723779124, 'learning_rate': 6.053677187934509e-06, 'epoch': 0.45} + 45%|████▍ | 5479/12188 [11:46:21<13:59:29, 7.51s/it] 45%|████▍ | 5480/12188 [11:46:28<13:53:21, 7.45s/it] {'loss': 0.3295, 'grad_norm': 0.6179923165702758, 'learning_rate': 6.052378283242873e-06, 'epoch': 0.45} + 45%|████▍ | 5480/12188 [11:46:28<13:53:21, 7.45s/it] 45%|████▍ | 5481/12188 [11:46:37<14:25:49, 7.75s/it] {'loss': 0.327, 'grad_norm': 0.6080740011818413, 'learning_rate': 6.051079304233974e-06, 'epoch': 0.45} + 45%|████▍ | 5481/12188 [11:46:37<14:25:49, 7.75s/it] 45%|████▍ | 5482/12188 [11:46:44<14:00:58, 7.52s/it] {'loss': 0.3068, 'grad_norm': 0.6710371434961294, 'learning_rate': 6.0497802509995415e-06, 'epoch': 0.45} + 45%|████▍ | 5482/12188 [11:46:44<14:00:58, 7.52s/it] 45%|████▍ | 5483/12188 [11:46:51<13:58:06, 7.50s/it] {'loss': 0.328, 'grad_norm': 0.7139480876138421, 'learning_rate': 6.0484811236313165e-06, 'epoch': 0.45} + 45%|████▍ | 5483/12188 [11:46:51<13:58:06, 7.50s/it] 45%|████▍ | 5484/12188 [11:46:59<14:03:09, 7.55s/it] {'loss': 0.289, 'grad_norm': 0.6533839053368865, 'learning_rate': 6.047181922221038e-06, 'epoch': 0.45} + 45%|████▍ | 5484/12188 [11:46:59<14:03:09, 7.55s/it] 45%|████▌ | 5485/12188 [11:47:06<14:03:47, 7.55s/it] {'loss': 0.3382, 'grad_norm': 0.6357370025907507, 'learning_rate': 6.045882646860455e-06, 'epoch': 0.45} + 45%|████▌ | 5485/12188 [11:47:06<14:03:47, 7.55s/it] 45%|████▌ | 5486/12188 [11:47:14<13:50:12, 7.43s/it] {'loss': 0.3609, 'grad_norm': 0.6510978201072027, 'learning_rate': 6.0445832976413195e-06, 'epoch': 0.45} + 45%|████▌ | 5486/12188 [11:47:14<13:50:12, 7.43s/it] 45%|████▌ | 5487/12188 [11:47:21<13:34:30, 7.29s/it] {'loss': 0.3342, 'grad_norm': 0.6520876902952883, 'learning_rate': 6.04328387465539e-06, 'epoch': 0.45} + 45%|████▌ | 5487/12188 [11:47:21<13:34:30, 7.29s/it] 45%|████▌ | 5488/12188 [11:47:28<13:33:59, 7.29s/it] {'loss': 0.3222, 'grad_norm': 0.6416852186222428, 'learning_rate': 6.04198437799443e-06, 'epoch': 0.45} + 45%|████▌ | 5488/12188 [11:47:28<13:33:59, 7.29s/it] 45%|████▌ | 5489/12188 [11:47:35<13:46:36, 7.40s/it] {'loss': 0.3155, 'grad_norm': 0.6485373823844366, 'learning_rate': 6.040684807750207e-06, 'epoch': 0.45} + 45%|████▌ | 5489/12188 [11:47:35<13:46:36, 7.40s/it] 45%|████▌ | 5490/12188 [11:47:42<13:27:38, 7.23s/it] {'loss': 0.3424, 'grad_norm': 0.6870121342363632, 'learning_rate': 6.039385164014494e-06, 'epoch': 0.45} + 45%|████▌ | 5490/12188 [11:47:42<13:27:38, 7.23s/it] 45%|████▌ | 5491/12188 [11:47:49<13:19:33, 7.16s/it] {'loss': 0.2995, 'grad_norm': 0.6473386424868797, 'learning_rate': 6.038085446879069e-06, 'epoch': 0.45} + 45%|████▌ | 5491/12188 [11:47:49<13:19:33, 7.16s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f3f87fc52b0> +[Try #0] Failed to fetch sample 4655847 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f3f87fc52b0> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'AutomationID: footer-poweredbyico'"}, {'from': 'gpt', 'value': '\nclick(x=0.9215, y=0.679)\n'}]} + 45%|████▌ | 5492/12188 [11:47:56<13:06:11, 7.04s/it] {'loss': 0.3298, 'grad_norm': 0.6426299492137905, 'learning_rate': 6.0367856564357205e-06, 'epoch': 0.45} + 45%|████▌ | 5492/12188 [11:47:56<13:06:11, 7.04s/it] 45%|████▌ | 5493/12188 [11:48:04<13:51:22, 7.45s/it] {'loss': 0.3878, 'grad_norm': 0.664097347495453, 'learning_rate': 6.035485792776236e-06, 'epoch': 0.45} + 45%|████▌ | 5493/12188 [11:48:04<13:51:22, 7.45s/it] 45%|████▌ | 5494/12188 [11:48:12<14:03:38, 7.56s/it] {'loss': 0.3499, 'grad_norm': 0.6483958455480544, 'learning_rate': 6.034185855992406e-06, 'epoch': 0.45} + 45%|████▌ | 5494/12188 [11:48:12<14:03:38, 7.56s/it] 45%|████▌ | 5495/12188 [11:48:20<14:00:55, 7.54s/it] {'loss': 0.3048, 'grad_norm': 0.6619851940036816, 'learning_rate': 6.032885846176031e-06, 'epoch': 0.45} + 45%|████▌ | 5495/12188 [11:48:20<14:00:55, 7.54s/it] 45%|████▌ | 5496/12188 [11:48:27<13:43:50, 7.39s/it] {'loss': 0.3173, 'grad_norm': 0.6051700979335092, 'learning_rate': 6.031585763418919e-06, 'epoch': 0.45} + 45%|████▌ | 5496/12188 [11:48:27<13:43:50, 7.39s/it] 45%|████▌ | 5497/12188 [11:48:33<13:18:27, 7.16s/it] {'loss': 0.3204, 'grad_norm': 0.6241741447112886, 'learning_rate': 6.030285607812878e-06, 'epoch': 0.45} + 45%|████▌ | 5497/12188 [11:48:33<13:18:27, 7.16s/it] 45%|████▌ | 5498/12188 [11:48:42<14:01:59, 7.55s/it] {'loss': 0.3851, 'grad_norm': 0.6544476841076127, 'learning_rate': 6.028985379449721e-06, 'epoch': 0.45} + 45%|████▌ | 5498/12188 [11:48:42<14:01:59, 7.55s/it] 45%|████▌ | 5499/12188 [11:48:49<13:34:20, 7.30s/it] {'loss': 0.3285, 'grad_norm': 0.6947382906989665, 'learning_rate': 6.027685078421271e-06, 'epoch': 0.45} + 45%|████▌ | 5499/12188 [11:48:49<13:34:20, 7.30s/it] 45%|████▌ | 5500/12188 [11:48:56<13:39:34, 7.35s/it] {'loss': 0.3633, 'grad_norm': 0.7023779343859339, 'learning_rate': 6.026384704819353e-06, 'epoch': 0.45} + 45%|████▌ | 5500/12188 [11:48:56<13:39:34, 7.35s/it] 45%|████▌ | 5501/12188 [11:49:06<14:51:43, 8.00s/it] {'loss': 0.3175, 'grad_norm': 0.6799815687256484, 'learning_rate': 6.025084258735794e-06, 'epoch': 0.45} + 45%|████▌ | 5501/12188 [11:49:06<14:51:43, 8.00s/it] 45%|████▌ | 5502/12188 [11:49:12<14:13:57, 7.66s/it] {'loss': 0.3391, 'grad_norm': 0.6775441842805292, 'learning_rate': 6.0237837402624325e-06, 'epoch': 0.45} + 45%|████▌ | 5502/12188 [11:49:13<14:13:57, 7.66s/it] 45%|████▌ | 5503/12188 [11:49:20<14:19:19, 7.71s/it] {'loss': 0.3049, 'grad_norm': 0.6366694024553469, 'learning_rate': 6.022483149491108e-06, 'epoch': 0.45} + 45%|████▌ | 5503/12188 [11:49:20<14:19:19, 7.71s/it] 45%|████▌ | 5504/12188 [11:49:27<13:59:27, 7.54s/it] {'loss': 0.3402, 'grad_norm': 0.7432419452890565, 'learning_rate': 6.021182486513666e-06, 'epoch': 0.45} + 45%|████▌ | 5504/12188 [11:49:27<13:59:27, 7.54s/it] 45%|████▌ | 5505/12188 [11:49:34<13:33:43, 7.31s/it] {'loss': 0.3601, 'grad_norm': 0.6213128043872371, 'learning_rate': 6.019881751421956e-06, 'epoch': 0.45} + 45%|████▌ | 5505/12188 [11:49:34<13:33:43, 7.31s/it] 45%|████▌ | 5506/12188 [11:49:41<13:20:12, 7.19s/it] {'loss': 0.316, 'grad_norm': 0.6488952127113141, 'learning_rate': 6.018580944307838e-06, 'epoch': 0.45} + 45%|████▌ | 5506/12188 [11:49:41<13:20:12, 7.19s/it] 45%|████▌ | 5507/12188 [11:49:49<13:29:26, 7.27s/it] {'loss': 0.3317, 'grad_norm': 0.7075582753752307, 'learning_rate': 6.0172800652631706e-06, 'epoch': 0.45} + 45%|████▌ | 5507/12188 [11:49:49<13:29:26, 7.27s/it] 45%|████▌ | 5508/12188 [11:49:56<13:28:36, 7.26s/it] {'loss': 0.321, 'grad_norm': 0.5957253564481122, 'learning_rate': 6.015979114379817e-06, 'epoch': 0.45} + 45%|████▌ | 5508/12188 [11:49:56<13:28:36, 7.26s/it] 45%|████▌ | 5509/12188 [11:50:04<14:01:37, 7.56s/it] {'loss': 0.3421, 'grad_norm': 0.6973502544827489, 'learning_rate': 6.014678091749652e-06, 'epoch': 0.45} + 45%|████▌ | 5509/12188 [11:50:04<14:01:37, 7.56s/it] 45%|���███▌ | 5510/12188 [11:50:11<13:35:14, 7.32s/it] {'loss': 0.3239, 'grad_norm': 0.6462989343087144, 'learning_rate': 6.0133769974645495e-06, 'epoch': 0.45} + 45%|████▌ | 5510/12188 [11:50:11<13:35:14, 7.32s/it] 45%|████▌ | 5511/12188 [11:50:18<13:19:36, 7.19s/it] {'loss': 0.394, 'grad_norm': 0.7080401988445989, 'learning_rate': 6.0120758316163915e-06, 'epoch': 0.45} + 45%|████▌ | 5511/12188 [11:50:18<13:19:36, 7.19s/it] 45%|████▌ | 5512/12188 [11:50:25<13:38:57, 7.36s/it] {'loss': 0.3469, 'grad_norm': 0.6743565106731608, 'learning_rate': 6.0107745942970655e-06, 'epoch': 0.45} + 45%|████▌ | 5512/12188 [11:50:26<13:38:57, 7.36s/it] 45%|████▌ | 5513/12188 [11:50:32<13:18:14, 7.18s/it] {'loss': 0.3688, 'grad_norm': 0.658687660630437, 'learning_rate': 6.00947328559846e-06, 'epoch': 0.45} + 45%|████▌ | 5513/12188 [11:50:32<13:18:14, 7.18s/it] 45%|████▌ | 5514/12188 [11:50:39<12:49:48, 6.92s/it] {'loss': 0.3694, 'grad_norm': 0.6555819766162051, 'learning_rate': 6.008171905612475e-06, 'epoch': 0.45} + 45%|████▌ | 5514/12188 [11:50:39<12:49:48, 6.92s/it] 45%|████▌ | 5515/12188 [11:50:45<12:38:07, 6.82s/it] {'loss': 0.3225, 'grad_norm': 0.6513326651265298, 'learning_rate': 6.0068704544310074e-06, 'epoch': 0.45} + 45%|████▌ | 5515/12188 [11:50:45<12:38:07, 6.82s/it] 45%|████▌ | 5516/12188 [11:50:53<13:02:23, 7.04s/it] {'loss': 0.3221, 'grad_norm': 0.6541615310077519, 'learning_rate': 6.005568932145968e-06, 'epoch': 0.45} + 45%|████▌ | 5516/12188 [11:50:53<13:02:23, 7.04s/it] 45%|████▌ | 5517/12188 [11:50:59<12:49:45, 6.92s/it] {'loss': 0.334, 'grad_norm': 0.9898334595807405, 'learning_rate': 6.004267338849266e-06, 'epoch': 0.45} + 45%|████▌ | 5517/12188 [11:50:59<12:49:45, 6.92s/it] 45%|████▌ | 5518/12188 [11:51:07<13:18:48, 7.19s/it] {'loss': 0.2889, 'grad_norm': 0.6413067132671195, 'learning_rate': 6.0029656746328155e-06, 'epoch': 0.45} + 45%|████▌ | 5518/12188 [11:51:07<13:18:48, 7.19s/it] 45%|████▌ | 5519/12188 [11:51:15<13:49:01, 7.46s/it] {'loss': 0.2928, 'grad_norm': 0.6633380460388496, 'learning_rate': 6.0016639395885424e-06, 'epoch': 0.45} + 45%|████▌ | 5519/12188 [11:51:15<13:49:01, 7.46s/it] 45%|████▌ | 5520/12188 [11:51:23<13:55:40, 7.52s/it] {'loss': 0.3268, 'grad_norm': 0.6124229370074592, 'learning_rate': 6.000362133808371e-06, 'epoch': 0.45} + 45%|████▌ | 5520/12188 [11:51:23<13:55:40, 7.52s/it] 45%|████▌ | 5521/12188 [11:51:30<13:51:04, 7.48s/it] {'loss': 0.3388, 'grad_norm': 0.6309807644197025, 'learning_rate': 5.999060257384234e-06, 'epoch': 0.45} + 45%|████▌ | 5521/12188 [11:51:30<13:51:04, 7.48s/it] 45%|████▌ | 5522/12188 [11:51:38<13:42:29, 7.40s/it] {'loss': 0.3302, 'grad_norm': 0.6166869416788537, 'learning_rate': 5.997758310408065e-06, 'epoch': 0.45} + 45%|████▌ | 5522/12188 [11:51:38<13:42:29, 7.40s/it] 45%|████▌ | 5523/12188 [11:51:46<14:11:11, 7.66s/it] {'loss': 0.3519, 'grad_norm': 0.651285078681108, 'learning_rate': 5.996456292971807e-06, 'epoch': 0.45} + 45%|████▌ | 5523/12188 [11:51:46<14:11:11, 7.66s/it] 45%|████▌ | 5524/12188 [11:51:53<13:41:47, 7.40s/it] {'loss': 0.3842, 'grad_norm': 0.6562100223669893, 'learning_rate': 5.995154205167408e-06, 'epoch': 0.45} + 45%|████▌ | 5524/12188 [11:51:53<13:41:47, 7.40s/it] 45%|████▌ | 5525/12188 [11:52:00<13:38:04, 7.37s/it] {'loss': 0.3048, 'grad_norm': 0.6484352067715783, 'learning_rate': 5.993852047086816e-06, 'epoch': 0.45} + 45%|████▌ | 5525/12188 [11:52:00<13:38:04, 7.37s/it] 45%|████▌ | 5526/12188 [11:52:07<13:46:45, 7.45s/it] {'loss': 0.3338, 'grad_norm': 0.6188144797394131, 'learning_rate': 5.992549818821991e-06, 'epoch': 0.45} + 45%|████▌ | 5526/12188 [11:52:07<13:46:45, 7.45s/it] 45%|████▌ | 5527/12188 [11:52:16<14:27:31, 7.81s/it] {'loss': 0.3621, 'grad_norm': 0.6319155617119969, 'learning_rate': 5.991247520464892e-06, 'epoch': 0.45} + 45%|████▌ | 5527/12188 [11:52:16<14:27:31, 7.81s/it] 45%|████▌ | 5528/12188 [11:52:25<15:05:59, 8.16s/it] {'loss': 0.3174, 'grad_norm': 0.6383471081712955, 'learning_rate': 5.9899451521074854e-06, 'epoch': 0.45} + 45%|████▌ | 5528/12188 [11:52:25<15:05:59, 8.16s/it] 45%|████▌ | 5529/12188 [11:52:32<14:17:34, 7.73s/it] {'loss': 0.3361, 'grad_norm': 0.656940014252948, 'learning_rate': 5.988642713841743e-06, 'epoch': 0.45} + 45%|████▌ | 5529/12188 [11:52:32<14:17:34, 7.73s/it] 45%|████▌ | 5530/12188 [11:52:40<14:20:07, 7.75s/it] {'loss': 0.3479, 'grad_norm': 0.640893938208602, 'learning_rate': 5.987340205759641e-06, 'epoch': 0.45} + 45%|████▌ | 5530/12188 [11:52:40<14:20:07, 7.75s/it] 45%|████▌ | 5531/12188 [11:52:47<14:01:46, 7.59s/it] {'loss': 0.3455, 'grad_norm': 1.8795527842096598, 'learning_rate': 5.986037627953159e-06, 'epoch': 0.45} + 45%|████▌ | 5531/12188 [11:52:47<14:01:46, 7.59s/it] 45%|████▌ | 5532/12188 [11:52:54<13:54:46, 7.53s/it] {'loss': 0.3148, 'grad_norm': 0.6461924422753108, 'learning_rate': 5.984734980514283e-06, 'epoch': 0.45} + 45%|████▌ | 5532/12188 [11:52:54<13:54:46, 7.53s/it] 45%|████▌ | 5533/12188 [11:53:01<13:38:42, 7.38s/it] {'loss': 0.3656, 'grad_norm': 0.6443675541267752, 'learning_rate': 5.983432263535006e-06, 'epoch': 0.45} + 45%|████▌ | 5533/12188 [11:53:01<13:38:42, 7.38s/it] 45%|████▌ | 5534/12188 [11:53:09<13:33:36, 7.34s/it] {'loss': 0.343, 'grad_norm': 0.637461552770478, 'learning_rate': 5.9821294771073224e-06, 'epoch': 0.45} + 45%|████▌ | 5534/12188 [11:53:09<13:33:36, 7.34s/it] 45%|████▌ | 5535/12188 [11:53:16<13:23:19, 7.24s/it] {'loss': 0.3589, 'grad_norm': 0.6723983285669854, 'learning_rate': 5.980826621323235e-06, 'epoch': 0.45} + 45%|████▌ | 5535/12188 [11:53:16<13:23:19, 7.24s/it] 45%|████▌ | 5536/12188 [11:53:22<13:12:54, 7.15s/it] {'loss': 0.3093, 'grad_norm': 0.6321509849578475, 'learning_rate': 5.979523696274744e-06, 'epoch': 0.45} + 45%|████▌ | 5536/12188 [11:53:22<13:12:54, 7.15s/it] 45%|████▌ | 5537/12188 [11:53:29<12:53:54, 6.98s/it] {'loss': 0.3054, 'grad_norm': 0.6408188034387523, 'learning_rate': 5.978220702053865e-06, 'epoch': 0.45} + 45%|████▌ | 5537/12188 [11:53:29<12:53:54, 6.98s/it] 45%|████▌ | 5538/12188 [11:53:36<12:50:13, 6.95s/it] {'loss': 0.3456, 'grad_norm': 0.9813101505324792, 'learning_rate': 5.976917638752611e-06, 'epoch': 0.45} + 45%|████▌ | 5538/12188 [11:53:36<12:50:13, 6.95s/it] 45%|████▌ | 5539/12188 [11:53:43<13:06:07, 7.09s/it] {'loss': 0.3391, 'grad_norm': 0.6623833307308827, 'learning_rate': 5.975614506463003e-06, 'epoch': 0.45} + 45%|████▌ | 5539/12188 [11:53:43<13:06:07, 7.09s/it] 45%|████▌ | 5540/12188 [11:53:52<14:02:40, 7.61s/it] {'loss': 0.3189, 'grad_norm': 0.6857947812692796, 'learning_rate': 5.9743113052770665e-06, 'epoch': 0.45} + 45%|████▌ | 5540/12188 [11:53:52<14:02:40, 7.61s/it] 45%|████▌ | 5541/12188 [11:54:00<14:08:15, 7.66s/it] {'loss': 0.2922, 'grad_norm': 0.6528317641019266, 'learning_rate': 5.9730080352868315e-06, 'epoch': 0.45} + 45%|████▌ | 5541/12188 [11:54:00<14:08:15, 7.66s/it] 45%|████▌ | 5542/12188 [11:54:07<13:41:59, 7.42s/it] {'loss': 0.3745, 'grad_norm': 0.6609991345309001, 'learning_rate': 5.971704696584332e-06, 'epoch': 0.45} + 45%|████▌ | 5542/12188 [11:54:07<13:41:59, 7.42s/it] 45%|████▌ | 5543/12188 [11:54:14<13:27:58, 7.30s/it] {'loss': 0.3435, 'grad_norm': 0.7061369639985658, 'learning_rate': 5.970401289261607e-06, 'epoch': 0.45} + 45%|████▌ | 5543/12188 [11:54:14<13:27:58, 7.30s/it] 45%|████▌ | 5544/12188 [11:54:22<13:55:30, 7.55s/it] {'loss': 0.3824, 'grad_norm': 0.6373312215475151, 'learning_rate': 5.969097813410702e-06, 'epoch': 0.45} + 45%|████▌ | 5544/12188 [11:54:22<13:55:30, 7.55s/it] 45%|████▌ | 5545/12188 [11:54:29<13:42:10, 7.43s/it] {'loss': 0.318, 'grad_norm': 0.642361281823492, 'learning_rate': 5.967794269123667e-06, 'epoch': 0.45} + 45%|████▌ | 5545/12188 [11:54:29<13:42:10, 7.43s/it] 46%|████▌ | 5546/12188 [11:54:36<13:28:32, 7.30s/it] {'loss': 0.3526, 'grad_norm': 0.7159739929018204, 'learning_rate': 5.966490656492555e-06, 'epoch': 0.46} + 46%|████▌ | 5546/12188 [11:54:36<13:28:32, 7.30s/it] 46%|████▌ | 5547/12188 [11:54:43<13:15:04, 7.18s/it] {'loss': 0.3753, 'grad_norm': 0.7661485544414552, 'learning_rate': 5.9651869756094274e-06, 'epoch': 0.46} + 46%|████▌ | 5547/12188 [11:54:43<13:15:04, 7.18s/it] 46%|████▌ | 5548/12188 [11:54:50<13:07:37, 7.12s/it] {'loss': 0.3169, 'grad_norm': 0.6932638530063452, 'learning_rate': 5.9638832265663446e-06, 'epoch': 0.46} + 46%|████▌ | 5548/12188 [11:54:50<13:07:37, 7.12s/it] 46%|████▌ | 5549/12188 [11:54:58<13:42:26, 7.43s/it] {'loss': 0.3205, 'grad_norm': 0.6207358728507886, 'learning_rate': 5.962579409455378e-06, 'epoch': 0.46} + 46%|████▌ | 5549/12188 [11:54:58<13:42:26, 7.43s/it] 46%|████▌ | 5550/12188 [11:55:05<13:29:10, 7.31s/it] {'loss': 0.3437, 'grad_norm': 0.6888717439657758, 'learning_rate': 5.9612755243686e-06, 'epoch': 0.46} + 46%|████▌ | 5550/12188 [11:55:05<13:29:10, 7.31s/it]Invalidate trace cache @ step 2: expected module 1, but got module 364 + 46%|████▌ | 5551/12188 [11:55:12<12:58:34, 7.04s/it] {'loss': 0.6786, 'grad_norm': 0.6173028113380666, 'learning_rate': 5.959971571398088e-06, 'epoch': 0.46} + 46%|████▌ | 5551/12188 [11:55:12<12:58:34, 7.04s/it] 46%|████▌ | 5552/12188 [11:55:18<12:52:27, 6.98s/it] {'loss': 0.348, 'grad_norm': 0.7108502933887647, 'learning_rate': 5.958667550635926e-06, 'epoch': 0.46} + 46%|████▌ | 5552/12188 [11:55:18<12:52:27, 6.98s/it] 46%|████▌ | 5553/12188 [11:55:25<12:39:44, 6.87s/it] {'loss': 0.3234, 'grad_norm': 0.7920827249362714, 'learning_rate': 5.957363462174201e-06, 'epoch': 0.46} + 46%|████▌ | 5553/12188 [11:55:25<12:39:44, 6.87s/it] 46%|████▌ | 5554/12188 [11:55:32<12:33:24, 6.81s/it] {'loss': 0.3292, 'grad_norm': 0.769886138428182, 'learning_rate': 5.9560593061050075e-06, 'epoch': 0.46} + 46%|████▌ | 5554/12188 [11:55:32<12:33:24, 6.81s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7fca60642890> +[Try #0] Failed to fetch sample 4605779 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7fca60642890> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Disclaimers'"}, {'from': 'gpt', 'value': '\nclick(x=0.9515, y=0.533)\n'}]} + 46%|████▌ | 5555/12188 [11:55:39<12:32:25, 6.81s/it] {'loss': 0.348, 'grad_norm': 0.714548230853448, 'learning_rate': 5.954755082520443e-06, 'epoch': 0.46} + 46%|████▌ | 5555/12188 [11:55:39<12:32:25, 6.81s/it] 46%|████▌ | 5556/12188 [11:55:46<12:53:00, 6.99s/it] {'loss': 0.3608, 'grad_norm': 0.7138511905171131, 'learning_rate': 5.953450791512607e-06, 'epoch': 0.46} + 46%|████▌ | 5556/12188 [11:55:46<12:53:00, 6.99s/it] 46%|████▌ | 5557/12188 [11:55:53<12:55:13, 7.01s/it] {'loss': 0.3158, 'grad_norm': 1.3480332531131842, 'learning_rate': 5.952146433173608e-06, 'epoch': 0.46} + 46%|████▌ | 5557/12188 [11:55:53<12:55:13, 7.01s/it] 46%|████▌ | 5558/12188 [11:56:00<13:01:14, 7.07s/it] {'loss': 0.377, 'grad_norm': 0.6542702664985423, 'learning_rate': 5.950842007595558e-06, 'epoch': 0.46} + 46%|████▌ | 5558/12188 [11:56:00<13:01:14, 7.07s/it] 46%|████▌ | 5559/12188 [11:56:08<13:24:34, 7.28s/it] {'loss': 0.3151, 'grad_norm': 0.6411625265599863, 'learning_rate': 5.949537514870573e-06, 'epoch': 0.46} + 46%|████▌ | 5559/12188 [11:56:08<13:24:34, 7.28s/it] 46%|████▌ | 5560/12188 [11:56:17<14:05:31, 7.65s/it] {'loss': 0.3038, 'grad_norm': 0.6577815833295287, 'learning_rate': 5.948232955090773e-06, 'epoch': 0.46} + 46%|████▌ | 5560/12188 [11:56:17<14:05:31, 7.65s/it] 46%|████▌ | 5561/12188 [11:56:25<14:32:46, 7.90s/it] {'loss': 0.2997, 'grad_norm': 0.7073627133841413, 'learning_rate': 5.946928328348287e-06, 'epoch': 0.46} + 46%|████▌ | 5561/12188 [11:56:25<14:32:46, 7.90s/it] 46%|████▌ | 5562/12188 [11:56:32<13:54:31, 7.56s/it] {'loss': 0.3348, 'grad_norm': 0.658333878368744, 'learning_rate': 5.945623634735243e-06, 'epoch': 0.46} + 46%|████▌ | 5562/12188 [11:56:32<13:54:31, 7.56s/it] 46%|████▌ | 5563/12188 [11:56:38<13:23:22, 7.28s/it] {'loss': 0.322, 'grad_norm': 0.6239675523211403, 'learning_rate': 5.944318874343777e-06, 'epoch': 0.46} + 46%|████▌ | 5563/12188 [11:56:38<13:23:22, 7.28s/it] 46%|████▌ | 5564/12188 [11:56:46<13:27:17, 7.31s/it] {'loss': 0.3432, 'grad_norm': 0.6304928883848269, 'learning_rate': 5.9430140472660305e-06, 'epoch': 0.46} + 46%|████▌ | 5564/12188 [11:56:46<13:27:17, 7.31s/it] 46%|████▌ | 5565/12188 [11:56:53<13:21:52, 7.26s/it] {'loss': 0.3441, 'grad_norm': 0.9102701876462636, 'learning_rate': 5.941709153594146e-06, 'epoch': 0.46} + 46%|████▌ | 5565/12188 [11:56:53<13:21:52, 7.26s/it] 46%|████▌ | 5566/12188 [11:57:00<13:31:16, 7.35s/it] {'loss': 0.3211, 'grad_norm': 0.6753951424661508, 'learning_rate': 5.940404193420274e-06, 'epoch': 0.46} + 46%|████▌ | 5566/12188 [11:57:00<13:31:16, 7.35s/it] 46%|████▌ | 5567/12188 [11:57:08<13:50:57, 7.53s/it] {'loss': 0.352, 'grad_norm': 0.6465607232752106, 'learning_rate': 5.939099166836568e-06, 'epoch': 0.46} + 46%|████▌ | 5567/12188 [11:57:08<13:50:57, 7.53s/it] 46%|████▌ | 5568/12188 [11:57:16<13:44:54, 7.48s/it] {'loss': 0.348, 'grad_norm': 0.7658171727854373, 'learning_rate': 5.937794073935188e-06, 'epoch': 0.46} + 46%|████▌ | 5568/12188 [11:57:16<13:44:54, 7.48s/it] 46%|████▌ | 5569/12188 [11:57:23<13:36:35, 7.40s/it] {'loss': 0.3189, 'grad_norm': 0.7196852633028321, 'learning_rate': 5.936488914808299e-06, 'epoch': 0.46} + 46%|████▌ | 5569/12188 [11:57:23<13:36:35, 7.40s/it] 46%|████▌ | 5570/12188 [11:57:31<13:53:49, 7.56s/it] {'loss': 0.3257, 'grad_norm': 0.6637642728572852, 'learning_rate': 5.935183689548067e-06, 'epoch': 0.46} + 46%|████▌ | 5570/12188 [11:57:31<13:53:49, 7.56s/it] 46%|████▌ | 5571/12188 [11:57:38<13:31:52, 7.36s/it] {'loss': 0.3266, 'grad_norm': 0.7421734716874182, 'learning_rate': 5.933878398246664e-06, 'epoch': 0.46} + 46%|████▌ | 5571/12188 [11:57:38<13:31:52, 7.36s/it] 46%|████▌ | 5572/12188 [11:57:45<13:42:25, 7.46s/it] {'loss': 0.3643, 'grad_norm': 0.7003584444984671, 'learning_rate': 5.932573040996269e-06, 'epoch': 0.46} + 46%|████▌ | 5572/12188 [11:57:46<13:42:25, 7.46s/it] 46%|████▌ | 5573/12188 [11:57:52<13:11:37, 7.18s/it] {'loss': 0.3074, 'grad_norm': 0.6692826823807438, 'learning_rate': 5.931267617889064e-06, 'epoch': 0.46} + 46%|████▌ | 5573/12188 [11:57:52<13:11:37, 7.18s/it] 46%|████▌ | 5574/12188 [11:57:59<12:55:09, 7.03s/it] {'loss': 0.3879, 'grad_norm': 0.6864573590187877, 'learning_rate': 5.929962129017237e-06, 'epoch': 0.46} + 46%|████▌ | 5574/12188 [11:57:59<12:55:09, 7.03s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 90, in pil_loader + return img.convert("RGB") + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 993, in convert + self.load() + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/ImageFile.py", line 319, in load + raise _get_oserror(err_code, encoder=False) +OSError: unrecognized data stream contents when reading image file +[Try #0] Failed to fetch sample 6015719 in VC:s3://gui-agent/data_20250623/windows_augment/images. Exception: unrecognized data stream contents when reading image file +Problematic sample: {'image': 'autocad/20250508_161646_1/images/before_screenshot_1_id_73_internvl_element-caption_crop_0_grounding_instructions_point_o_paste.png', 'conversations': [{'from': 'human', 'value': "\nOnly respond with the point inside: The 'Block Editor' button in the AutoCAD ribbon interface that allows users to edit block definitions in the drawing."}, {'from': 'gpt', 'value': "The 'Block Editor' button in the AutoCAD ribbon interface that allows users to edit block definitions in the drawing.[[584, 97]]"}], 'width': 3600, 'height': 2338} + 46%|████▌ | 5575/12188 [11:58:07<13:31:54, 7.37s/it] {'loss': 0.308, 'grad_norm': 0.6641824542863001, 'learning_rate': 5.928656574472977e-06, 'epoch': 0.46} + 46%|████▌ | 5575/12188 [11:58:07<13:31:54, 7.37s/it] 46%|████▌ | 5576/12188 [11:58:14<13:09:11, 7.16s/it] {'loss': 0.3722, 'grad_norm': 0.8164192912212634, 'learning_rate': 5.9273509543484846e-06, 'epoch': 0.46} + 46%|████▌ | 5576/12188 [11:58:14<13:09:11, 7.16s/it] 46%|████▌ | 5577/12188 [11:58:20<12:59:25, 7.07s/it] {'loss': 0.3268, 'grad_norm': 0.6556268800038538, 'learning_rate': 5.926045268735955e-06, 'epoch': 0.46} + 46%|████▌ | 5577/12188 [11:58:20<12:59:25, 7.07s/it] 46%|████▌ | 5578/12188 [11:58:27<12:46:39, 6.96s/it] {'loss': 0.3621, 'grad_norm': 0.7011062163856382, 'learning_rate': 5.924739517727598e-06, 'epoch': 0.46} + 46%|████▌ | 5578/12188 [11:58:27<12:46:39, 6.96s/it] 46%|████▌ | 5579/12188 [11:58:34<12:39:48, 6.90s/it] {'loss': 0.3263, 'grad_norm': 0.7564384318443695, 'learning_rate': 5.9234337014156205e-06, 'epoch': 0.46} + 46%|████▌ | 5579/12188 [11:58:34<12:39:48, 6.90s/it] 46%|████▌ | 5580/12188 [11:58:41<12:32:33, 6.83s/it] {'loss': 0.3329, 'grad_norm': 0.6506223738886315, 'learning_rate': 5.92212781989224e-06, 'epoch': 0.46} + 46%|████▌ | 5580/12188 [11:58:41<12:32:33, 6.83s/it] 46%|████▌ | 5581/12188 [11:58:48<12:46:37, 6.96s/it] {'loss': 0.3117, 'grad_norm': 0.6349843433646007, 'learning_rate': 5.9208218732496725e-06, 'epoch': 0.46} + 46%|████▌ | 5581/12188 [11:58:48<12:46:37, 6.96s/it] 46%|████▌ | 5582/12188 [11:58:54<12:37:27, 6.88s/it] {'loss': 0.326, 'grad_norm': 0.6894768939509959, 'learning_rate': 5.919515861580145e-06, 'epoch': 0.46} + 46%|████▌ | 5582/12188 [11:58:55<12:37:27, 6.88s/it] 46%|████▌ | 5583/12188 [11:59:01<12:38:57, 6.89s/it] {'loss': 0.3664, 'grad_norm': 0.6721422338320288, 'learning_rate': 5.9182097849758856e-06, 'epoch': 0.46} + 46%|████▌ | 5583/12188 [11:59:01<12:38:57, 6.89s/it] 46%|████▌ | 5584/12188 [11:59:09<12:47:55, 6.98s/it] {'loss': 0.3321, 'grad_norm': 0.7495446309772632, 'learning_rate': 5.916903643529125e-06, 'epoch': 0.46} + 46%|████▌ | 5584/12188 [11:59:09<12:47:55, 6.98s/it] 46%|████▌ | 5585/12188 [11:59:15<12:44:41, 6.95s/it] {'loss': 0.3153, 'grad_norm': 0.5928276583448506, 'learning_rate': 5.915597437332101e-06, 'epoch': 0.46} + 46%|████▌ | 5585/12188 [11:59:15<12:44:41, 6.95s/it] 46%|████▌ | 5586/12188 [11:59:23<13:01:19, 7.10s/it] {'loss': 0.3579, 'grad_norm': 1.0960855704915726, 'learning_rate': 5.91429116647706e-06, 'epoch': 0.46} + 46%|████▌ | 5586/12188 [11:59:23<13:01:19, 7.10s/it] 46%|████▌ | 5587/12188 [11:59:31<13:23:27, 7.30s/it] {'loss': 0.3499, 'grad_norm': 0.6672754623195776, 'learning_rate': 5.9129848310562435e-06, 'epoch': 0.46} + 46%|████▌ | 5587/12188 [11:59:31<13:23:27, 7.30s/it] 46%|████▌ | 5588/12188 [11:59:39<13:42:54, 7.48s/it] {'loss': 0.3432, 'grad_norm': 0.8529967749865908, 'learning_rate': 5.911678431161907e-06, 'epoch': 0.46} + 46%|████▌ | 5588/12188 [11:59:39<13:42:54, 7.48s/it]Invalidate trace cache @ step 2: expected module 1, but got module 364 + 46%|████▌ | 5589/12188 [11:59:45<13:02:43, 7.12s/it] {'loss': 0.6318, 'grad_norm': 0.6246207412681954, 'learning_rate': 5.910371966886303e-06, 'epoch': 0.46} + 46%|████▌ | 5589/12188 [11:59:45<13:02:43, 7.12s/it] 46%|████▌ | 5590/12188 [11:59:53<13:42:09, 7.48s/it] {'loss': 0.3445, 'grad_norm': 0.746595676133107, 'learning_rate': 5.909065438321693e-06, 'epoch': 0.46} + 46%|████▌ | 5590/12188 [11:59:53<13:42:09, 7.48s/it] 46%|████▌ | 5591/12188 [12:00:00<13:11:56, 7.20s/it] {'loss': 0.3478, 'grad_norm': 0.6708414689120072, 'learning_rate': 5.907758845560343e-06, 'epoch': 0.46} + 46%|████▌ | 5591/12188 [12:00:00<13:11:56, 7.20s/it] 46%|████▌ | 5592/12188 [12:00:06<12:53:43, 7.04s/it] {'loss': 0.3318, 'grad_norm': 0.7225947969439656, 'learning_rate': 5.906452188694523e-06, 'epoch': 0.46} + 46%|████▌ | 5592/12188 [12:00:06<12:53:43, 7.04s/it] 46%|████▌ | 5593/12188 [12:00:14<13:08:33, 7.17s/it] {'loss': 0.3359, 'grad_norm': 0.6931856774318916, 'learning_rate': 5.905145467816504e-06, 'epoch': 0.46} + 46%|████▌ | 5593/12188 [12:00:14<13:08:33, 7.17s/it] 46%|████▌ | 5594/12188 [12:00:21<12:59:15, 7.09s/it] {'loss': 0.3351, 'grad_norm': 0.6366168677805925, 'learning_rate': 5.903838683018566e-06, 'epoch': 0.46} + 46%|████▌ | 5594/12188 [12:00:21<12:59:15, 7.09s/it] 46%|████▌ | 5595/12188 [12:00:27<12:43:44, 6.95s/it] {'loss': 0.302, 'grad_norm': 0.7174774533206663, 'learning_rate': 5.902531834392993e-06, 'epoch': 0.46} + 46%|████▌ | 5595/12188 [12:00:27<12:43:44, 6.95s/it] 46%|████▌ | 5596/12188 [12:00:34<12:36:58, 6.89s/it] {'loss': 0.3023, 'grad_norm': 0.7812284102424977, 'learning_rate': 5.901224922032072e-06, 'epoch': 0.46} + 46%|████▌ | 5596/12188 [12:00:34<12:36:58, 6.89s/it] 46%|████▌ | 5597/12188 [12:00:41<12:44:07, 6.96s/it] {'loss': 0.3376, 'grad_norm': 0.7642154106020173, 'learning_rate': 5.899917946028095e-06, 'epoch': 0.46} + 46%|████▌ | 5597/12188 [12:00:41<12:44:07, 6.96s/it] 46%|████▌ | 5598/12188 [12:00:48<12:48:48, 7.00s/it] {'loss': 0.3361, 'grad_norm': 0.6766804570651881, 'learning_rate': 5.8986109064733586e-06, 'epoch': 0.46} + 46%|████▌ | 5598/12188 [12:00:48<12:48:48, 7.00s/it] 46%|████▌ | 5599/12188 [12:00:56<13:07:08, 7.17s/it] {'loss': 0.3427, 'grad_norm': 0.6543002974320388, 'learning_rate': 5.897303803460162e-06, 'epoch': 0.46} + 46%|████▌ | 5599/12188 [12:00:56<13:07:08, 7.17s/it] 46%|████▌ | 5600/12188 [12:01:04<13:20:32, 7.29s/it] {'loss': 0.317, 'grad_norm': 0.6157343494189152, 'learning_rate': 5.895996637080814e-06, 'epoch': 0.46} + 46%|████▌ | 5600/12188 [12:01:04<13:20:32, 7.29s/it] 46%|████▌ | 5601/12188 [12:01:11<13:18:08, 7.27s/it] {'loss': 0.3243, 'grad_norm': 0.6364521858715172, 'learning_rate': 5.894689407427623e-06, 'epoch': 0.46} + 46%|████▌ | 5601/12188 [12:01:11<13:18:08, 7.27s/it] 46%|████▌ | 5602/12188 [12:01:18<13:09:52, 7.20s/it] {'loss': 0.3467, 'grad_norm': 0.6713622292610186, 'learning_rate': 5.893382114592904e-06, 'epoch': 0.46} + 46%|████▌ | 5602/12188 [12:01:18<13:09:52, 7.20s/it] 46%|████▌ | 5603/12188 [12:01:25<13:14:15, 7.24s/it] {'loss': 0.3088, 'grad_norm': 0.7004919687277371, 'learning_rate': 5.892074758668974e-06, 'epoch': 0.46} + 46%|████▌ | 5603/12188 [12:01:25<13:14:15, 7.24s/it] 46%|████▌ | 5604/12188 [12:01:32<12:53:40, 7.05s/it] {'loss': 0.3451, 'grad_norm': 0.7669819596501203, 'learning_rate': 5.890767339748159e-06, 'epoch': 0.46} + 46%|████▌ | 5604/12188 [12:01:32<12:53:40, 7.05s/it] 46%|████▌ | 5605/12188 [12:01:38<12:39:13, 6.92s/it] {'loss': 0.3406, 'grad_norm': 0.6059230749650479, 'learning_rate': 5.889459857922786e-06, 'epoch': 0.46} + 46%|████▌ | 5605/12188 [12:01:38<12:39:13, 6.92s/it] 46%|████▌ | 5606/12188 [12:01:46<12:53:01, 7.05s/it] {'loss': 0.3461, 'grad_norm': 0.709874464505298, 'learning_rate': 5.888152313285185e-06, 'epoch': 0.46} + 46%|████▌ | 5606/12188 [12:01:46<12:53:01, 7.05s/it] 46%|████▌ | 5607/12188 [12:01:53<13:04:01, 7.15s/it] {'loss': 0.3904, 'grad_norm': 0.67836227331605, 'learning_rate': 5.886844705927696e-06, 'epoch': 0.46} + 46%|████▌ | 5607/12188 [12:01:53<13:04:01, 7.15s/it] 46%|████▌ | 5608/12188 [12:02:00<12:55:07, 7.07s/it] {'loss': 0.3432, 'grad_norm': 0.7102065968634225, 'learning_rate': 5.885537035942657e-06, 'epoch': 0.46} + 46%|████▌ | 5608/12188 [12:02:00<12:55:07, 7.07s/it] 46%|████▌ | 5609/12188 [12:02:07<13:05:21, 7.16s/it] {'loss': 0.3316, 'grad_norm': 0.7212216636617652, 'learning_rate': 5.884229303422417e-06, 'epoch': 0.46} + 46%|████▌ | 5609/12188 [12:02:07<13:05:21, 7.16s/it] 46%|████▌ | 5610/12188 [12:02:15<13:19:03, 7.29s/it] {'loss': 0.3351, 'grad_norm': 0.7813451457418568, 'learning_rate': 5.882921508459323e-06, 'epoch': 0.46} + 46%|████▌ | 5610/12188 [12:02:15<13:19:03, 7.29s/it] 46%|████▌ | 5611/12188 [12:02:22<13:14:39, 7.25s/it] {'loss': 0.3468, 'grad_norm': 0.6929304489473179, 'learning_rate': 5.881613651145732e-06, 'epoch': 0.46} + 46%|████▌ | 5611/12188 [12:02:22<13:14:39, 7.25s/it] 46%|████▌ | 5612/12188 [12:02:30<13:39:10, 7.47s/it] {'loss': 0.3404, 'grad_norm': 0.8338467280648812, 'learning_rate': 5.880305731574001e-06, 'epoch': 0.46} + 46%|████▌ | 5612/12188 [12:02:30<13:39:10, 7.47s/it] 46%|████▌ | 5613/12188 [12:02:37<13:11:17, 7.22s/it] {'loss': 0.3348, 'grad_norm': 0.6442309183856274, 'learning_rate': 5.878997749836493e-06, 'epoch': 0.46} + 46%|████▌ | 5613/12188 [12:02:37<13:11:17, 7.22s/it] 46%|████▌ | 5614/12188 [12:02:43<12:57:20, 7.09s/it] {'loss': 0.3526, 'grad_norm': 0.80036370569721, 'learning_rate': 5.8776897060255755e-06, 'epoch': 0.46} + 46%|████▌ | 5614/12188 [12:02:43<12:57:20, 7.09s/it] 46%|████▌ | 5615/12188 [12:02:51<13:06:29, 7.18s/it] {'loss': 0.3713, 'grad_norm': 0.6978485357394113, 'learning_rate': 5.876381600233623e-06, 'epoch': 0.46} + 46%|████▌ | 5615/12188 [12:02:51<13:06:29, 7.18s/it] 46%|████▌ | 5616/12188 [12:02:57<12:43:55, 6.97s/it] {'loss': 0.3621, 'grad_norm': 0.6930847977548901, 'learning_rate': 5.8750734325530075e-06, 'epoch': 0.46} + 46%|████▌ | 5616/12188 [12:02:57<12:43:55, 6.97s/it] 46%|████▌ | 5617/12188 [12:03:05<12:50:27, 7.04s/it] {'loss': 0.3496, 'grad_norm': 0.8867558998264928, 'learning_rate': 5.873765203076115e-06, 'epoch': 0.46} + 46%|████▌ | 5617/12188 [12:03:05<12:50:27, 7.04s/it] 46%|████▌ | 5618/12188 [12:03:11<12:35:27, 6.90s/it] {'loss': 0.326, 'grad_norm': 0.6412492395153948, 'learning_rate': 5.872456911895326e-06, 'epoch': 0.46} + 46%|████▌ | 5618/12188 [12:03:11<12:35:27, 6.90s/it] 46%|████▌ | 5619/12188 [12:03:19<12:56:00, 7.09s/it] {'loss': 0.3536, 'grad_norm': 0.7939947686776085, 'learning_rate': 5.8711485591030315e-06, 'epoch': 0.46} + 46%|████▌ | 5619/12188 [12:03:19<12:56:00, 7.09s/it] 46%|████▌ | 5620/12188 [12:03:25<12:45:12, 6.99s/it] {'loss': 0.3515, 'grad_norm': 0.672878074509171, 'learning_rate': 5.869840144791625e-06, 'epoch': 0.46} + 46%|████▌ | 5620/12188 [12:03:25<12:45:12, 6.99s/it] 46%|████▌ | 5621/12188 [12:03:32<12:42:09, 6.96s/it] {'loss': 0.3127, 'grad_norm': 0.643158830987762, 'learning_rate': 5.868531669053506e-06, 'epoch': 0.46} + 46%|████▌ | 5621/12188 [12:03:32<12:42:09, 6.96s/it] 46%|████▌ | 5622/12188 [12:03:39<12:38:21, 6.93s/it] {'loss': 0.3229, 'grad_norm': 0.5946572766363513, 'learning_rate': 5.867223131981075e-06, 'epoch': 0.46} + 46%|████▌ | 5622/12188 [12:03:39<12:38:21, 6.93s/it] 46%|████▌ | 5623/12188 [12:03:46<12:37:30, 6.92s/it] {'loss': 0.3309, 'grad_norm': 0.7946946190496376, 'learning_rate': 5.86591453366674e-06, 'epoch': 0.46} + 46%|████▌ | 5623/12188 [12:03:46<12:37:30, 6.92s/it] 46%|████▌ | 5624/12188 [12:03:53<12:45:29, 7.00s/it] {'loss': 0.3575, 'grad_norm': 0.6093890352942969, 'learning_rate': 5.864605874202912e-06, 'epoch': 0.46} + 46%|████▌ | 5624/12188 [12:03:53<12:45:29, 7.00s/it] 46%|████▌ | 5625/12188 [12:04:01<12:58:11, 7.11s/it] {'loss': 0.3469, 'grad_norm': 0.6775713046048214, 'learning_rate': 5.863297153682006e-06, 'epoch': 0.46} + 46%|████▌ | 5625/12188 [12:04:01<12:58:11, 7.11s/it] 46%|████▌ | 5626/12188 [12:04:08<12:54:35, 7.08s/it] {'loss': 0.3565, 'grad_norm': 0.6487274084713462, 'learning_rate': 5.861988372196442e-06, 'epoch': 0.46} + 46%|████▌ | 5626/12188 [12:04:08<12:54:35, 7.08s/it] 46%|████▌ | 5627/12188 [12:04:14<12:40:40, 6.96s/it] {'loss': 0.3039, 'grad_norm': 0.6698768711130284, 'learning_rate': 5.860679529838645e-06, 'epoch': 0.46} + 46%|████▌ | 5627/12188 [12:04:14<12:40:40, 6.96s/it] 46%|████▌ | 5628/12188 [12:04:22<12:52:23, 7.06s/it] {'loss': 0.3211, 'grad_norm': 0.6427905418362132, 'learning_rate': 5.859370626701042e-06, 'epoch': 0.46} + 46%|████▌ | 5628/12188 [12:04:22<12:52:23, 7.06s/it] 46%|████▌ | 5629/12188 [12:04:30<13:38:47, 7.49s/it] {'loss': 0.3253, 'grad_norm': 0.6576717514325471, 'learning_rate': 5.858061662876066e-06, 'epoch': 0.46} + 46%|████▌ | 5629/12188 [12:04:30<13:38:47, 7.49s/it] 46%|████▌ | 5630/12188 [12:04:38<13:44:33, 7.54s/it] {'loss': 0.352, 'grad_norm': 0.701705075690251, 'learning_rate': 5.8567526384561526e-06, 'epoch': 0.46} + 46%|████▌ | 5630/12188 [12:04:38<13:44:33, 7.54s/it] 46%|████▌ | 5631/12188 [12:04:45<13:18:05, 7.30s/it] {'loss': 0.3322, 'grad_norm': 0.6837494800102436, 'learning_rate': 5.855443553533746e-06, 'epoch': 0.46} + 46%|████▌ | 5631/12188 [12:04:45<13:18:05, 7.30s/it] 46%|████▌ | 5632/12188 [12:04:51<12:58:15, 7.12s/it] {'loss': 0.354, 'grad_norm': 0.6678776428796558, 'learning_rate': 5.85413440820129e-06, 'epoch': 0.46} + 46%|████▌ | 5632/12188 [12:04:51<12:58:15, 7.12s/it] 46%|████▌ | 5633/12188 [12:04:58<12:58:49, 7.13s/it] {'loss': 0.3578, 'grad_norm': 0.6652860079576262, 'learning_rate': 5.852825202551234e-06, 'epoch': 0.46} + 46%|████▌ | 5633/12188 [12:04:58<12:58:49, 7.13s/it] 46%|████▌ | 5634/12188 [12:05:05<12:39:16, 6.95s/it] {'loss': 0.3361, 'grad_norm': 0.7046878807144938, 'learning_rate': 5.851515936676031e-06, 'epoch': 0.46} + 46%|████▌ | 5634/12188 [12:05:05<12:39:16, 6.95s/it] 46%|████▌ | 5635/12188 [12:05:12<12:49:52, 7.05s/it] {'loss': 0.3289, 'grad_norm': 0.6846089112196712, 'learning_rate': 5.8502066106681425e-06, 'epoch': 0.46} + 46%|████▌ | 5635/12188 [12:05:12<12:49:52, 7.05s/it] 46%|████▌ | 5636/12188 [12:05:20<13:19:12, 7.32s/it] {'loss': 0.3361, 'grad_norm': 0.638872189056295, 'learning_rate': 5.848897224620027e-06, 'epoch': 0.46} + 46%|████▌ | 5636/12188 [12:05:20<13:19:12, 7.32s/it] 46%|████▋ | 5637/12188 [12:05:27<13:18:35, 7.31s/it] {'loss': 0.334, 'grad_norm': 0.6041738858037254, 'learning_rate': 5.847587778624155e-06, 'epoch': 0.46} + 46%|████▋ | 5637/12188 [12:05:27<13:18:35, 7.31s/it] 46%|████▋ | 5638/12188 [12:05:34<13:03:07, 7.17s/it] {'loss': 0.3233, 'grad_norm': 0.6968250852540273, 'learning_rate': 5.846278272772996e-06, 'epoch': 0.46} + 46%|████▋ | 5638/12188 [12:05:34<13:03:07, 7.17s/it] 46%|████▋ | 5639/12188 [12:05:42<13:13:48, 7.27s/it] {'loss': 0.3109, 'grad_norm': 0.6672849828130197, 'learning_rate': 5.8449687071590245e-06, 'epoch': 0.46} + 46%|████▋ | 5639/12188 [12:05:42<13:13:48, 7.27s/it] 46%|████▋ | 5640/12188 [12:05:49<13:00:17, 7.15s/it] {'loss': 0.3153, 'grad_norm': 0.6853404389504869, 'learning_rate': 5.8436590818747205e-06, 'epoch': 0.46} + 46%|████▋ | 5640/12188 [12:05:49<13:00:17, 7.15s/it] 46%|████▋ | 5641/12188 [12:05:56<13:17:41, 7.31s/it] {'loss': 0.3177, 'grad_norm': 0.6261927617724908, 'learning_rate': 5.842349397012568e-06, 'epoch': 0.46} + 46%|████▋ | 5641/12188 [12:05:56<13:17:41, 7.31s/it] 46%|████▋ | 5642/12188 [12:06:04<13:26:17, 7.39s/it] {'loss': 0.335, 'grad_norm': 0.6432644086075544, 'learning_rate': 5.841039652665055e-06, 'epoch': 0.46} + 46%|████▋ | 5642/12188 [12:06:04<13:26:17, 7.39s/it] 46%|████▋ | 5643/12188 [12:06:11<13:09:54, 7.24s/it] {'loss': 0.3563, 'grad_norm': 0.7157849052037343, 'learning_rate': 5.83972984892467e-06, 'epoch': 0.46} + 46%|████▋ | 5643/12188 [12:06:11<13:09:54, 7.24s/it] 46%|████▋ | 5644/12188 [12:06:19<13:26:08, 7.39s/it] {'loss': 0.3382, 'grad_norm': 0.7580150230718008, 'learning_rate': 5.838419985883914e-06, 'epoch': 0.46} + 46%|████▋ | 5644/12188 [12:06:19<13:26:08, 7.39s/it] 46%|████▋ | 5645/12188 [12:06:27<13:49:38, 7.61s/it] {'loss': 0.3799, 'grad_norm': 0.6554450690139668, 'learning_rate': 5.837110063635288e-06, 'epoch': 0.46} + 46%|████▋ | 5645/12188 [12:06:27<13:49:38, 7.61s/it] 46%|████▋ | 5646/12188 [12:06:33<13:16:31, 7.31s/it] {'loss': 0.3459, 'grad_norm': 0.7418496893573319, 'learning_rate': 5.835800082271292e-06, 'epoch': 0.46} + 46%|████▋ | 5646/12188 [12:06:33<13:16:31, 7.31s/it] 46%|████▋ | 5647/12188 [12:06:40<12:58:58, 7.15s/it] {'loss': 0.3339, 'grad_norm': 0.6391218671549351, 'learning_rate': 5.8344900418844375e-06, 'epoch': 0.46} + 46%|████▋ | 5647/12188 [12:06:40<12:58:58, 7.15s/it] 46%|████▋ | 5648/12188 [12:06:47<12:51:06, 7.07s/it] {'loss': 0.3193, 'grad_norm': 0.636662898049728, 'learning_rate': 5.833179942567236e-06, 'epoch': 0.46} + 46%|████▋ | 5648/12188 [12:06:47<12:51:06, 7.07s/it] 46%|████▋ | 5649/12188 [12:06:54<13:02:12, 7.18s/it] {'loss': 0.3508, 'grad_norm': 0.6783243008827441, 'learning_rate': 5.831869784412205e-06, 'epoch': 0.46} + 46%|████▋ | 5649/12188 [12:06:54<13:02:12, 7.18s/it] 46%|████▋ | 5650/12188 [12:07:01<12:53:21, 7.10s/it] {'loss': 0.3467, 'grad_norm': 0.6515139392200288, 'learning_rate': 5.830559567511867e-06, 'epoch': 0.46} + 46%|████▋ | 5650/12188 [12:07:01<12:53:21, 7.10s/it]Invalidate trace cache @ step 2: expected module 1, but got module 364 + 46%|████▋ | 5651/12188 [12:07:08<12:28:25, 6.87s/it] {'loss': 0.6518, 'grad_norm': 0.6605002003121275, 'learning_rate': 5.8292492919587475e-06, 'epoch': 0.46} + 46%|████▋ | 5651/12188 [12:07:08<12:28:25, 6.87s/it] 46%|████▋ | 5652/12188 [12:07:15<12:44:57, 7.02s/it] {'loss': 0.3614, 'grad_norm': 0.6749060190482958, 'learning_rate': 5.827938957845376e-06, 'epoch': 0.46} + 46%|████▋ | 5652/12188 [12:07:15<12:44:57, 7.02s/it] 46%|████▋ | 5653/12188 [12:07:22<12:35:16, 6.93s/it] {'loss': 0.3442, 'grad_norm': 0.8329708839216042, 'learning_rate': 5.826628565264284e-06, 'epoch': 0.46} + 46%|████▋ | 5653/12188 [12:07:22<12:35:16, 6.93s/it] 46%|████▋ | 5654/12188 [12:07:29<12:48:49, 7.06s/it] {'loss': 0.3607, 'grad_norm': 0.6813145527459288, 'learning_rate': 5.825318114308012e-06, 'epoch': 0.46} + 46%|████▋ | 5654/12188 [12:07:29<12:48:49, 7.06s/it] 46%|████▋ | 5655/12188 [12:07:36<12:49:21, 7.07s/it] {'loss': 0.2884, 'grad_norm': 0.9540762572197363, 'learning_rate': 5.824007605069099e-06, 'epoch': 0.46} + 46%|████▋ | 5655/12188 [12:07:36<12:49:21, 7.07s/it] 46%|████▋ | 5656/12188 [12:07:43<12:54:06, 7.11s/it] {'loss': 0.3373, 'grad_norm': 0.7936298281020341, 'learning_rate': 5.822697037640094e-06, 'epoch': 0.46} + 46%|████▋ | 5656/12188 [12:07:43<12:54:06, 7.11s/it] 46%|████▋ | 5657/12188 [12:07:50<12:52:28, 7.10s/it] {'loss': 0.3574, 'grad_norm': 0.7327339545283205, 'learning_rate': 5.821386412113546e-06, 'epoch': 0.46} + 46%|████▋ | 5657/12188 [12:07:50<12:52:28, 7.10s/it] 46%|████▋ | 5658/12188 [12:07:57<12:49:09, 7.07s/it] {'loss': 0.3662, 'grad_norm': 1.2593623174008202, 'learning_rate': 5.820075728582008e-06, 'epoch': 0.46} + 46%|████▋ | 5658/12188 [12:07:57<12:49:09, 7.07s/it] 46%|████▋ | 5659/12188 [12:08:04<12:36:02, 6.95s/it] {'loss': 0.3832, 'grad_norm': 0.656581754887785, 'learning_rate': 5.818764987138041e-06, 'epoch': 0.46} + 46%|████▋ | 5659/12188 [12:08:04<12:36:02, 6.95s/it] 46%|████▋ | 5660/12188 [12:08:11<12:47:10, 7.05s/it] {'loss': 0.3032, 'grad_norm': 0.6446310404647804, 'learning_rate': 5.817454187874206e-06, 'epoch': 0.46} + 46%|████▋ | 5660/12188 [12:08:11<12:47:10, 7.05s/it] 46%|████▋ | 5661/12188 [12:08:18<12:41:30, 7.00s/it] {'loss': 0.3358, 'grad_norm': 1.0469953252484536, 'learning_rate': 5.816143330883069e-06, 'epoch': 0.46} + 46%|████▋ | 5661/12188 [12:08:18<12:41:30, 7.00s/it] 46%|████▋ | 5662/12188 [12:08:25<12:46:04, 7.04s/it] {'loss': 0.3331, 'grad_norm': 1.2437371318533903, 'learning_rate': 5.814832416257201e-06, 'epoch': 0.46} + 46%|████▋ | 5662/12188 [12:08:25<12:46:04, 7.04s/it] 46%|████▋ | 5663/12188 [12:08:32<12:43:11, 7.02s/it] {'loss': 0.3291, 'grad_norm': 0.632829735665257, 'learning_rate': 5.813521444089177e-06, 'epoch': 0.46} + 46%|████▋ | 5663/12188 [12:08:32<12:43:11, 7.02s/it] 46%|████▋ | 5664/12188 [12:08:40<12:55:33, 7.13s/it] {'loss': 0.3427, 'grad_norm': 0.7060543187605239, 'learning_rate': 5.812210414471576e-06, 'epoch': 0.46} + 46%|████▋ | 5664/12188 [12:08:40<12:55:33, 7.13s/it] 46%|████▋ | 5665/12188 [12:08:46<12:40:20, 6.99s/it] {'loss': 0.3479, 'grad_norm': 0.6473358016596947, 'learning_rate': 5.810899327496982e-06, 'epoch': 0.46} + 46%|████▋ | 5665/12188 [12:08:46<12:40:20, 6.99s/it] 46%|████▋ | 5666/12188 [12:08:53<12:38:29, 6.98s/it] {'loss': 0.3771, 'grad_norm': 0.6975671980332018, 'learning_rate': 5.8095881832579785e-06, 'epoch': 0.46} + 46%|████▋ | 5666/12188 [12:08:53<12:38:29, 6.98s/it] 46%|████▋ | 5667/12188 [12:09:01<12:53:39, 7.12s/it] {'loss': 0.3399, 'grad_norm': 0.7281542023061512, 'learning_rate': 5.808276981847158e-06, 'epoch': 0.46} + 46%|████▋ | 5667/12188 [12:09:01<12:53:39, 7.12s/it] 47%|████▋ | 5668/12188 [12:09:08<13:04:28, 7.22s/it] {'loss': 0.3496, 'grad_norm': 0.7497563457750749, 'learning_rate': 5.806965723357117e-06, 'epoch': 0.47} + 47%|████▋ | 5668/12188 [12:09:08<13:04:28, 7.22s/it] 47%|████▋ | 5669/12188 [12:09:16<13:15:26, 7.32s/it] {'loss': 0.3884, 'grad_norm': 0.7702950270217217, 'learning_rate': 5.805654407880451e-06, 'epoch': 0.47} + 47%|████▋ | 5669/12188 [12:09:16<13:15:26, 7.32s/it] 47%|████▋ | 5670/12188 [12:09:23<12:59:04, 7.17s/it] {'loss': 0.3447, 'grad_norm': 0.6546537022932486, 'learning_rate': 5.804343035509766e-06, 'epoch': 0.47} + 47%|████▋ | 5670/12188 [12:09:23<12:59:04, 7.17s/it] 47%|████▋ | 5671/12188 [12:09:29<12:48:24, 7.07s/it] {'loss': 0.3713, 'grad_norm': 0.6172072317935141, 'learning_rate': 5.803031606337668e-06, 'epoch': 0.47} + 47%|████▋ | 5671/12188 [12:09:29<12:48:24, 7.07s/it] 47%|████▋ | 5672/12188 [12:09:37<13:02:10, 7.20s/it] {'loss': 0.3009, 'grad_norm': 0.6415853456386006, 'learning_rate': 5.8017201204567685e-06, 'epoch': 0.47} + 47%|████▋ | 5672/12188 [12:09:37<13:02:10, 7.20s/it] 47%|████▋ | 5673/12188 [12:09:44<12:46:11, 7.06s/it] {'loss': 0.3373, 'grad_norm': 0.6181269219598822, 'learning_rate': 5.800408577959683e-06, 'epoch': 0.47} + 47%|████▋ | 5673/12188 [12:09:44<12:46:11, 7.06s/it] 47%|████▋ | 5674/12188 [12:09:51<13:05:08, 7.23s/it] {'loss': 0.3781, 'grad_norm': 0.7037319719523585, 'learning_rate': 5.799096978939029e-06, 'epoch': 0.47} + 47%|████▋ | 5674/12188 [12:09:51<13:05:08, 7.23s/it] 47%|████▋ | 5675/12188 [12:09:59<13:03:35, 7.22s/it] {'loss': 0.3532, 'grad_norm': 0.6682989150927433, 'learning_rate': 5.797785323487429e-06, 'epoch': 0.47} + 47%|████▋ | 5675/12188 [12:09:59<13:03:35, 7.22s/it] 47%|████▋ | 5676/12188 [12:10:06<12:55:36, 7.15s/it] {'loss': 0.3532, 'grad_norm': 0.8469001526364878, 'learning_rate': 5.7964736116975126e-06, 'epoch': 0.47} + 47%|████▋ | 5676/12188 [12:10:06<12:55:36, 7.15s/it] 47%|████▋ | 5677/12188 [12:10:12<12:40:55, 7.01s/it] {'loss': 0.3427, 'grad_norm': 0.881641625244505, 'learning_rate': 5.7951618436619094e-06, 'epoch': 0.47} + 47%|████▋ | 5677/12188 [12:10:12<12:40:55, 7.01s/it] 47%|████▋ | 5678/12188 [12:10:19<12:41:18, 7.02s/it] {'loss': 0.3002, 'grad_norm': 0.7798728802414348, 'learning_rate': 5.793850019473254e-06, 'epoch': 0.47} + 47%|████▋ | 5678/12188 [12:10:19<12:41:18, 7.02s/it] 47%|████▋ | 5679/12188 [12:10:27<12:53:23, 7.13s/it] {'loss': 0.3422, 'grad_norm': 0.6481697704564919, 'learning_rate': 5.7925381392241855e-06, 'epoch': 0.47} + 47%|████▋ | 5679/12188 [12:10:27<12:53:23, 7.13s/it] 47%|████▋ | 5680/12188 [12:10:34<12:58:32, 7.18s/it] {'loss': 0.3148, 'grad_norm': 0.6889872903662249, 'learning_rate': 5.791226203007346e-06, 'epoch': 0.47} + 47%|████▋ | 5680/12188 [12:10:34<12:58:32, 7.18s/it] 47%|████▋ | 5681/12188 [12:10:41<13:02:33, 7.22s/it] {'loss': 0.3078, 'grad_norm': 0.6506388174528905, 'learning_rate': 5.789914210915385e-06, 'epoch': 0.47} + 47%|████▋ | 5681/12188 [12:10:41<13:02:33, 7.22s/it] 47%|████▋ | 5682/12188 [12:10:48<12:49:32, 7.10s/it] {'loss': 0.3261, 'grad_norm': 0.6869316472456992, 'learning_rate': 5.788602163040951e-06, 'epoch': 0.47} + 47%|████▋ | 5682/12188 [12:10:48<12:49:32, 7.10s/it] 47%|████▋ | 5683/12188 [12:10:56<13:07:52, 7.27s/it] {'loss': 0.3468, 'grad_norm': 0.6950803822493425, 'learning_rate': 5.787290059476699e-06, 'epoch': 0.47} + 47%|████▋ | 5683/12188 [12:10:56<13:07:52, 7.27s/it] 47%|████▋ | 5684/12188 [12:11:03<13:07:39, 7.27s/it] {'loss': 0.3185, 'grad_norm': 0.8056980343966856, 'learning_rate': 5.785977900315287e-06, 'epoch': 0.47} + 47%|████▋ | 5684/12188 [12:11:03<13:07:39, 7.27s/it] 47%|████▋ | 5685/12188 [12:11:10<13:05:28, 7.25s/it] {'loss': 0.3216, 'grad_norm': 0.7406628628059071, 'learning_rate': 5.7846656856493784e-06, 'epoch': 0.47} + 47%|████▋ | 5685/12188 [12:11:10<13:05:28, 7.25s/it] 47%|████▋ | 5686/12188 [12:11:17<13:06:38, 7.26s/it] {'loss': 0.3674, 'grad_norm': 0.702193114892242, 'learning_rate': 5.783353415571642e-06, 'epoch': 0.47} + 47%|████▋ | 5686/12188 [12:11:17<13:06:38, 7.26s/it] 47%|████▋ | 5687/12188 [12:11:25<13:03:06, 7.23s/it] {'loss': 0.327, 'grad_norm': 1.219078887485048, 'learning_rate': 5.782041090174745e-06, 'epoch': 0.47} + 47%|████▋ | 5687/12188 [12:11:25<13:03:06, 7.23s/it] 47%|████▋ | 5688/12188 [12:11:32<13:13:51, 7.33s/it] {'loss': 0.3176, 'grad_norm': 0.7726659248602945, 'learning_rate': 5.780728709551361e-06, 'epoch': 0.47} + 47%|████▋ | 5688/12188 [12:11:32<13:13:51, 7.33s/it] 47%|████▋ | 5689/12188 [12:11:40<13:27:30, 7.46s/it] {'loss': 0.3111, 'grad_norm': 0.8934145421651492, 'learning_rate': 5.7794162737941715e-06, 'epoch': 0.47} + 47%|████▋ | 5689/12188 [12:11:40<13:27:30, 7.46s/it] 47%|████▋ | 5690/12188 [12:11:48<13:45:30, 7.62s/it] {'loss': 0.3354, 'grad_norm': 0.9569924369941886, 'learning_rate': 5.778103782995857e-06, 'epoch': 0.47} + 47%|████▋ | 5690/12188 [12:11:48<13:45:30, 7.62s/it] 47%|████▋ | 5691/12188 [12:11:55<13:17:32, 7.37s/it] {'loss': 0.3133, 'grad_norm': 0.628013597399316, 'learning_rate': 5.776791237249103e-06, 'epoch': 0.47} + 47%|████▋ | 5691/12188 [12:11:55<13:17:32, 7.37s/it] 47%|████▋ | 5692/12188 [12:12:02<13:15:13, 7.34s/it] {'loss': 0.3387, 'grad_norm': 0.8258790252714707, 'learning_rate': 5.7754786366465996e-06, 'epoch': 0.47} + 47%|████▋ | 5692/12188 [12:12:02<13:15:13, 7.34s/it] 47%|████▋ | 5693/12188 [12:12:09<13:07:28, 7.27s/it] {'loss': 0.3187, 'grad_norm': 0.7334875212838678, 'learning_rate': 5.774165981281041e-06, 'epoch': 0.47} + 47%|████▋ | 5693/12188 [12:12:09<13:07:28, 7.27s/it] 47%|████▋ | 5694/12188 [12:12:16<12:50:04, 7.12s/it] {'loss': 0.3077, 'grad_norm': 0.7243823709729214, 'learning_rate': 5.772853271245124e-06, 'epoch': 0.47} + 47%|████▋ | 5694/12188 [12:12:16<12:50:04, 7.12s/it] 47%|████▋ | 5695/12188 [12:12:24<13:34:34, 7.53s/it] {'loss': 0.3512, 'grad_norm': 0.6562767205730785, 'learning_rate': 5.771540506631552e-06, 'epoch': 0.47} + 47%|████▋ | 5695/12188 [12:12:24<13:34:34, 7.53s/it] 47%|████▋ | 5696/12188 [12:12:31<13:19:30, 7.39s/it] {'loss': 0.3513, 'grad_norm': 0.747020049775407, 'learning_rate': 5.770227687533029e-06, 'epoch': 0.47} + 47%|████▋ | 5696/12188 [12:12:31<13:19:30, 7.39s/it] 47%|████▋ | 5697/12188 [12:12:38<12:50:36, 7.12s/it] {'loss': 0.3472, 'grad_norm': 0.6871990735792035, 'learning_rate': 5.768914814042262e-06, 'epoch': 0.47} + 47%|████▋ | 5697/12188 [12:12:38<12:50:36, 7.12s/it] 47%|████▋ | 5698/12188 [12:12:46<13:31:07, 7.50s/it] {'loss': 0.3024, 'grad_norm': 0.7200514102697401, 'learning_rate': 5.767601886251968e-06, 'epoch': 0.47} + 47%|████▋ | 5698/12188 [12:12:46<13:31:07, 7.50s/it] 47%|████▋ | 5699/12188 [12:12:55<14:19:08, 7.94s/it] {'loss': 0.3462, 'grad_norm': 0.641699382158038, 'learning_rate': 5.766288904254862e-06, 'epoch': 0.47} + 47%|████▋ | 5699/12188 [12:12:55<14:19:08, 7.94s/it] 47%|████▋ | 5700/12188 [12:13:03<14:23:50, 7.99s/it] {'loss': 0.3418, 'grad_norm': 0.9742008458407339, 'learning_rate': 5.764975868143664e-06, 'epoch': 0.47} + 47%|████▋ | 5700/12188 [12:13:03<14:23:50, 7.99s/it] 47%|████▋ | 5701/12188 [12:13:12<14:44:08, 8.18s/it] {'loss': 0.3854, 'grad_norm': 0.6996650625797898, 'learning_rate': 5.763662778011101e-06, 'epoch': 0.47} + 47%|████▋ | 5701/12188 [12:13:12<14:44:08, 8.18s/it] 47%|████▋ | 5702/12188 [12:13:22<15:48:43, 8.78s/it] {'loss': 0.3339, 'grad_norm': 0.670192844227175, 'learning_rate': 5.762349633949897e-06, 'epoch': 0.47} + 47%|████▋ | 5702/12188 [12:13:22<15:48:43, 8.78s/it] 47%|████▋ | 5703/12188 [12:13:29<14:44:25, 8.18s/it] {'loss': 0.3095, 'grad_norm': 0.6712138851434098, 'learning_rate': 5.761036436052788e-06, 'epoch': 0.47} + 47%|████▋ | 5703/12188 [12:13:29<14:44:25, 8.18s/it] 47%|████▋ | 5704/12188 [12:13:36<14:07:47, 7.85s/it] {'loss': 0.3419, 'grad_norm': 0.9976963519196268, 'learning_rate': 5.759723184412509e-06, 'epoch': 0.47} + 47%|████▋ | 5704/12188 [12:13:36<14:07:47, 7.85s/it] 47%|████▋ | 5705/12188 [12:13:43<13:42:28, 7.61s/it] {'loss': 0.3535, 'grad_norm': 0.7349988692175181, 'learning_rate': 5.758409879121799e-06, 'epoch': 0.47} + 47%|████▋ | 5705/12188 [12:13:43<13:42:28, 7.61s/it] 47%|████▋ | 5706/12188 [12:13:51<13:39:10, 7.58s/it] {'loss': 0.3507, 'grad_norm': 0.6598762360214864, 'learning_rate': 5.7570965202734e-06, 'epoch': 0.47} + 47%|████▋ | 5706/12188 [12:13:51<13:39:10, 7.58s/it] 47%|████▋ | 5707/12188 [12:13:58<13:49:15, 7.68s/it] {'loss': 0.3347, 'grad_norm': 0.660756998655975, 'learning_rate': 5.755783107960064e-06, 'epoch': 0.47} + 47%|████▋ | 5707/12188 [12:13:59<13:49:15, 7.68s/it] 47%|████▋ | 5708/12188 [12:14:06<13:29:05, 7.49s/it] {'loss': 0.3292, 'grad_norm': 0.7869587727039768, 'learning_rate': 5.754469642274538e-06, 'epoch': 0.47} + 47%|████▋ | 5708/12188 [12:14:06<13:29:05, 7.49s/it] 47%|████▋ | 5709/12188 [12:14:13<13:22:17, 7.43s/it] {'loss': 0.3084, 'grad_norm': 0.6709887625052827, 'learning_rate': 5.753156123309578e-06, 'epoch': 0.47} + 47%|████▋ | 5709/12188 [12:14:13<13:22:17, 7.43s/it] 47%|████▋ | 5710/12188 [12:14:20<13:17:45, 7.39s/it] {'loss': 0.3139, 'grad_norm': 0.6717123093823959, 'learning_rate': 5.7518425511579424e-06, 'epoch': 0.47} + 47%|████▋ | 5710/12188 [12:14:20<13:17:45, 7.39s/it] 47%|████▋ | 5711/12188 [12:14:28<13:17:53, 7.39s/it] {'loss': 0.3481, 'grad_norm': 0.7086692278963683, 'learning_rate': 5.7505289259123934e-06, 'epoch': 0.47} + 47%|████▋ | 5711/12188 [12:14:28<13:17:53, 7.39s/it] 47%|████▋ | 5712/12188 [12:14:35<13:10:06, 7.32s/it] {'loss': 0.2985, 'grad_norm': 0.6657687860660522, 'learning_rate': 5.7492152476656975e-06, 'epoch': 0.47} + 47%|████▋ | 5712/12188 [12:14:35<13:10:06, 7.32s/it] 47%|████▋ | 5713/12188 [12:14:42<13:15:20, 7.37s/it] {'loss': 0.3719, 'grad_norm': 0.7575455536671059, 'learning_rate': 5.747901516510624e-06, 'epoch': 0.47} + 47%|████▋ | 5713/12188 [12:14:42<13:15:20, 7.37s/it] 47%|████▋ | 5714/12188 [12:14:49<13:04:26, 7.27s/it] {'loss': 0.3594, 'grad_norm': 0.6506929167519427, 'learning_rate': 5.746587732539949e-06, 'epoch': 0.47} + 47%|████▋ | 5714/12188 [12:14:49<13:04:26, 7.27s/it] 47%|████▋ | 5715/12188 [12:14:56<13:02:16, 7.25s/it] {'loss': 0.3269, 'grad_norm': 0.6178091799936627, 'learning_rate': 5.745273895846447e-06, 'epoch': 0.47} + 47%|████▋ | 5715/12188 [12:14:56<13:02:16, 7.25s/it] 47%|████▋ | 5716/12188 [12:15:04<13:03:52, 7.27s/it] {'loss': 0.3214, 'grad_norm': 0.6607663252540589, 'learning_rate': 5.743960006522899e-06, 'epoch': 0.47} + 47%|████▋ | 5716/12188 [12:15:04<13:03:52, 7.27s/it] 47%|████▋ | 5717/12188 [12:15:11<12:58:05, 7.21s/it] {'loss': 0.3547, 'grad_norm': 1.161593604589931, 'learning_rate': 5.742646064662092e-06, 'epoch': 0.47} + 47%|████▋ | 5717/12188 [12:15:11<12:58:05, 7.21s/it] 47%|████▋ | 5718/12188 [12:15:18<12:48:00, 7.12s/it] {'loss': 0.3121, 'grad_norm': 0.6820126070263495, 'learning_rate': 5.741332070356812e-06, 'epoch': 0.47} + 47%|████▋ | 5718/12188 [12:15:18<12:48:00, 7.12s/it] 47%|████▋ | 5719/12188 [12:15:24<12:36:13, 7.01s/it] {'loss': 0.3402, 'grad_norm': 0.7454690114338495, 'learning_rate': 5.7400180236998524e-06, 'epoch': 0.47} + 47%|████▋ | 5719/12188 [12:15:24<12:36:13, 7.01s/it] 47%|████▋ | 5720/12188 [12:15:32<13:02:21, 7.26s/it] {'loss': 0.3143, 'grad_norm': 0.6594328291796367, 'learning_rate': 5.738703924784008e-06, 'epoch': 0.47} + 47%|████▋ | 5720/12188 [12:15:32<13:02:21, 7.26s/it] 47%|████▋ | 5721/12188 [12:15:39<12:59:13, 7.23s/it] {'loss': 0.3351, 'grad_norm': 0.651740142039005, 'learning_rate': 5.7373897737020814e-06, 'epoch': 0.47} + 47%|████▋ | 5721/12188 [12:15:39<12:59:13, 7.23s/it] 47%|████▋ | 5722/12188 [12:15:47<13:07:51, 7.31s/it] {'loss': 0.3441, 'grad_norm': 0.6183046494310948, 'learning_rate': 5.7360755705468725e-06, 'epoch': 0.47} + 47%|████▋ | 5722/12188 [12:15:47<13:07:51, 7.31s/it] 47%|████▋ | 5723/12188 [12:15:54<13:05:42, 7.29s/it] {'loss': 0.2999, 'grad_norm': 0.709894475386862, 'learning_rate': 5.734761315411189e-06, 'epoch': 0.47} + 47%|████▋ | 5723/12188 [12:15:54<13:05:42, 7.29s/it] 47%|████▋ | 5724/12188 [12:16:01<12:51:35, 7.16s/it] {'loss': 0.3097, 'grad_norm': 0.8278949263630512, 'learning_rate': 5.733447008387842e-06, 'epoch': 0.47} + 47%|████▋ | 5724/12188 [12:16:01<12:51:35, 7.16s/it] 47%|████▋ | 5725/12188 [12:16:08<12:41:03, 7.07s/it] {'loss': 0.3573, 'grad_norm': 0.6035581250595632, 'learning_rate': 5.7321326495696465e-06, 'epoch': 0.47} + 47%|████▋ | 5725/12188 [12:16:08<12:41:03, 7.07s/it] 47%|████▋ | 5726/12188 [12:16:16<13:05:15, 7.29s/it] {'loss': 0.3536, 'grad_norm': 0.6042139354667709, 'learning_rate': 5.7308182390494185e-06, 'epoch': 0.47} + 47%|████▋ | 5726/12188 [12:16:16<13:05:15, 7.29s/it] 47%|████▋ | 5727/12188 [12:16:23<12:56:54, 7.21s/it] {'loss': 0.3423, 'grad_norm': 0.6485460533640659, 'learning_rate': 5.729503776919981e-06, 'epoch': 0.47} + 47%|████▋ | 5727/12188 [12:16:23<12:56:54, 7.21s/it] 47%|████▋ | 5728/12188 [12:16:30<13:07:10, 7.31s/it] {'loss': 0.3282, 'grad_norm': 0.6630573915486866, 'learning_rate': 5.7281892632741585e-06, 'epoch': 0.47} + 47%|████▋ | 5728/12188 [12:16:30<13:07:10, 7.31s/it] 47%|████▋ | 5729/12188 [12:16:37<13:00:26, 7.25s/it] {'loss': 0.3217, 'grad_norm': 0.6011098123159836, 'learning_rate': 5.726874698204781e-06, 'epoch': 0.47} + 47%|████▋ | 5729/12188 [12:16:37<13:00:26, 7.25s/it] 47%|████▋ | 5730/12188 [12:16:44<12:41:28, 7.07s/it] {'loss': 0.32, 'grad_norm': 0.6220820824827509, 'learning_rate': 5.72556008180468e-06, 'epoch': 0.47} + 47%|████▋ | 5730/12188 [12:16:44<12:41:28, 7.07s/it] 47%|████▋ | 5731/12188 [12:16:51<12:26:52, 6.94s/it] {'loss': 0.3327, 'grad_norm': 0.7302613623823453, 'learning_rate': 5.724245414166692e-06, 'epoch': 0.47} + 47%|████▋ | 5731/12188 [12:16:51<12:26:52, 6.94s/it] 47%|████▋ | 5732/12188 [12:16:58<12:49:32, 7.15s/it] {'loss': 0.3485, 'grad_norm': 0.742562949137512, 'learning_rate': 5.722930695383655e-06, 'epoch': 0.47} + 47%|████▋ | 5732/12188 [12:16:58<12:49:32, 7.15s/it] 47%|████▋ | 5733/12188 [12:17:05<12:37:28, 7.04s/it] {'loss': 0.3191, 'grad_norm': 0.7109741433953998, 'learning_rate': 5.721615925548414e-06, 'epoch': 0.47} + 47%|████▋ | 5733/12188 [12:17:05<12:37:28, 7.04s/it] 47%|████▋ | 5734/12188 [12:17:12<12:41:34, 7.08s/it] {'loss': 0.3453, 'grad_norm': 0.7236661873428676, 'learning_rate': 5.720301104753816e-06, 'epoch': 0.47} + 47%|████▋ | 5734/12188 [12:17:12<12:41:34, 7.08s/it] 47%|████▋ | 5735/12188 [12:17:21<13:24:04, 7.48s/it] {'loss': 0.3246, 'grad_norm': 0.6135740881429658, 'learning_rate': 5.718986233092713e-06, 'epoch': 0.47} + 47%|████▋ | 5735/12188 [12:17:21<13:24:04, 7.48s/it] 47%|████▋ | 5736/12188 [12:17:28<13:22:33, 7.46s/it] {'loss': 0.3707, 'grad_norm': 0.6425593800666708, 'learning_rate': 5.717671310657956e-06, 'epoch': 0.47} + 47%|████▋ | 5736/12188 [12:17:28<13:22:33, 7.46s/it] 47%|████▋ | 5737/12188 [12:17:35<13:12:53, 7.37s/it] {'loss': 0.3158, 'grad_norm': 1.537081774256029, 'learning_rate': 5.716356337542404e-06, 'epoch': 0.47} + 47%|████▋ | 5737/12188 [12:17:35<13:12:53, 7.37s/it] 47%|████▋ | 5738/12188 [12:17:42<12:53:40, 7.20s/it] {'loss': 0.3266, 'grad_norm': 0.7010697705213512, 'learning_rate': 5.715041313838917e-06, 'epoch': 0.47} + 47%|████▋ | 5738/12188 [12:17:42<12:53:40, 7.20s/it] 47%|████▋ | 5739/12188 [12:17:49<12:35:22, 7.03s/it] {'loss': 0.3134, 'grad_norm': 0.6005854304673508, 'learning_rate': 5.713726239640363e-06, 'epoch': 0.47} + 47%|████▋ | 5739/12188 [12:17:49<12:35:22, 7.03s/it] 47%|████▋ | 5740/12188 [12:17:56<12:37:39, 7.05s/it] {'loss': 0.3275, 'grad_norm': 0.6688207478880569, 'learning_rate': 5.712411115039609e-06, 'epoch': 0.47} + 47%|████▋ | 5740/12188 [12:17:56<12:37:39, 7.05s/it] 47%|████▋ | 5741/12188 [12:18:03<12:42:04, 7.09s/it] {'loss': 0.3471, 'grad_norm': 0.6581403551233702, 'learning_rate': 5.711095940129525e-06, 'epoch': 0.47} + 47%|████▋ | 5741/12188 [12:18:03<12:42:04, 7.09s/it] 47%|████▋ | 5742/12188 [12:18:10<12:33:12, 7.01s/it] {'loss': 0.3664, 'grad_norm': 0.6992679011650181, 'learning_rate': 5.70978071500299e-06, 'epoch': 0.47} + 47%|████▋ | 5742/12188 [12:18:10<12:33:12, 7.01s/it] 47%|████▋ | 5743/12188 [12:18:17<12:52:10, 7.19s/it] {'loss': 0.3584, 'grad_norm': 0.6251148743905443, 'learning_rate': 5.7084654397528795e-06, 'epoch': 0.47} + 47%|████▋ | 5743/12188 [12:18:17<12:52:10, 7.19s/it] 47%|████▋ | 5744/12188 [12:18:25<13:00:41, 7.27s/it] {'loss': 0.3381, 'grad_norm': 0.6954666603326526, 'learning_rate': 5.707150114472079e-06, 'epoch': 0.47} + 47%|████▋ | 5744/12188 [12:18:25<13:00:41, 7.27s/it] 47%|████▋ | 5745/12188 [12:18:32<12:49:27, 7.17s/it] {'loss': 0.3326, 'grad_norm': 0.75920640573586, 'learning_rate': 5.705834739253473e-06, 'epoch': 0.47} + 47%|████▋ | 5745/12188 [12:18:32<12:49:27, 7.17s/it] 47%|████▋ | 5746/12188 [12:18:39<12:49:07, 7.16s/it] {'loss': 0.3178, 'grad_norm': 0.7270033558351008, 'learning_rate': 5.704519314189953e-06, 'epoch': 0.47} + 47%|████▋ | 5746/12188 [12:18:39<12:49:07, 7.16s/it] 47%|████▋ | 5747/12188 [12:18:47<13:08:37, 7.35s/it] {'loss': 0.3358, 'grad_norm': 0.656186741127401, 'learning_rate': 5.703203839374408e-06, 'epoch': 0.47} + 47%|████▋ | 5747/12188 [12:18:47<13:08:37, 7.35s/it] 47%|████▋ | 5748/12188 [12:18:54<13:01:11, 7.28s/it] {'loss': 0.3732, 'grad_norm': 0.6747194347399383, 'learning_rate': 5.70188831489974e-06, 'epoch': 0.47} + 47%|████▋ | 5748/12188 [12:18:54<13:01:11, 7.28s/it] 47%|████▋ | 5749/12188 [12:19:01<12:46:09, 7.14s/it] {'loss': 0.3189, 'grad_norm': 0.688141510426695, 'learning_rate': 5.700572740858847e-06, 'epoch': 0.47} + 47%|████▋ | 5749/12188 [12:19:01<12:46:09, 7.14s/it] 47%|████▋ | 5750/12188 [12:19:08<12:52:11, 7.20s/it] {'loss': 0.3405, 'grad_norm': 0.673985480759074, 'learning_rate': 5.699257117344632e-06, 'epoch': 0.47} + 47%|████▋ | 5750/12188 [12:19:08<12:52:11, 7.20s/it] 47%|████▋ | 5751/12188 [12:19:15<12:52:12, 7.20s/it] {'loss': 0.3568, 'grad_norm': 0.6891613811334615, 'learning_rate': 5.697941444450002e-06, 'epoch': 0.47} + 47%|████▋ | 5751/12188 [12:19:15<12:52:12, 7.20s/it] 47%|████▋ | 5752/12188 [12:19:22<12:45:14, 7.13s/it] {'loss': 0.3354, 'grad_norm': 0.8787203493200316, 'learning_rate': 5.696625722267869e-06, 'epoch': 0.47} + 47%|████▋ | 5752/12188 [12:19:22<12:45:14, 7.13s/it] 47%|████▋ | 5753/12188 [12:19:30<12:52:47, 7.21s/it] {'loss': 0.3622, 'grad_norm': 0.8022905553881532, 'learning_rate': 5.695309950891147e-06, 'epoch': 0.47} + 47%|████▋ | 5753/12188 [12:19:30<12:52:47, 7.21s/it] 47%|████▋ | 5754/12188 [12:19:37<13:04:10, 7.31s/it] {'loss': 0.2926, 'grad_norm': 0.6561820099233279, 'learning_rate': 5.693994130412753e-06, 'epoch': 0.47} + 47%|████▋ | 5754/12188 [12:19:37<13:04:10, 7.31s/it] 47%|████▋ | 5755/12188 [12:19:44<12:53:20, 7.21s/it] {'loss': 0.325, 'grad_norm': 0.649180701805331, 'learning_rate': 5.692678260925609e-06, 'epoch': 0.47} + 47%|████▋ | 5755/12188 [12:19:44<12:53:20, 7.21s/it] 47%|████▋ | 5756/12188 [12:19:52<13:00:43, 7.28s/it] {'loss': 0.3444, 'grad_norm': 0.6210872341638767, 'learning_rate': 5.691362342522639e-06, 'epoch': 0.47} + 47%|████▋ | 5756/12188 [12:19:52<13:00:43, 7.28s/it] 47%|████▋ | 5757/12188 [12:19:59<12:54:20, 7.22s/it] {'loss': 0.3338, 'grad_norm': 0.6481060080526465, 'learning_rate': 5.6900463752967715e-06, 'epoch': 0.47} + 47%|████▋ | 5757/12188 [12:19:59<12:54:20, 7.22s/it] 47%|████▋ | 5758/12188 [12:20:06<13:01:48, 7.30s/it] {'loss': 0.3502, 'grad_norm': 0.6462421742731278, 'learning_rate': 5.6887303593409375e-06, 'epoch': 0.47} + 47%|████▋ | 5758/12188 [12:20:06<13:01:48, 7.30s/it] 47%|████▋ | 5759/12188 [12:20:13<12:41:10, 7.10s/it] {'loss': 0.3269, 'grad_norm': 0.7013442654604444, 'learning_rate': 5.687414294748072e-06, 'epoch': 0.47} + 47%|████▋ | 5759/12188 [12:20:13<12:41:10, 7.10s/it] 47%|████▋ | 5760/12188 [12:20:20<12:55:42, 7.24s/it] {'loss': 0.3688, 'grad_norm': 0.6560393469616381, 'learning_rate': 5.686098181611112e-06, 'epoch': 0.47} + 47%|████▋ | 5760/12188 [12:20:20<12:55:42, 7.24s/it] 47%|████▋ | 5761/12188 [12:20:28<13:04:59, 7.33s/it] {'loss': 0.3295, 'grad_norm': 0.6925767556787797, 'learning_rate': 5.684782020023002e-06, 'epoch': 0.47} + 47%|████▋ | 5761/12188 [12:20:28<13:04:59, 7.33s/it] 47%|████▋ | 5762/12188 [12:20:35<13:10:38, 7.38s/it] {'loss': 0.3199, 'grad_norm': 0.7258193454719536, 'learning_rate': 5.683465810076686e-06, 'epoch': 0.47} + 47%|████▋ | 5762/12188 [12:20:35<13:10:38, 7.38s/it] 47%|████▋ | 5763/12188 [12:20:43<13:11:59, 7.40s/it] {'loss': 0.3787, 'grad_norm': 1.0716855582095814, 'learning_rate': 5.682149551865114e-06, 'epoch': 0.47} + 47%|████▋ | 5763/12188 [12:20:43<13:11:59, 7.40s/it] 47%|████▋ | 5764/12188 [12:20:51<13:47:39, 7.73s/it] {'loss': 0.351, 'grad_norm': 0.7570152423512436, 'learning_rate': 5.680833245481234e-06, 'epoch': 0.47} + 47%|████▋ | 5764/12188 [12:20:51<13:47:39, 7.73s/it] 47%|████▋ | 5765/12188 [12:20:58<13:17:08, 7.45s/it] {'loss': 0.3551, 'grad_norm': 0.7124877417694819, 'learning_rate': 5.6795168910180045e-06, 'epoch': 0.47} + 47%|████▋ | 5765/12188 [12:20:58<13:17:08, 7.45s/it] 47%|████▋ | 5766/12188 [12:21:05<12:48:24, 7.18s/it] {'loss': 0.3591, 'grad_norm': 0.6239413665155625, 'learning_rate': 5.678200488568384e-06, 'epoch': 0.47} + 47%|████▋ | 5766/12188 [12:21:05<12:48:24, 7.18s/it] 47%|████▋ | 5767/12188 [12:21:12<12:54:20, 7.24s/it] {'loss': 0.3151, 'grad_norm': 0.6014923795694451, 'learning_rate': 5.676884038225335e-06, 'epoch': 0.47} + 47%|████▋ | 5767/12188 [12:21:12<12:54:20, 7.24s/it] 47%|████▋ | 5768/12188 [12:21:20<13:24:40, 7.52s/it] {'loss': 0.3274, 'grad_norm': 0.638810439489679, 'learning_rate': 5.675567540081823e-06, 'epoch': 0.47} + 47%|████▋ | 5768/12188 [12:21:20<13:24:40, 7.52s/it] 47%|████▋ | 5769/12188 [12:21:28<13:46:37, 7.73s/it] {'loss': 0.3512, 'grad_norm': 0.5906772921991192, 'learning_rate': 5.674250994230816e-06, 'epoch': 0.47} + 47%|████▋ | 5769/12188 [12:21:28<13:46:37, 7.73s/it] 47%|████▋ | 5770/12188 [12:21:36<13:30:24, 7.58s/it] {'loss': 0.3427, 'grad_norm': 0.7051188601196372, 'learning_rate': 5.672934400765289e-06, 'epoch': 0.47} + 47%|████▋ | 5770/12188 [12:21:36<13:30:24, 7.58s/it] 47%|████▋ | 5771/12188 [12:21:42<12:58:03, 7.28s/it] {'loss': 0.3666, 'grad_norm': 0.7389448390714702, 'learning_rate': 5.671617759778214e-06, 'epoch': 0.47} + 47%|████▋ | 5771/12188 [12:21:42<12:58:03, 7.28s/it] 47%|████▋ | 5772/12188 [12:21:49<12:43:59, 7.14s/it] {'loss': 0.3497, 'grad_norm': 0.6917660656409399, 'learning_rate': 5.6703010713625715e-06, 'epoch': 0.47} + 47%|████▋ | 5772/12188 [12:21:49<12:43:59, 7.14s/it] 47%|████▋ | 5773/12188 [12:21:57<12:56:41, 7.26s/it] {'loss': 0.3567, 'grad_norm': 0.6387875719699064, 'learning_rate': 5.668984335611345e-06, 'epoch': 0.47} + 47%|████▋ | 5773/12188 [12:21:57<12:56:41, 7.26s/it] 47%|████▋ | 5774/12188 [12:22:04<13:01:44, 7.31s/it] {'loss': 0.3362, 'grad_norm': 1.543371687212305, 'learning_rate': 5.667667552617518e-06, 'epoch': 0.47} + 47%|████▋ | 5774/12188 [12:22:04<13:01:44, 7.31s/it] 47%|████▋ | 5775/12188 [12:22:11<12:59:53, 7.30s/it] {'loss': 0.2958, 'grad_norm': 0.6860792655535065, 'learning_rate': 5.666350722474083e-06, 'epoch': 0.47} + 47%|████▋ | 5775/12188 [12:22:11<12:59:53, 7.30s/it] 47%|████▋ | 5776/12188 [12:22:18<12:42:58, 7.14s/it] {'loss': 0.3145, 'grad_norm': 0.7064669655489002, 'learning_rate': 5.66503384527403e-06, 'epoch': 0.47} + 47%|████▋ | 5776/12188 [12:22:18<12:42:58, 7.14s/it] 47%|████▋ | 5777/12188 [12:22:25<12:34:26, 7.06s/it] {'loss': 0.3522, 'grad_norm': 0.751933844920491, 'learning_rate': 5.663716921110357e-06, 'epoch': 0.47} + 47%|████▋ | 5777/12188 [12:22:25<12:34:26, 7.06s/it] 47%|████▋ | 5778/12188 [12:22:32<12:34:16, 7.06s/it] {'loss': 0.3345, 'grad_norm': 0.6775041354192025, 'learning_rate': 5.662399950076059e-06, 'epoch': 0.47} + 47%|████▋ | 5778/12188 [12:22:32<12:34:16, 7.06s/it] 47%|████▋ | 5779/12188 [12:22:39<12:21:32, 6.94s/it] {'loss': 0.3535, 'grad_norm': 0.9536142358821604, 'learning_rate': 5.6610829322641415e-06, 'epoch': 0.47} + 47%|████▋ | 5779/12188 [12:22:39<12:21:32, 6.94s/it] 47%|████▋ | 5780/12188 [12:22:45<12:15:02, 6.88s/it] {'loss': 0.3551, 'grad_norm': 0.7068279170781496, 'learning_rate': 5.65976586776761e-06, 'epoch': 0.47} + 47%|████▋ | 5780/12188 [12:22:45<12:15:02, 6.88s/it] 47%|████▋ | 5781/12188 [12:22:52<12:17:45, 6.91s/it] {'loss': 0.3549, 'grad_norm': 0.6769394456325483, 'learning_rate': 5.658448756679472e-06, 'epoch': 0.47} + 47%|████▋ | 5781/12188 [12:22:52<12:17:45, 6.91s/it] 47%|████▋ | 5782/12188 [12:23:00<12:49:16, 7.21s/it] {'loss': 0.3322, 'grad_norm': 0.6785427069445297, 'learning_rate': 5.657131599092741e-06, 'epoch': 0.47} + 47%|████▋ | 5782/12188 [12:23:00<12:49:16, 7.21s/it] 47%|████▋ | 5783/12188 [12:23:07<12:33:50, 7.06s/it] {'loss': 0.3088, 'grad_norm': 0.6581315422419188, 'learning_rate': 5.655814395100432e-06, 'epoch': 0.47} + 47%|████▋ | 5783/12188 [12:23:07<12:33:50, 7.06s/it] 47%|████▋ | 5784/12188 [12:23:14<12:44:41, 7.16s/it] {'loss': 0.3326, 'grad_norm': 0.64586113633901, 'learning_rate': 5.654497144795564e-06, 'epoch': 0.47} + 47%|████▋ | 5784/12188 [12:23:14<12:44:41, 7.16s/it] 47%|████▋ | 5785/12188 [12:23:21<12:25:05, 6.98s/it] {'loss': 0.3164, 'grad_norm': 0.6640640874584368, 'learning_rate': 5.65317984827116e-06, 'epoch': 0.47} + 47%|████▋ | 5785/12188 [12:23:21<12:25:05, 6.98s/it] 47%|████▋ | 5786/12188 [12:23:28<12:25:59, 6.99s/it] {'loss': 0.3026, 'grad_norm': 0.6547447252127988, 'learning_rate': 5.651862505620245e-06, 'epoch': 0.47} + 47%|████▋ | 5786/12188 [12:23:28<12:25:59, 6.99s/it] 47%|████▋ | 5787/12188 [12:23:36<12:49:04, 7.21s/it] {'loss': 0.3004, 'grad_norm': 0.8712893439372147, 'learning_rate': 5.650545116935845e-06, 'epoch': 0.47} + 47%|████▋ | 5787/12188 [12:23:36<12:49:04, 7.21s/it] 47%|████▋ | 5788/12188 [12:23:43<13:04:48, 7.36s/it] {'loss': 0.3166, 'grad_norm': 0.6824468082435192, 'learning_rate': 5.649227682310995e-06, 'epoch': 0.47} + 47%|████▋ | 5788/12188 [12:23:43<13:04:48, 7.36s/it] 47%|████▋ | 5789/12188 [12:23:50<12:50:25, 7.22s/it] {'loss': 0.3046, 'grad_norm': 0.7256639738935461, 'learning_rate': 5.6479102018387284e-06, 'epoch': 0.47} + 47%|████▋ | 5789/12188 [12:23:50<12:50:25, 7.22s/it] 48%|████▊ | 5790/12188 [12:23:58<13:04:18, 7.36s/it] {'loss': 0.331, 'grad_norm': 0.735702267371449, 'learning_rate': 5.6465926756120856e-06, 'epoch': 0.48} + 48%|████▊ | 5790/12188 [12:23:58<13:04:18, 7.36s/it] 48%|████▊ | 5791/12188 [12:24:05<12:52:44, 7.25s/it] {'loss': 0.3535, 'grad_norm': 0.7384671705556669, 'learning_rate': 5.645275103724108e-06, 'epoch': 0.48} + 48%|████▊ | 5791/12188 [12:24:05<12:52:44, 7.25s/it] 48%|████▊ | 5792/12188 [12:24:12<12:53:06, 7.25s/it] {'loss': 0.3426, 'grad_norm': 1.092933937263611, 'learning_rate': 5.643957486267837e-06, 'epoch': 0.48} + 48%|████▊ | 5792/12188 [12:24:12<12:53:06, 7.25s/it] 48%|████▊ | 5793/12188 [12:24:19<12:43:49, 7.17s/it] {'loss': 0.303, 'grad_norm': 0.784418465334993, 'learning_rate': 5.642639823336325e-06, 'epoch': 0.48} + 48%|████▊ | 5793/12188 [12:24:19<12:43:49, 7.17s/it] 48%|████▊ | 5794/12188 [12:24:26<12:48:26, 7.21s/it] {'loss': 0.3057, 'grad_norm': 0.6320436418934405, 'learning_rate': 5.641322115022619e-06, 'epoch': 0.48} + 48%|████▊ | 5794/12188 [12:24:27<12:48:26, 7.21s/it] 48%|████▊ | 5795/12188 [12:24:34<12:41:46, 7.15s/it] {'loss': 0.3259, 'grad_norm': 0.7801603886915517, 'learning_rate': 5.640004361419776e-06, 'epoch': 0.48} + 48%|████▊ | 5795/12188 [12:24:34<12:41:46, 7.15s/it] 48%|████▊ | 5796/12188 [12:24:42<13:14:42, 7.46s/it] {'loss': 0.3129, 'grad_norm': 0.6574120730517724, 'learning_rate': 5.638686562620853e-06, 'epoch': 0.48} + 48%|████▊ | 5796/12188 [12:24:42<13:14:42, 7.46s/it] 48%|████▊ | 5797/12188 [12:24:49<13:08:51, 7.41s/it] {'loss': 0.3657, 'grad_norm': 0.7998188746925676, 'learning_rate': 5.637368718718913e-06, 'epoch': 0.48} + 48%|████▊ | 5797/12188 [12:24:49<13:08:51, 7.41s/it] 48%|████▊ | 5798/12188 [12:24:58<13:45:40, 7.75s/it] {'loss': 0.3462, 'grad_norm': 0.7717443659888918, 'learning_rate': 5.636050829807017e-06, 'epoch': 0.48} + 48%|████▊ | 5798/12188 [12:24:58<13:45:40, 7.75s/it] 48%|████▊ | 5799/12188 [12:25:05<13:43:25, 7.73s/it] {'loss': 0.3196, 'grad_norm': 0.6558847518562111, 'learning_rate': 5.634732895978233e-06, 'epoch': 0.48} + 48%|████▊ | 5799/12188 [12:25:05<13:43:25, 7.73s/it] 48%|████▊ | 5800/12188 [12:25:12<13:20:16, 7.52s/it] {'loss': 0.329, 'grad_norm': 0.6558742209136228, 'learning_rate': 5.6334149173256324e-06, 'epoch': 0.48} + 48%|████▊ | 5800/12188 [12:25:12<13:20:16, 7.52s/it] 48%|████▊ | 5801/12188 [12:25:20<13:16:59, 7.49s/it] {'loss': 0.2868, 'grad_norm': 0.6110398874714734, 'learning_rate': 5.632096893942286e-06, 'epoch': 0.48} + 48%|████▊ | 5801/12188 [12:25:20<13:16:59, 7.49s/it] 48%|████▊ | 5802/12188 [12:25:27<13:18:18, 7.50s/it] {'loss': 0.3336, 'grad_norm': 0.9916059847666463, 'learning_rate': 5.630778825921274e-06, 'epoch': 0.48} + 48%|████▊ | 5802/12188 [12:25:27<13:18:18, 7.50s/it] 48%|████▊ | 5803/12188 [12:25:34<13:01:51, 7.35s/it] {'loss': 0.2869, 'grad_norm': 0.7993609454073255, 'learning_rate': 5.629460713355674e-06, 'epoch': 0.48} + 48%|████▊ | 5803/12188 [12:25:34<13:01:51, 7.35s/it] 48%|████▊ | 5804/12188 [12:25:41<12:38:13, 7.13s/it] {'loss': 0.3754, 'grad_norm': 0.7433008900036907, 'learning_rate': 5.628142556338572e-06, 'epoch': 0.48} + 48%|████▊ | 5804/12188 [12:25:41<12:38:13, 7.13s/it] 48%|████▊ | 5805/12188 [12:25:48<12:26:35, 7.02s/it] {'loss': 0.3244, 'grad_norm': 0.9001305751367051, 'learning_rate': 5.6268243549630495e-06, 'epoch': 0.48} + 48%|████▊ | 5805/12188 [12:25:48<12:26:35, 7.02s/it] 48%|████▊ | 5806/12188 [12:25:55<12:42:59, 7.17s/it] {'loss': 0.3246, 'grad_norm': 0.7408216421156409, 'learning_rate': 5.625506109322198e-06, 'epoch': 0.48} + 48%|████▊ | 5806/12188 [12:25:55<12:42:59, 7.17s/it] 48%|████▊ | 5807/12188 [12:26:03<13:01:31, 7.35s/it] {'loss': 0.3568, 'grad_norm': 0.6499653493560755, 'learning_rate': 5.624187819509111e-06, 'epoch': 0.48} + 48%|████▊ | 5807/12188 [12:26:03<13:01:31, 7.35s/it] 48%|████▊ | 5808/12188 [12:26:11<13:23:17, 7.55s/it] {'loss': 0.3413, 'grad_norm': 1.0310101885836804, 'learning_rate': 5.622869485616884e-06, 'epoch': 0.48} + 48%|████▊ | 5808/12188 [12:26:11<13:23:17, 7.55s/it] 48%|████▊ | 5809/12188 [12:26:18<13:19:09, 7.52s/it] {'loss': 0.3397, 'grad_norm': 0.6170460325650196, 'learning_rate': 5.621551107738612e-06, 'epoch': 0.48} + 48%|████▊ | 5809/12188 [12:26:18<13:19:09, 7.52s/it] 48%|████▊ | 5810/12188 [12:26:26<13:31:04, 7.63s/it] {'loss': 0.3724, 'grad_norm': 0.645885724607222, 'learning_rate': 5.620232685967401e-06, 'epoch': 0.48} + 48%|████▊ | 5810/12188 [12:26:26<13:31:04, 7.63s/it] 48%|████▊ | 5811/12188 [12:26:33<13:12:58, 7.46s/it] {'loss': 0.3249, 'grad_norm': 0.6686065530027615, 'learning_rate': 5.6189142203963555e-06, 'epoch': 0.48} + 48%|████▊ | 5811/12188 [12:26:33<13:12:58, 7.46s/it] 48%|████▊ | 5812/12188 [12:26:41<13:27:00, 7.59s/it] {'loss': 0.3382, 'grad_norm': 0.7316135058939681, 'learning_rate': 5.617595711118581e-06, 'epoch': 0.48} + 48%|████▊ | 5812/12188 [12:26:41<13:27:00, 7.59s/it] 48%|████▊ | 5813/12188 [12:26:49<13:19:00, 7.52s/it] {'loss': 0.3342, 'grad_norm': 0.6691424404557327, 'learning_rate': 5.616277158227189e-06, 'epoch': 0.48} + 48%|████▊ | 5813/12188 [12:26:49<13:19:00, 7.52s/it] 48%|████▊ | 5814/12188 [12:26:56<13:25:41, 7.58s/it] {'loss': 0.3235, 'grad_norm': 0.7619711302672337, 'learning_rate': 5.614958561815293e-06, 'epoch': 0.48} + 48%|████▊ | 5814/12188 [12:26:56<13:25:41, 7.58s/it] 48%|█���██▊ | 5815/12188 [12:27:03<13:06:32, 7.41s/it] {'loss': 0.3029, 'grad_norm': 0.7216352436105262, 'learning_rate': 5.613639921976014e-06, 'epoch': 0.48} + 48%|████▊ | 5815/12188 [12:27:03<13:06:32, 7.41s/it] 48%|████▊ | 5816/12188 [12:27:11<13:31:10, 7.64s/it] {'loss': 0.3414, 'grad_norm': 0.6899617764570439, 'learning_rate': 5.612321238802469e-06, 'epoch': 0.48} + 48%|████▊ | 5816/12188 [12:27:11<13:31:10, 7.64s/it] 48%|████▊ | 5817/12188 [12:27:18<13:06:39, 7.41s/it] {'loss': 0.3708, 'grad_norm': 0.706466634904955, 'learning_rate': 5.611002512387781e-06, 'epoch': 0.48} + 48%|████▊ | 5817/12188 [12:27:18<13:06:39, 7.41s/it] 48%|████▊ | 5818/12188 [12:27:26<13:02:40, 7.37s/it] {'loss': 0.3158, 'grad_norm': 0.6879024478873225, 'learning_rate': 5.609683742825078e-06, 'epoch': 0.48} + 48%|████▊ | 5818/12188 [12:27:26<13:02:40, 7.37s/it] 48%|████▊ | 5819/12188 [12:27:33<12:50:54, 7.26s/it] {'loss': 0.3534, 'grad_norm': 0.6377933801793106, 'learning_rate': 5.608364930207488e-06, 'epoch': 0.48} + 48%|████▊ | 5819/12188 [12:27:33<12:50:54, 7.26s/it] 48%|████▊ | 5820/12188 [12:27:40<12:42:59, 7.19s/it] {'loss': 0.37, 'grad_norm': 0.8269170948875589, 'learning_rate': 5.607046074628144e-06, 'epoch': 0.48} + 48%|████▊ | 5820/12188 [12:27:40<12:42:59, 7.19s/it] 48%|████▊ | 5821/12188 [12:27:47<12:57:24, 7.33s/it] {'loss': 0.269, 'grad_norm': 0.6810750274631096, 'learning_rate': 5.605727176180182e-06, 'epoch': 0.48} + 48%|████▊ | 5821/12188 [12:27:47<12:57:24, 7.33s/it] 48%|████▊ | 5822/12188 [12:27:54<12:36:52, 7.13s/it] {'loss': 0.3528, 'grad_norm': 0.7048896358941374, 'learning_rate': 5.604408234956741e-06, 'epoch': 0.48} + 48%|████▊ | 5822/12188 [12:27:54<12:36:52, 7.13s/it] 48%|████▊ | 5823/12188 [12:28:04<14:11:47, 8.03s/it] {'loss': 0.3169, 'grad_norm': 0.6635748010775592, 'learning_rate': 5.603089251050958e-06, 'epoch': 0.48} + 48%|████▊ | 5823/12188 [12:28:04<14:11:47, 8.03s/it] 48%|████▊ | 5824/12188 [12:28:11<13:49:30, 7.82s/it] {'loss': 0.3119, 'grad_norm': 0.7324038241864584, 'learning_rate': 5.601770224555985e-06, 'epoch': 0.48} + 48%|████▊ | 5824/12188 [12:28:11<13:49:30, 7.82s/it] 48%|████▊ | 5825/12188 [12:28:18<13:22:20, 7.57s/it] {'loss': 0.3264, 'grad_norm': 0.713291944180992, 'learning_rate': 5.600451155564964e-06, 'epoch': 0.48} + 48%|████▊ | 5825/12188 [12:28:18<13:22:20, 7.57s/it] 48%|████▊ | 5826/12188 [12:28:25<12:59:31, 7.35s/it] {'loss': 0.3446, 'grad_norm': 0.6870959972392929, 'learning_rate': 5.599132044171047e-06, 'epoch': 0.48} + 48%|████▊ | 5826/12188 [12:28:25<12:59:31, 7.35s/it] 48%|████▊ | 5827/12188 [12:28:32<12:37:23, 7.14s/it] {'loss': 0.3445, 'grad_norm': 0.9590619932074397, 'learning_rate': 5.597812890467387e-06, 'epoch': 0.48} + 48%|████▊ | 5827/12188 [12:28:32<12:37:23, 7.14s/it] 48%|████▊ | 5828/12188 [12:28:40<13:07:49, 7.43s/it] {'loss': 0.3105, 'grad_norm': 0.7831003764761274, 'learning_rate': 5.596493694547142e-06, 'epoch': 0.48} + 48%|████▊ | 5828/12188 [12:28:40<13:07:49, 7.43s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7fe287509e90> +[Try #0] Failed to fetch sample 4728162 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7fe287509e90> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Glossary'"}, {'from': 'gpt', 'value': '\nclick(x=0.865, y=0.2935)\n'}]} + 48%|████▊ | 5829/12188 [12:28:47<13:10:14, 7.46s/it] {'loss': 0.353, 'grad_norm': 0.6982972386664962, 'learning_rate': 5.59517445650347e-06, 'epoch': 0.48} + 48%|████▊ | 5829/12188 [12:28:47<13:10:14, 7.46s/it] 48%|████▊ | 5830/12188 [12:28:54<12:49:17, 7.26s/it] {'loss': 0.3403, 'grad_norm': 0.7226919706431887, 'learning_rate': 5.593855176429535e-06, 'epoch': 0.48} + 48%|████▊ | 5830/12188 [12:28:54<12:49:17, 7.26s/it] 48%|████▊ | 5831/12188 [12:29:01<12:44:31, 7.22s/it] {'loss': 0.34, 'grad_norm': 0.6806593624694443, 'learning_rate': 5.5925358544185e-06, 'epoch': 0.48} + 48%|████▊ | 5831/12188 [12:29:01<12:44:31, 7.22s/it] 48%|████▊ | 5832/12188 [12:29:08<12:35:46, 7.13s/it] {'loss': 0.3336, 'grad_norm': 0.7041799469926452, 'learning_rate': 5.591216490563537e-06, 'epoch': 0.48} + 48%|████▊ | 5832/12188 [12:29:08<12:35:46, 7.13s/it] 48%|████▊ | 5833/12188 [12:29:15<12:28:27, 7.07s/it] {'loss': 0.3592, 'grad_norm': 0.8710597323594446, 'learning_rate': 5.5898970849578135e-06, 'epoch': 0.48} + 48%|████▊ | 5833/12188 [12:29:15<12:28:27, 7.07s/it] 48%|████▊ | 5834/12188 [12:29:24<13:09:08, 7.45s/it] {'loss': 0.3008, 'grad_norm': 0.8489065654683358, 'learning_rate': 5.588577637694506e-06, 'epoch': 0.48} + 48%|████▊ | 5834/12188 [12:29:24<13:09:08, 7.45s/it] 48%|████▊ | 5835/12188 [12:29:32<13:45:46, 7.80s/it] {'loss': 0.3698, 'grad_norm': 0.7072048441702786, 'learning_rate': 5.5872581488667935e-06, 'epoch': 0.48} + 48%|████▊ | 5835/12188 [12:29:32<13:45:46, 7.80s/it] 48%|████▊ | 5836/12188 [12:29:39<13:25:16, 7.61s/it] {'loss': 0.3776, 'grad_norm': 0.7130703194595901, 'learning_rate': 5.58593861856785e-06, 'epoch': 0.48} + 48%|████▊ | 5836/12188 [12:29:39<13:25:16, 7.61s/it] 48%|████▊ | 5837/12188 [12:29:46<13:08:02, 7.44s/it] {'loss': 0.3675, 'grad_norm': 0.839146751117855, 'learning_rate': 5.584619046890866e-06, 'epoch': 0.48} + 48%|████▊ | 5837/12188 [12:29:46<13:08:02, 7.44s/it] 48%|████▊ | 5838/12188 [12:29:54<13:06:15, 7.43s/it] {'loss': 0.3299, 'grad_norm': 0.8106871431317682, 'learning_rate': 5.583299433929023e-06, 'epoch': 0.48} + 48%|████▊ | 5838/12188 [12:29:54<13:06:15, 7.43s/it] 48%|████▊ | 5839/12188 [12:30:00<12:36:35, 7.15s/it] {'loss': 0.3512, 'grad_norm': 0.7427592582468495, 'learning_rate': 5.581979779775512e-06, 'epoch': 0.48} + 48%|████▊ | 5839/12188 [12:30:00<12:36:35, 7.15s/it] 48%|████▊ | 5840/12188 [12:30:07<12:32:18, 7.11s/it] {'loss': 0.3211, 'grad_norm': 0.8105843207119885, 'learning_rate': 5.5806600845235225e-06, 'epoch': 0.48} + 48%|████▊ | 5840/12188 [12:30:07<12:32:18, 7.11s/it] 48%|████▊ | 5841/12188 [12:30:14<12:26:16, 7.05s/it] {'loss': 0.3255, 'grad_norm': 0.5993572619192716, 'learning_rate': 5.579340348266251e-06, 'epoch': 0.48} + 48%|████▊ | 5841/12188 [12:30:14<12:26:16, 7.05s/it] 48%|████▊ | 5842/12188 [12:30:22<12:33:43, 7.13s/it] {'loss': 0.3417, 'grad_norm': 0.7188691705772143, 'learning_rate': 5.578020571096895e-06, 'epoch': 0.48} + 48%|████▊ | 5842/12188 [12:30:22<12:33:43, 7.13s/it] 48%|████▊ | 5843/12188 [12:30:30<13:10:53, 7.48s/it] {'loss': 0.3328, 'grad_norm': 0.6662376603542941, 'learning_rate': 5.576700753108656e-06, 'epoch': 0.48} + 48%|████▊ | 5843/12188 [12:30:30<13:10:53, 7.48s/it] 48%|████▊ | 5844/12188 [12:30:37<12:46:17, 7.25s/it] {'loss': 0.3628, 'grad_norm': 0.6739622428307548, 'learning_rate': 5.5753808943947355e-06, 'epoch': 0.48} + 48%|████▊ | 5844/12188 [12:30:37<12:46:17, 7.25s/it] 48%|████▊ | 5845/12188 [12:30:43<12:32:28, 7.12s/it] {'loss': 0.3498, 'grad_norm': 0.7478133579230836, 'learning_rate': 5.574060995048341e-06, 'epoch': 0.48} + 48%|████▊ | 5845/12188 [12:30:43<12:32:28, 7.12s/it] 48%|████▊ | 5846/12188 [12:30:51<12:36:01, 7.15s/it] {'loss': 0.3314, 'grad_norm': 0.6330563441168316, 'learning_rate': 5.572741055162681e-06, 'epoch': 0.48} + 48%|████▊ | 5846/12188 [12:30:51<12:36:01, 7.15s/it] 48%|████▊ | 5847/12188 [12:30:57<12:26:55, 7.07s/it] {'loss': 0.3084, 'grad_norm': 0.6975282743506287, 'learning_rate': 5.571421074830967e-06, 'epoch': 0.48} + 48%|████▊ | 5847/12188 [12:30:58<12:26:55, 7.07s/it] 48%|████▊ | 5848/12188 [12:31:05<12:30:00, 7.10s/it] {'loss': 0.3149, 'grad_norm': 0.8517975633126432, 'learning_rate': 5.5701010541464165e-06, 'epoch': 0.48} + 48%|████▊ | 5848/12188 [12:31:05<12:30:00, 7.10s/it] 48%|████▊ | 5849/12188 [12:31:12<12:33:22, 7.13s/it] {'loss': 0.3355, 'grad_norm': 0.596449391657487, 'learning_rate': 5.568780993202244e-06, 'epoch': 0.48} + 48%|████▊ | 5849/12188 [12:31:12<12:33:22, 7.13s/it] 48%|████▊ | 5850/12188 [12:31:23<14:28:52, 8.23s/it] {'loss': 0.3047, 'grad_norm': 0.7185568980164502, 'learning_rate': 5.5674608920916715e-06, 'epoch': 0.48} + 48%|████▊ | 5850/12188 [12:31:23<14:28:52, 8.23s/it] 48%|████▊ | 5851/12188 [12:31:30<13:49:09, 7.85s/it] {'loss': 0.3071, 'grad_norm': 0.7168009685333881, 'learning_rate': 5.566140750907923e-06, 'epoch': 0.48} + 48%|████▊ | 5851/12188 [12:31:30<13:49:09, 7.85s/it] 48%|████▊ | 5852/12188 [12:31:37<13:49:16, 7.85s/it] {'loss': 0.3431, 'grad_norm': 0.6643973589376521, 'learning_rate': 5.564820569744225e-06, 'epoch': 0.48} + 48%|████▊ | 5852/12188 [12:31:37<13:49:16, 7.85s/it] 48%|████▊ | 5853/12188 [12:31:44<13:15:23, 7.53s/it] {'loss': 0.3651, 'grad_norm': 0.7653069856383651, 'learning_rate': 5.5635003486938066e-06, 'epoch': 0.48} + 48%|████▊ | 5853/12188 [12:31:44<13:15:23, 7.53s/it] 48%|████▊ | 5854/12188 [12:31:52<13:35:40, 7.73s/it] {'loss': 0.3351, 'grad_norm': 0.6724736592952175, 'learning_rate': 5.562180087849897e-06, 'epoch': 0.48} + 48%|████▊ | 5854/12188 [12:31:52<13:35:40, 7.73s/it] 48%|████▊ | 5855/12188 [12:32:00<13:17:12, 7.55s/it] {'loss': 0.3323, 'grad_norm': 0.8399929545713878, 'learning_rate': 5.560859787305733e-06, 'epoch': 0.48} + 48%|████▊ | 5855/12188 [12:32:00<13:17:12, 7.55s/it] 48%|████▊ | 5856/12188 [12:32:06<12:56:30, 7.36s/it] {'loss': 0.3318, 'grad_norm': 0.6956448956703046, 'learning_rate': 5.559539447154552e-06, 'epoch': 0.48} + 48%|████▊ | 5856/12188 [12:32:07<12:56:30, 7.36s/it] 48%|████▊ | 5857/12188 [12:32:14<12:47:31, 7.27s/it] {'loss': 0.3016, 'grad_norm': 1.0041623515145273, 'learning_rate': 5.558219067489594e-06, 'epoch': 0.48} + 48%|████▊ | 5857/12188 [12:32:14<12:47:31, 7.27s/it] 48%|████▊ | 5858/12188 [12:32:22<13:21:45, 7.60s/it] {'loss': 0.3077, 'grad_norm': 0.7011019679016505, 'learning_rate': 5.556898648404104e-06, 'epoch': 0.48} + 48%|████▊ | 5858/12188 [12:32:22<13:21:45, 7.60s/it] 48%|████▊ | 5859/12188 [12:32:29<12:57:36, 7.37s/it] {'loss': 0.3542, 'grad_norm': 0.7646111348580772, 'learning_rate': 5.555578189991324e-06, 'epoch': 0.48} + 48%|████▊ | 5859/12188 [12:32:29<12:57:36, 7.37s/it] 48%|████▊ | 5860/12188 [12:32:36<12:56:47, 7.37s/it] {'loss': 0.3368, 'grad_norm': 0.6639121648571633, 'learning_rate': 5.554257692344507e-06, 'epoch': 0.48} + 48%|████▊ | 5860/12188 [12:32:36<12:56:47, 7.37s/it] 48%|████▊ | 5861/12188 [12:32:43<12:44:50, 7.25s/it] {'loss': 0.3368, 'grad_norm': 0.6606963317400342, 'learning_rate': 5.552937155556901e-06, 'epoch': 0.48} + 48%|████▊ | 5861/12188 [12:32:43<12:44:50, 7.25s/it] 48%|████▊ | 5862/12188 [12:32:52<13:42:26, 7.80s/it] {'loss': 0.3209, 'grad_norm': 0.7987879916261934, 'learning_rate': 5.5516165797217615e-06, 'epoch': 0.48} + 48%|████▊ | 5862/12188 [12:32:52<13:42:26, 7.80s/it] 48%|████▊ | 5863/12188 [12:33:00<13:36:54, 7.75s/it] {'loss': 0.3148, 'grad_norm': 0.627885450814271, 'learning_rate': 5.550295964932344e-06, 'epoch': 0.48} + 48%|████▊ | 5863/12188 [12:33:00<13:36:54, 7.75s/it] 48%|████▊ | 5864/12188 [12:33:07<13:12:43, 7.52s/it] {'loss': 0.3854, 'grad_norm': 0.7064505181226096, 'learning_rate': 5.548975311281911e-06, 'epoch': 0.48} + 48%|████▊ | 5864/12188 [12:33:07<13:12:43, 7.52s/it] 48%|████▊ | 5865/12188 [12:33:13<12:41:13, 7.22s/it] {'loss': 0.3293, 'grad_norm': 0.6972311013154606, 'learning_rate': 5.5476546188637235e-06, 'epoch': 0.48} + 48%|████▊ | 5865/12188 [12:33:13<12:41:13, 7.22s/it] 48%|████▊ | 5866/12188 [12:33:20<12:29:57, 7.12s/it] {'loss': 0.3202, 'grad_norm': 0.794610204621401, 'learning_rate': 5.546333887771047e-06, 'epoch': 0.48} + 48%|████▊ | 5866/12188 [12:33:20<12:29:57, 7.12s/it] 48%|████▊ | 5867/12188 [12:33:27<12:21:09, 7.04s/it] {'loss': 0.311, 'grad_norm': 0.8544085075226078, 'learning_rate': 5.54501311809715e-06, 'epoch': 0.48} + 48%|████▊ | 5867/12188 [12:33:27<12:21:09, 7.04s/it] 48%|████▊ | 5868/12188 [12:33:35<12:39:39, 7.21s/it] {'loss': 0.3479, 'grad_norm': 0.6520828599499673, 'learning_rate': 5.5436923099353005e-06, 'epoch': 0.48} + 48%|████▊ | 5868/12188 [12:33:35<12:39:39, 7.21s/it] 48%|████▊ | 5869/12188 [12:33:42<12:46:14, 7.28s/it] {'loss': 0.3493, 'grad_norm': 0.8547429970933862, 'learning_rate': 5.542371463378773e-06, 'epoch': 0.48} + 48%|████▊ | 5869/12188 [12:33:42<12:46:14, 7.28s/it] 48%|████▊ | 5870/12188 [12:33:50<13:09:01, 7.49s/it] {'loss': 0.3414, 'grad_norm': 0.6397048134741544, 'learning_rate': 5.541050578520846e-06, 'epoch': 0.48} + 48%|████▊ | 5870/12188 [12:33:50<13:09:01, 7.49s/it] 48%|████▊ | 5871/12188 [12:33:57<12:44:59, 7.27s/it] {'loss': 0.3602, 'grad_norm': 0.7562019039907907, 'learning_rate': 5.539729655454795e-06, 'epoch': 0.48} + 48%|████▊ | 5871/12188 [12:33:57<12:44:59, 7.27s/it] 48%|████▊ | 5872/12188 [12:34:04<12:39:27, 7.21s/it] {'loss': 0.3282, 'grad_norm': 0.7418216912404008, 'learning_rate': 5.538408694273902e-06, 'epoch': 0.48} + 48%|████▊ | 5872/12188 [12:34:04<12:39:27, 7.21s/it] 48%|████▊ | 5873/12188 [12:34:12<12:53:34, 7.35s/it] {'loss': 0.3899, 'grad_norm': 0.6934666654962214, 'learning_rate': 5.5370876950714555e-06, 'epoch': 0.48} + 48%|████▊ | 5873/12188 [12:34:12<12:53:34, 7.35s/it] 48%|████▊ | 5874/12188 [12:34:20<13:40:01, 7.79s/it] {'loss': 0.3281, 'grad_norm': 0.7334228867111794, 'learning_rate': 5.535766657940737e-06, 'epoch': 0.48} + 48%|████▊ | 5874/12188 [12:34:20<13:40:01, 7.79s/it] 48%|████▊ | 5875/12188 [12:34:28<13:46:35, 7.86s/it] {'loss': 0.3277, 'grad_norm': 1.0688747931956895, 'learning_rate': 5.534445582975038e-06, 'epoch': 0.48} + 48%|████▊ | 5875/12188 [12:34:28<13:46:35, 7.86s/it] 48%|████▊ | 5876/12188 [12:34:35<13:06:52, 7.48s/it] {'loss': 0.3554, 'grad_norm': 0.6972486673632109, 'learning_rate': 5.53312447026765e-06, 'epoch': 0.48} + 48%|████▊ | 5876/12188 [12:34:35<13:06:52, 7.48s/it] 48%|████▊ | 5877/12188 [12:34:43<13:09:09, 7.50s/it] {'loss': 0.3091, 'grad_norm': 0.686434028121366, 'learning_rate': 5.531803319911868e-06, 'epoch': 0.48} + 48%|████▊ | 5877/12188 [12:34:43<13:09:09, 7.50s/it] 48%|████▊ | 5878/12188 [12:34:50<13:12:45, 7.54s/it] {'loss': 0.3454, 'grad_norm': 1.3017686310936936, 'learning_rate': 5.530482132000991e-06, 'epoch': 0.48} + 48%|████▊ | 5878/12188 [12:34:50<13:12:45, 7.54s/it] 48%|████▊ | 5879/12188 [12:34:57<12:58:49, 7.41s/it] {'loss': 0.3926, 'grad_norm': 0.6762384144058724, 'learning_rate': 5.529160906628318e-06, 'epoch': 0.48} + 48%|████▊ | 5879/12188 [12:34:57<12:58:49, 7.41s/it] 48%|████▊ | 5880/12188 [12:35:04<12:36:54, 7.20s/it] {'loss': 0.322, 'grad_norm': 0.7474371827719036, 'learning_rate': 5.5278396438871516e-06, 'epoch': 0.48} + 48%|████▊ | 5880/12188 [12:35:04<12:36:54, 7.20s/it] 48%|████▊ | 5881/12188 [12:35:12<13:11:08, 7.53s/it] {'loss': 0.309, 'grad_norm': 0.6838525117016092, 'learning_rate': 5.526518343870798e-06, 'epoch': 0.48} + 48%|████▊ | 5881/12188 [12:35:12<13:11:08, 7.53s/it] 48%|████▊ | 5882/12188 [12:35:19<12:51:25, 7.34s/it] {'loss': 0.3199, 'grad_norm': 0.6911322879658597, 'learning_rate': 5.525197006672565e-06, 'epoch': 0.48} + 48%|████▊ | 5882/12188 [12:35:19<12:51:25, 7.34s/it] 48%|████▊ | 5883/12188 [12:35:26<12:44:30, 7.28s/it] {'loss': 0.3242, 'grad_norm': 0.6640910159533756, 'learning_rate': 5.523875632385762e-06, 'epoch': 0.48} + 48%|████▊ | 5883/12188 [12:35:26<12:44:30, 7.28s/it] 48%|████▊ | 5884/12188 [12:35:33<12:28:35, 7.12s/it] {'loss': 0.3282, 'grad_norm': 0.7051032973983383, 'learning_rate': 5.522554221103705e-06, 'epoch': 0.48} + 48%|████▊ | 5884/12188 [12:35:33<12:28:35, 7.12s/it] 48%|████▊ | 5885/12188 [12:35:40<12:27:33, 7.12s/it] {'loss': 0.3372, 'grad_norm': 0.6869162977231653, 'learning_rate': 5.521232772919708e-06, 'epoch': 0.48} + 48%|████▊ | 5885/12188 [12:35:40<12:27:33, 7.12s/it] 48%|████▊ | 5886/12188 [12:35:47<12:28:14, 7.12s/it] {'loss': 0.3368, 'grad_norm': 0.879541939780528, 'learning_rate': 5.519911287927089e-06, 'epoch': 0.48} + 48%|████▊ | 5886/12188 [12:35:47<12:28:14, 7.12s/it] 48%|████▊ | 5887/12188 [12:35:56<13:05:53, 7.48s/it] {'loss': 0.3092, 'grad_norm': 0.7021724220981687, 'learning_rate': 5.518589766219173e-06, 'epoch': 0.48} + 48%|████▊ | 5887/12188 [12:35:56<13:05:53, 7.48s/it] 48%|████▊ | 5888/12188 [12:36:02<12:40:50, 7.25s/it] {'loss': 0.332, 'grad_norm': 0.6642643657634221, 'learning_rate': 5.517268207889278e-06, 'epoch': 0.48} + 48%|████▊ | 5888/12188 [12:36:02<12:40:50, 7.25s/it] 48%|████▊ | 5889/12188 [12:36:11<13:37:08, 7.78s/it] {'loss': 0.3297, 'grad_norm': 0.7098895361922243, 'learning_rate': 5.515946613030734e-06, 'epoch': 0.48} + 48%|████▊ | 5889/12188 [12:36:11<13:37:08, 7.78s/it] 48%|████▊ | 5890/12188 [12:36:19<13:17:32, 7.60s/it] {'loss': 0.3105, 'grad_norm': 0.7526916630547397, 'learning_rate': 5.514624981736868e-06, 'epoch': 0.48} + 48%|████▊ | 5890/12188 [12:36:19<13:17:32, 7.60s/it] 48%|████▊ | 5891/12188 [12:36:25<12:54:02, 7.38s/it] {'loss': 0.3462, 'grad_norm': 0.7446035483210831, 'learning_rate': 5.513303314101014e-06, 'epoch': 0.48} + 48%|████▊ | 5891/12188 [12:36:25<12:54:02, 7.38s/it] 48%|████▊ | 5892/12188 [12:36:32<12:37:39, 7.22s/it] {'loss': 0.3346, 'grad_norm': 0.6721148830955618, 'learning_rate': 5.511981610216505e-06, 'epoch': 0.48} + 48%|████▊ | 5892/12188 [12:36:32<12:37:39, 7.22s/it] 48%|████▊ | 5893/12188 [12:36:39<12:37:10, 7.22s/it] {'loss': 0.3466, 'grad_norm': 0.6791299065039735, 'learning_rate': 5.510659870176677e-06, 'epoch': 0.48} + 48%|████▊ | 5893/12188 [12:36:40<12:37:10, 7.22s/it] 48%|████▊ | 5894/12188 [12:36:46<12:30:05, 7.15s/it] {'loss': 0.3289, 'grad_norm': 0.6835186766180341, 'learning_rate': 5.509338094074869e-06, 'epoch': 0.48} + 48%|████▊ | 5894/12188 [12:36:47<12:30:05, 7.15s/it] 48%|████▊ | 5895/12188 [12:36:53<12:24:44, 7.10s/it] {'loss': 0.3206, 'grad_norm': 0.6944111435609946, 'learning_rate': 5.508016282004422e-06, 'epoch': 0.48} + 48%|████▊ | 5895/12188 [12:36:53<12:24:44, 7.10s/it] 48%|████▊ | 5896/12188 [12:37:04<14:07:44, 8.08s/it] {'loss': 0.2957, 'grad_norm': 0.6597400884066319, 'learning_rate': 5.506694434058683e-06, 'epoch': 0.48} + 48%|████▊ | 5896/12188 [12:37:04<14:07:44, 8.08s/it] 48%|████▊ | 5897/12188 [12:37:10<13:21:41, 7.65s/it] {'loss': 0.3239, 'grad_norm': 0.6556973159083284, 'learning_rate': 5.505372550330997e-06, 'epoch': 0.48} + 48%|████▊ | 5897/12188 [12:37:10<13:21:41, 7.65s/it] 48%|████▊ | 5898/12188 [12:37:18<13:05:56, 7.50s/it] {'loss': 0.3156, 'grad_norm': 0.6709615202792696, 'learning_rate': 5.504050630914714e-06, 'epoch': 0.48} + 48%|████▊ | 5898/12188 [12:37:18<13:05:56, 7.50s/it] 48%|████▊ | 5899/12188 [12:37:24<12:29:13, 7.15s/it] {'loss': 0.3133, 'grad_norm': 0.6653448554622636, 'learning_rate': 5.5027286759031825e-06, 'epoch': 0.48} + 48%|████▊ | 5899/12188 [12:37:24<12:29:13, 7.15s/it] 48%|████▊ | 5900/12188 [12:37:31<12:38:29, 7.24s/it] {'loss': 0.375, 'grad_norm': 0.6404608068769828, 'learning_rate': 5.501406685389762e-06, 'epoch': 0.48} + 48%|████▊ | 5900/12188 [12:37:31<12:38:29, 7.24s/it] 48%|████▊ | 5901/12188 [12:37:39<12:54:57, 7.40s/it] {'loss': 0.3295, 'grad_norm': 0.6393399155251961, 'learning_rate': 5.500084659467807e-06, 'epoch': 0.48} + 48%|████▊ | 5901/12188 [12:37:39<12:54:57, 7.40s/it] 48%|████▊ | 5902/12188 [12:37:46<12:38:38, 7.24s/it] {'loss': 0.3078, 'grad_norm': 0.6715515814150057, 'learning_rate': 5.4987625982306765e-06, 'epoch': 0.48} + 48%|████▊ | 5902/12188 [12:37:46<12:38:38, 7.24s/it] 48%|████▊ | 5903/12188 [12:37:54<12:46:24, 7.32s/it] {'loss': 0.3252, 'grad_norm': 0.9020843978606343, 'learning_rate': 5.497440501771732e-06, 'epoch': 0.48} + 48%|████▊ | 5903/12188 [12:37:54<12:46:24, 7.32s/it] 48%|████▊ | 5904/12188 [12:38:01<12:59:51, 7.45s/it] {'loss': 0.353, 'grad_norm': 0.848469533981476, 'learning_rate': 5.496118370184339e-06, 'epoch': 0.48} + 48%|████▊ | 5904/12188 [12:38:01<12:59:51, 7.45s/it] 48%|████▊ | 5905/12188 [12:38:09<13:02:00, 7.47s/it] {'loss': 0.3244, 'grad_norm': 0.8853815169686621, 'learning_rate': 5.4947962035618626e-06, 'epoch': 0.48} + 48%|████▊ | 5905/12188 [12:38:09<13:02:00, 7.47s/it] 48%|████▊ | 5906/12188 [12:38:16<12:58:19, 7.43s/it] {'loss': 0.344, 'grad_norm': 0.6816405599103, 'learning_rate': 5.493474001997675e-06, 'epoch': 0.48} + 48%|████▊ | 5906/12188 [12:38:16<12:58:19, 7.43s/it] 48%|████▊ | 5907/12188 [12:38:23<12:49:05, 7.35s/it] {'loss': 0.355, 'grad_norm': 0.6499035666270017, 'learning_rate': 5.4921517655851456e-06, 'epoch': 0.48} + 48%|████▊ | 5907/12188 [12:38:23<12:49:05, 7.35s/it] 48%|████▊ | 5908/12188 [12:38:31<12:54:27, 7.40s/it] {'loss': 0.3263, 'grad_norm': 0.6871782261652233, 'learning_rate': 5.490829494417649e-06, 'epoch': 0.48} + 48%|████▊ | 5908/12188 [12:38:31<12:54:27, 7.40s/it] 48%|████▊ | 5909/12188 [12:38:38<12:34:58, 7.21s/it] {'loss': 0.3002, 'grad_norm': 0.8548358096578865, 'learning_rate': 5.489507188588562e-06, 'epoch': 0.48} + 48%|█��██▊ | 5909/12188 [12:38:38<12:34:58, 7.21s/it] 48%|████▊ | 5910/12188 [12:38:46<12:57:48, 7.43s/it] {'loss': 0.3347, 'grad_norm': 0.7273205159467313, 'learning_rate': 5.488184848191265e-06, 'epoch': 0.48} + 48%|████▊ | 5910/12188 [12:38:46<12:57:48, 7.43s/it] 48%|████▊ | 5911/12188 [12:38:53<12:45:40, 7.32s/it] {'loss': 0.3407, 'grad_norm': 0.7029351643126349, 'learning_rate': 5.486862473319137e-06, 'epoch': 0.48} + 48%|████▊ | 5911/12188 [12:38:53<12:45:40, 7.32s/it] 49%|████▊ | 5912/12188 [12:39:00<12:47:06, 7.33s/it] {'loss': 0.3269, 'grad_norm': 0.7020731817113454, 'learning_rate': 5.485540064065563e-06, 'epoch': 0.49} + 49%|████▊ | 5912/12188 [12:39:00<12:47:06, 7.33s/it] 49%|████▊ | 5913/12188 [12:39:08<12:56:37, 7.43s/it] {'loss': 0.3602, 'grad_norm': 0.7006233774815915, 'learning_rate': 5.48421762052393e-06, 'epoch': 0.49} + 49%|████▊ | 5913/12188 [12:39:08<12:56:37, 7.43s/it] 49%|████▊ | 5914/12188 [12:39:15<12:47:42, 7.34s/it] {'loss': 0.301, 'grad_norm': 0.6114954601302944, 'learning_rate': 5.482895142787626e-06, 'epoch': 0.49} + 49%|████▊ | 5914/12188 [12:39:15<12:47:42, 7.34s/it] 49%|████▊ | 5915/12188 [12:39:22<12:42:39, 7.29s/it] {'loss': 0.3744, 'grad_norm': 0.670885207925429, 'learning_rate': 5.481572630950046e-06, 'epoch': 0.49} + 49%|████▊ | 5915/12188 [12:39:22<12:42:39, 7.29s/it] 49%|████▊ | 5916/12188 [12:39:29<12:30:16, 7.18s/it] {'loss': 0.3327, 'grad_norm': 0.6989005313306871, 'learning_rate': 5.480250085104578e-06, 'epoch': 0.49} + 49%|████▊ | 5916/12188 [12:39:29<12:30:16, 7.18s/it] 49%|████▊ | 5917/12188 [12:39:36<12:19:33, 7.08s/it] {'loss': 0.3786, 'grad_norm': 0.6759939145153354, 'learning_rate': 5.47892750534462e-06, 'epoch': 0.49} + 49%|████▊ | 5917/12188 [12:39:36<12:19:33, 7.08s/it] 49%|████▊ | 5918/12188 [12:39:43<12:14:19, 7.03s/it] {'loss': 0.3225, 'grad_norm': 0.7453123616951471, 'learning_rate': 5.477604891763573e-06, 'epoch': 0.49} + 49%|████▊ | 5918/12188 [12:39:43<12:14:19, 7.03s/it] 49%|████▊ | 5919/12188 [12:39:50<12:12:01, 7.01s/it] {'loss': 0.3339, 'grad_norm': 0.8373622799802783, 'learning_rate': 5.476282244454835e-06, 'epoch': 0.49} + 49%|████▊ | 5919/12188 [12:39:50<12:12:01, 7.01s/it] 49%|████▊ | 5920/12188 [12:39:57<12:11:57, 7.01s/it] {'loss': 0.3734, 'grad_norm': 0.6536431992704383, 'learning_rate': 5.4749595635118095e-06, 'epoch': 0.49} + 49%|████▊ | 5920/12188 [12:39:57<12:11:57, 7.01s/it] 49%|████▊ | 5921/12188 [12:40:05<12:43:34, 7.31s/it] {'loss': 0.3177, 'grad_norm': 0.6208002677950261, 'learning_rate': 5.4736368490279035e-06, 'epoch': 0.49} + 49%|████▊ | 5921/12188 [12:40:05<12:43:34, 7.31s/it] 49%|████▊ | 5922/12188 [12:40:15<14:09:26, 8.13s/it] {'loss': 0.3508, 'grad_norm': 0.6303830401580974, 'learning_rate': 5.472314101096523e-06, 'epoch': 0.49} + 49%|████▊ | 5922/12188 [12:40:15<14:09:26, 8.13s/it] 49%|████▊ | 5923/12188 [12:40:22<13:40:02, 7.85s/it] {'loss': 0.3233, 'grad_norm': 0.860686955689994, 'learning_rate': 5.470991319811081e-06, 'epoch': 0.49} + 49%|████▊ | 5923/12188 [12:40:22<13:40:02, 7.85s/it] 49%|████▊ | 5924/12188 [12:40:29<13:04:05, 7.51s/it] {'loss': 0.3408, 'grad_norm': 0.7058584164138638, 'learning_rate': 5.469668505264988e-06, 'epoch': 0.49} + 49%|████▊ | 5924/12188 [12:40:29<13:04:05, 7.51s/it] 49%|████▊ | 5925/12188 [12:40:36<13:08:34, 7.55s/it] {'loss': 0.3257, 'grad_norm': 0.6960648084882163, 'learning_rate': 5.468345657551659e-06, 'epoch': 0.49} + 49%|████▊ | 5925/12188 [12:40:36<13:08:34, 7.55s/it] 49%|████▊ | 5926/12188 [12:40:44<13:00:59, 7.48s/it] {'loss': 0.3633, 'grad_norm': 0.6950950337237048, 'learning_rate': 5.467022776764512e-06, 'epoch': 0.49} + 49%|████▊ | 5926/12188 [12:40:44<13:00:59, 7.48s/it] 49%|████▊ | 5927/12188 [12:40:50<12:40:30, 7.29s/it] {'loss': 0.3764, 'grad_norm': 0.7283644322978563, 'learning_rate': 5.4656998629969685e-06, 'epoch': 0.49} + 49%|████▊ | 5927/12188 [12:40:50<12:40:30, 7.29s/it] 49%|████▊ | 5928/12188 [12:40:57<12:31:47, 7.21s/it] {'loss': 0.3174, 'grad_norm': 0.72475490794421, 'learning_rate': 5.464376916342447e-06, 'epoch': 0.49} + 49%|████▊ | 5928/12188 [12:40:57<12:31:47, 7.21s/it] 49%|████▊ | 5929/12188 [12:41:05<12:32:47, 7.22s/it] {'loss': 0.3265, 'grad_norm': 0.6742302293385076, 'learning_rate': 5.463053936894377e-06, 'epoch': 0.49} + 49%|████▊ | 5929/12188 [12:41:05<12:32:47, 7.22s/it] 49%|████▊ | 5930/12188 [12:41:11<12:20:26, 7.10s/it] {'loss': 0.3421, 'grad_norm': 0.6821001875094584, 'learning_rate': 5.461730924746181e-06, 'epoch': 0.49} + 49%|████▊ | 5930/12188 [12:41:11<12:20:26, 7.10s/it] 49%|████▊ | 5931/12188 [12:41:19<12:37:18, 7.26s/it] {'loss': 0.3472, 'grad_norm': 0.9026259083443373, 'learning_rate': 5.460407879991285e-06, 'epoch': 0.49} + 49%|████▊ | 5931/12188 [12:41:19<12:37:18, 7.26s/it] 49%|████▊ | 5932/12188 [12:41:26<12:26:38, 7.16s/it] {'loss': 0.3015, 'grad_norm': 0.7191333723644797, 'learning_rate': 5.459084802723128e-06, 'epoch': 0.49} + 49%|████▊ | 5932/12188 [12:41:26<12:26:38, 7.16s/it] 49%|████▊ | 5933/12188 [12:41:33<12:22:50, 7.13s/it] {'loss': 0.3291, 'grad_norm': 0.8367875688776854, 'learning_rate': 5.457761693035139e-06, 'epoch': 0.49} + 49%|████▊ | 5933/12188 [12:41:33<12:22:50, 7.13s/it] 49%|████▊ | 5934/12188 [12:41:42<13:06:32, 7.55s/it] {'loss': 0.3091, 'grad_norm': 0.712999670393838, 'learning_rate': 5.456438551020754e-06, 'epoch': 0.49} + 49%|████▊ | 5934/12188 [12:41:42<13:06:32, 7.55s/it] 49%|████▊ | 5935/12188 [12:41:48<12:37:39, 7.27s/it] {'loss': 0.3512, 'grad_norm': 0.6511819782837653, 'learning_rate': 5.455115376773412e-06, 'epoch': 0.49} + 49%|████▊ | 5935/12188 [12:41:48<12:37:39, 7.27s/it] 49%|████▊ | 5936/12188 [12:41:55<12:30:44, 7.20s/it] {'loss': 0.3502, 'grad_norm': 0.8562991141173595, 'learning_rate': 5.453792170386554e-06, 'epoch': 0.49} + 49%|████▊ | 5936/12188 [12:41:55<12:30:44, 7.20s/it] 49%|████▊ | 5937/12188 [12:42:04<13:15:11, 7.63s/it] {'loss': 0.3086, 'grad_norm': 0.6309868091011173, 'learning_rate': 5.452468931953622e-06, 'epoch': 0.49} + 49%|████▊ | 5937/12188 [12:42:04<13:15:11, 7.63s/it] 49%|████▊ | 5938/12188 [12:42:11<13:06:53, 7.55s/it] {'loss': 0.3688, 'grad_norm': 0.9311776174353888, 'learning_rate': 5.45114566156806e-06, 'epoch': 0.49} + 49%|████▊ | 5938/12188 [12:42:11<13:06:53, 7.55s/it] 49%|████▊ | 5939/12188 [12:42:19<13:00:28, 7.49s/it] {'loss': 0.3358, 'grad_norm': 0.9003723581613301, 'learning_rate': 5.449822359323317e-06, 'epoch': 0.49} + 49%|████▊ | 5939/12188 [12:42:19<13:00:28, 7.49s/it] 49%|████▊ | 5940/12188 [12:42:25<12:36:50, 7.27s/it] {'loss': 0.312, 'grad_norm': 1.0572113387189237, 'learning_rate': 5.44849902531284e-06, 'epoch': 0.49} + 49%|████▊ | 5940/12188 [12:42:25<12:36:50, 7.27s/it] 49%|████▊ | 5941/12188 [12:42:33<13:01:56, 7.51s/it] {'loss': 0.3191, 'grad_norm': 0.9263548033955961, 'learning_rate': 5.447175659630084e-06, 'epoch': 0.49} + 49%|████▊ | 5941/12188 [12:42:33<13:01:56, 7.51s/it] 49%|████▉ | 5942/12188 [12:42:40<12:26:31, 7.17s/it] {'loss': 0.3307, 'grad_norm': 1.7929792260933854, 'learning_rate': 5.445852262368501e-06, 'epoch': 0.49} + 49%|████▉ | 5942/12188 [12:42:40<12:26:31, 7.17s/it] 49%|████▉ | 5943/12188 [12:42:47<12:16:35, 7.08s/it] {'loss': 0.3412, 'grad_norm': 0.6852857198901435, 'learning_rate': 5.444528833621549e-06, 'epoch': 0.49} + 49%|████▉ | 5943/12188 [12:42:47<12:16:35, 7.08s/it] 49%|████▉ | 5944/12188 [12:42:55<12:42:36, 7.33s/it] {'loss': 0.3342, 'grad_norm': 0.6981393990983255, 'learning_rate': 5.443205373482683e-06, 'epoch': 0.49} + 49%|████▉ | 5944/12188 [12:42:55<12:42:36, 7.33s/it] 49%|████▉ | 5945/12188 [12:43:01<12:27:59, 7.19s/it] {'loss': 0.3375, 'grad_norm': 0.9595926302983052, 'learning_rate': 5.441881882045366e-06, 'epoch': 0.49} + 49%|████▉ | 5945/12188 [12:43:01<12:27:59, 7.19s/it] 49%|████▉ | 5946/12188 [12:43:10<13:06:50, 7.56s/it] {'loss': 0.3429, 'grad_norm': 0.6792422498394335, 'learning_rate': 5.440558359403061e-06, 'epoch': 0.49} + 49%|████▉ | 5946/12188 [12:43:10<13:06:50, 7.56s/it] 49%|████▉ | 5947/12188 [12:43:17<12:47:32, 7.38s/it] {'loss': 0.3149, 'grad_norm': 0.7303786146464281, 'learning_rate': 5.439234805649232e-06, 'epoch': 0.49} + 49%|████▉ | 5947/12188 [12:43:17<12:47:32, 7.38s/it] 49%|████▉ | 5948/12188 [12:43:24<12:43:37, 7.34s/it] {'loss': 0.3182, 'grad_norm': 0.7264856885020243, 'learning_rate': 5.437911220877346e-06, 'epoch': 0.49} + 49%|████▉ | 5948/12188 [12:43:24<12:43:37, 7.34s/it] 49%|████▉ | 5949/12188 [12:43:31<12:42:05, 7.33s/it] {'loss': 0.3537, 'grad_norm': 0.8591064176375152, 'learning_rate': 5.436587605180875e-06, 'epoch': 0.49} + 49%|████▉ | 5949/12188 [12:43:31<12:42:05, 7.33s/it] 49%|████▉ | 5950/12188 [12:43:38<12:24:54, 7.16s/it] {'loss': 0.3361, 'grad_norm': 0.747396185425496, 'learning_rate': 5.435263958653287e-06, 'epoch': 0.49} + 49%|████▉ | 5950/12188 [12:43:38<12:24:54, 7.16s/it] 49%|████▉ | 5951/12188 [12:43:45<12:19:16, 7.11s/it] {'loss': 0.3152, 'grad_norm': 0.6936321637994561, 'learning_rate': 5.433940281388059e-06, 'epoch': 0.49} + 49%|████▉ | 5951/12188 [12:43:45<12:19:16, 7.11s/it] 49%|████▉ | 5952/12188 [12:43:52<12:15:48, 7.08s/it] {'loss': 0.3399, 'grad_norm': 0.7316760709677798, 'learning_rate': 5.432616573478665e-06, 'epoch': 0.49} + 49%|████▉ | 5952/12188 [12:43:52<12:15:48, 7.08s/it] 49%|████▉ | 5953/12188 [12:43:59<12:01:00, 6.94s/it] {'loss': 0.3426, 'grad_norm': 1.07565946485485, 'learning_rate': 5.4312928350185826e-06, 'epoch': 0.49} + 49%|████▉ | 5953/12188 [12:43:59<12:01:00, 6.94s/it] 49%|████▉ | 5954/12188 [12:44:07<12:26:21, 7.18s/it] {'loss': 0.3276, 'grad_norm': 0.6438669454318987, 'learning_rate': 5.429969066101294e-06, 'epoch': 0.49} + 49%|████▉ | 5954/12188 [12:44:07<12:26:21, 7.18s/it] 49%|████▉ | 5955/12188 [12:44:14<12:22:09, 7.14s/it] {'loss': 0.3163, 'grad_norm': 0.6394467971435926, 'learning_rate': 5.428645266820281e-06, 'epoch': 0.49} + 49%|████▉ | 5955/12188 [12:44:14<12:22:09, 7.14s/it] 49%|████▉ | 5956/12188 [12:44:20<12:14:56, 7.08s/it] {'loss': 0.3226, 'grad_norm': 0.679423844398113, 'learning_rate': 5.427321437269027e-06, 'epoch': 0.49} + 49%|████▉ | 5956/12188 [12:44:21<12:14:56, 7.08s/it] 49%|████▉ | 5957/12188 [12:44:27<12:07:10, 7.00s/it] {'loss': 0.275, 'grad_norm': 0.6868827336728175, 'learning_rate': 5.425997577541022e-06, 'epoch': 0.49} + 49%|████▉ | 5957/12188 [12:44:27<12:07:10, 7.00s/it] 49%|████▉ | 5958/12188 [12:44:34<12:00:05, 6.94s/it] {'loss': 0.3214, 'grad_norm': 0.7780450410307794, 'learning_rate': 5.42467368772975e-06, 'epoch': 0.49} + 49%|████▉ | 5958/12188 [12:44:34<12:00:05, 6.94s/it] 49%|████▉ | 5959/12188 [12:44:41<12:08:52, 7.02s/it] {'loss': 0.3481, 'grad_norm': 0.6668393994346512, 'learning_rate': 5.423349767928705e-06, 'epoch': 0.49} + 49%|████▉ | 5959/12188 [12:44:41<12:08:52, 7.02s/it] 49%|████▉ | 5960/12188 [12:44:52<14:09:41, 8.19s/it] {'loss': 0.3523, 'grad_norm': 0.6641863159636754, 'learning_rate': 5.42202581823138e-06, 'epoch': 0.49} + 49%|████▉ | 5960/12188 [12:44:52<14:09:41, 8.19s/it] 49%|████▉ | 5961/12188 [12:45:00<13:50:39, 8.00s/it] {'loss': 0.3364, 'grad_norm': 0.7153028275308563, 'learning_rate': 5.4207018387312695e-06, 'epoch': 0.49} + 49%|████▉ | 5961/12188 [12:45:00<13:50:39, 8.00s/it] 49%|████▉ | 5962/12188 [12:45:07<13:31:11, 7.82s/it] {'loss': 0.3291, 'grad_norm': 0.7092205182071456, 'learning_rate': 5.41937782952187e-06, 'epoch': 0.49} + 49%|████▉ | 5962/12188 [12:45:07<13:31:11, 7.82s/it] 49%|████▉ | 5963/12188 [12:45:15<13:15:27, 7.67s/it] {'loss': 0.2997, 'grad_norm': 0.8007451602906069, 'learning_rate': 5.418053790696685e-06, 'epoch': 0.49} + 49%|████▉ | 5963/12188 [12:45:15<13:15:27, 7.67s/it] 49%|████▉ | 5964/12188 [12:45:21<12:48:26, 7.41s/it] {'loss': 0.3428, 'grad_norm': 0.666871152415175, 'learning_rate': 5.41672972234921e-06, 'epoch': 0.49} + 49%|████▉ | 5964/12188 [12:45:21<12:48:26, 7.41s/it] 49%|████▉ | 5965/12188 [12:45:28<12:29:46, 7.23s/it] {'loss': 0.3542, 'grad_norm': 0.8906622876586006, 'learning_rate': 5.415405624572953e-06, 'epoch': 0.49} + 49%|████▉ | 5965/12188 [12:45:28<12:29:46, 7.23s/it] 49%|████▉ | 5966/12188 [12:45:36<12:40:52, 7.34s/it] {'loss': 0.3435, 'grad_norm': 0.6123522129579323, 'learning_rate': 5.4140814974614175e-06, 'epoch': 0.49} + 49%|████▉ | 5966/12188 [12:45:36<12:40:52, 7.34s/it] 49%|████▉ | 5967/12188 [12:45:42<12:21:23, 7.15s/it] {'loss': 0.3283, 'grad_norm': 0.9263203127455067, 'learning_rate': 5.412757341108111e-06, 'epoch': 0.49} + 49%|████▉ | 5967/12188 [12:45:42<12:21:23, 7.15s/it] 49%|████▉ | 5968/12188 [12:45:50<12:29:31, 7.23s/it] {'loss': 0.3652, 'grad_norm': 0.6919976046941833, 'learning_rate': 5.4114331556065455e-06, 'epoch': 0.49} + 49%|████▉ | 5968/12188 [12:45:50<12:29:31, 7.23s/it] 49%|████▉ | 5969/12188 [12:45:57<12:31:19, 7.25s/it] {'loss': 0.3371, 'grad_norm': 0.6362444826933681, 'learning_rate': 5.410108941050232e-06, 'epoch': 0.49} + 49%|████▉ | 5969/12188 [12:45:57<12:31:19, 7.25s/it] 49%|████▉ | 5970/12188 [12:46:05<12:54:36, 7.47s/it] {'loss': 0.2928, 'grad_norm': 0.7361893077733512, 'learning_rate': 5.4087846975326826e-06, 'epoch': 0.49} + 49%|████▉ | 5970/12188 [12:46:05<12:54:36, 7.47s/it] 49%|████▉ | 5971/12188 [12:46:12<12:26:59, 7.21s/it] {'loss': 0.3301, 'grad_norm': 0.6638887859206367, 'learning_rate': 5.407460425147416e-06, 'epoch': 0.49} + 49%|████▉ | 5971/12188 [12:46:12<12:26:59, 7.21s/it] 49%|████▉ | 5972/12188 [12:46:19<12:24:04, 7.18s/it] {'loss': 0.3398, 'grad_norm': 0.6296255596777681, 'learning_rate': 5.406136123987948e-06, 'epoch': 0.49} + 49%|████▉ | 5972/12188 [12:46:19<12:24:04, 7.18s/it] 49%|████▉ | 5973/12188 [12:46:26<12:25:46, 7.20s/it] {'loss': 0.3445, 'grad_norm': 0.7566191332273655, 'learning_rate': 5.404811794147801e-06, 'epoch': 0.49} + 49%|████▉ | 5973/12188 [12:46:26<12:25:46, 7.20s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f4cad79ec00> +[Try #0] Failed to fetch sample 4581186 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f4cad79ec00> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'What links here'"}, {'from': 'gpt', 'value': '\nclick(x=0.865, y=0.361)\n'}]} + 49%|████▉ | 5974/12188 [12:46:33<12:28:28, 7.23s/it] {'loss': 0.3201, 'grad_norm': 0.6308876097661891, 'learning_rate': 5.403487435720493e-06, 'epoch': 0.49} + 49%|████▉ | 5974/12188 [12:46:33<12:28:28, 7.23s/it] 49%|████▉ | 5975/12188 [12:46:40<12:22:58, 7.17s/it] {'loss': 0.3183, 'grad_norm': 0.7961837940172054, 'learning_rate': 5.402163048799553e-06, 'epoch': 0.49} + 49%|████▉ | 5975/12188 [12:46:40<12:22:58, 7.17s/it] 49%|████▉ | 5976/12188 [12:46:48<12:41:57, 7.36s/it] {'loss': 0.3384, 'grad_norm': 1.2456270148612139, 'learning_rate': 5.400838633478502e-06, 'epoch': 0.49} + 49%|████▉ | 5976/12188 [12:46:48<12:41:57, 7.36s/it] 49%|████▉ | 5977/12188 [12:46:56<12:57:37, 7.51s/it] {'loss': 0.3142, 'grad_norm': 0.6237004484913997, 'learning_rate': 5.399514189850873e-06, 'epoch': 0.49} + 49%|████▉ | 5977/12188 [12:46:56<12:57:37, 7.51s/it] 49%|████▉ | 5978/12188 [12:47:04<12:54:32, 7.48s/it] {'loss': 0.3045, 'grad_norm': 0.7043272324007849, 'learning_rate': 5.398189718010193e-06, 'epoch': 0.49} + 49%|████▉ | 5978/12188 [12:47:04<12:54:32, 7.48s/it] 49%|████▉ | 5979/12188 [12:47:10<12:37:39, 7.32s/it] {'loss': 0.3334, 'grad_norm': 0.6931562139930062, 'learning_rate': 5.396865218049995e-06, 'epoch': 0.49} + 49%|████▉ | 5979/12188 [12:47:10<12:37:39, 7.32s/it] 49%|████▉ | 5980/12188 [12:47:19<13:02:59, 7.57s/it] {'loss': 0.3479, 'grad_norm': 0.6762414404526099, 'learning_rate': 5.395540690063811e-06, 'epoch': 0.49} + 49%|████▉ | 5980/12188 [12:47:19<13:02:59, 7.57s/it] 49%|████▉ | 5981/12188 [12:47:26<12:45:06, 7.40s/it] {'loss': 0.2953, 'grad_norm': 0.6066276635957596, 'learning_rate': 5.39421613414518e-06, 'epoch': 0.49} + 49%|████▉ | 5981/12188 [12:47:26<12:45:06, 7.40s/it] 49%|████▉ | 5982/12188 [12:47:32<12:26:32, 7.22s/it] {'loss': 0.3545, 'grad_norm': 0.6695412709247373, 'learning_rate': 5.39289155038764e-06, 'epoch': 0.49} + 49%|████▉ | 5982/12188 [12:47:32<12:26:32, 7.22s/it] 49%|████▉ | 5983/12188 [12:47:39<12:16:55, 7.13s/it] {'loss': 0.3228, 'grad_norm': 0.6784075495552226, 'learning_rate': 5.3915669388847305e-06, 'epoch': 0.49} + 49%|████▉ | 5983/12188 [12:47:39<12:16:55, 7.13s/it] 49%|████▉ | 5984/12188 [12:47:47<12:39:48, 7.35s/it] {'loss': 0.3234, 'grad_norm': 0.6353730754061591, 'learning_rate': 5.390242299729991e-06, 'epoch': 0.49} + 49%|████▉ | 5984/12188 [12:47:47<12:39:48, 7.35s/it] 49%|████▉ | 5985/12188 [12:47:54<12:20:58, 7.17s/it] {'loss': 0.3452, 'grad_norm': 0.6869652184640813, 'learning_rate': 5.388917633016968e-06, 'epoch': 0.49} + 49%|████▉ | 5985/12188 [12:47:54<12:20:58, 7.17s/it] 49%|████▉ | 5986/12188 [12:48:01<12:17:48, 7.14s/it] {'loss': 0.2839, 'grad_norm': 0.6333295978881687, 'learning_rate': 5.387592938839207e-06, 'epoch': 0.49} + 49%|████▉ | 5986/12188 [12:48:01<12:17:48, 7.14s/it] 49%|████▉ | 5987/12188 [12:48:08<12:08:06, 7.05s/it] {'loss': 0.3754, 'grad_norm': 0.7424279641263108, 'learning_rate': 5.386268217290256e-06, 'epoch': 0.49} + 49%|████▉ | 5987/12188 [12:48:08<12:08:06, 7.05s/it] 49%|████▉ | 5988/12188 [12:48:15<12:05:24, 7.02s/it] {'loss': 0.3714, 'grad_norm': 0.791712620397989, 'learning_rate': 5.384943468463663e-06, 'epoch': 0.49} + 49%|████▉ | 5988/12188 [12:48:15<12:05:24, 7.02s/it] 49%|████▉ | 5989/12188 [12:48:21<11:52:23, 6.90s/it] {'loss': 0.35, 'grad_norm': 0.6742295628890763, 'learning_rate': 5.383618692452979e-06, 'epoch': 0.49} + 49%|████▉ | 5989/12188 [12:48:21<11:52:23, 6.90s/it] 49%|████▉ | 5990/12188 [12:48:28<11:45:06, 6.83s/it] {'loss': 0.3309, 'grad_norm': 0.6275968551617577, 'learning_rate': 5.382293889351762e-06, 'epoch': 0.49} + 49%|████▉ | 5990/12188 [12:48:28<11:45:06, 6.83s/it] 49%|████▉ | 5991/12188 [12:48:35<11:38:13, 6.76s/it] {'loss': 0.401, 'grad_norm': 0.6600562515652123, 'learning_rate': 5.380969059253565e-06, 'epoch': 0.49} + 49%|████▉ | 5991/12188 [12:48:35<11:38:13, 6.76s/it] 49%|████▉ | 5992/12188 [12:48:41<11:34:18, 6.72s/it] {'loss': 0.3613, 'grad_norm': 0.6837979519144922, 'learning_rate': 5.379644202251943e-06, 'epoch': 0.49} + 49%|████▉ | 5992/12188 [12:48:41<11:34:18, 6.72s/it] 49%|████▉ | 5993/12188 [12:48:49<12:05:22, 7.03s/it] {'loss': 0.3221, 'grad_norm': 0.7230604063156064, 'learning_rate': 5.378319318440458e-06, 'epoch': 0.49} + 49%|████▉ | 5993/12188 [12:48:49<12:05:22, 7.03s/it] 49%|████▉ | 5994/12188 [12:48:56<12:05:08, 7.02s/it] {'loss': 0.3156, 'grad_norm': 0.6651753322637123, 'learning_rate': 5.376994407912671e-06, 'epoch': 0.49} + 49%|████▉ | 5994/12188 [12:48:56<12:05:08, 7.02s/it] 49%|████▉ | 5995/12188 [12:49:05<13:01:28, 7.57s/it] {'loss': 0.3165, 'grad_norm': 0.6181499771255744, 'learning_rate': 5.375669470762145e-06, 'epoch': 0.49} + 49%|████▉ | 5995/12188 [12:49:05<13:01:28, 7.57s/it] 49%|████▉ | 5996/12188 [12:49:12<13:00:37, 7.56s/it] {'loss': 0.3436, 'grad_norm': 0.6357819407552753, 'learning_rate': 5.374344507082444e-06, 'epoch': 0.49} + 49%|████▉ | 5996/12188 [12:49:12<13:00:37, 7.56s/it] 49%|████▉ | 5997/12188 [12:49:20<13:02:39, 7.59s/it] {'loss': 0.3548, 'grad_norm': 0.7150247881569385, 'learning_rate': 5.373019516967135e-06, 'epoch': 0.49} + 49%|████▉ | 5997/12188 [12:49:20<13:02:39, 7.59s/it] 49%|████▉ | 5998/12188 [12:49:27<12:35:21, 7.32s/it] {'loss': 0.3496, 'grad_norm': 0.7155639376005716, 'learning_rate': 5.371694500509787e-06, 'epoch': 0.49} + 49%|████▉ | 5998/12188 [12:49:27<12:35:21, 7.32s/it] 49%|████▉ | 5999/12188 [12:49:34<12:19:38, 7.17s/it] {'loss': 0.3601, 'grad_norm': 0.6764483861459452, 'learning_rate': 5.370369457803971e-06, 'epoch': 0.49} + 49%|████▉ | 5999/12188 [12:49:34<12:19:38, 7.17s/it] 49%|████▉ | 6000/12188 [12:49:42<12:51:53, 7.48s/it] {'loss': 0.3279, 'grad_norm': 0.8160193188475613, 'learning_rate': 5.3690443889432595e-06, 'epoch': 0.49} + 49%|████▉ | 6000/12188 [12:49:42<12:51:53, 7.48s/it]/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/utils/checkpoint.py:87: UserWarning: None of the inputs have requires_grad=True. Gradients will be None + warnings.warn( + 49%|████▉ | 6001/12188 [12:50:04<20:38:03, 12.01s/it] {'loss': 0.3345, 'grad_norm': 0.7434928325330301, 'learning_rate': 5.367719294021225e-06, 'epoch': 0.49} + 49%|████▉ | 6001/12188 [12:50:04<20:38:03, 12.01s/it] 49%|████▉ | 6002/12188 [12:50:11<17:55:33, 10.43s/it] {'loss': 0.3089, 'grad_norm': 0.6335670062549674, 'learning_rate': 5.366394173131445e-06, 'epoch': 0.49} + 49%|████▉ | 6002/12188 [12:50:11<17:55:33, 10.43s/it] 49%|████▉ | 6003/12188 [12:50:18<16:07:32, 9.39s/it] {'loss': 0.3449, 'grad_norm': 0.8943111010896452, 'learning_rate': 5.365069026367498e-06, 'epoch': 0.49} + 49%|██���█▉ | 6003/12188 [12:50:18<16:07:32, 9.39s/it] 49%|████▉ | 6004/12188 [12:50:25<14:45:10, 8.59s/it] {'loss': 0.2912, 'grad_norm': 0.6281513543899186, 'learning_rate': 5.363743853822962e-06, 'epoch': 0.49} + 49%|████▉ | 6004/12188 [12:50:25<14:45:10, 8.59s/it] 49%|████▉ | 6005/12188 [12:50:32<14:09:34, 8.24s/it] {'loss': 0.3822, 'grad_norm': 0.8872501393460088, 'learning_rate': 5.3624186555914205e-06, 'epoch': 0.49} + 49%|████▉ | 6005/12188 [12:50:32<14:09:34, 8.24s/it] 49%|████▉ | 6006/12188 [12:50:39<13:21:43, 7.78s/it] {'loss': 0.3354, 'grad_norm': 0.6798766969777158, 'learning_rate': 5.361093431766455e-06, 'epoch': 0.49} + 49%|████▉ | 6006/12188 [12:50:39<13:21:43, 7.78s/it] 49%|████▉ | 6007/12188 [12:50:47<13:22:06, 7.79s/it] {'loss': 0.3282, 'grad_norm': 0.7502490522867507, 'learning_rate': 5.3597681824416505e-06, 'epoch': 0.49} + 49%|████▉ | 6007/12188 [12:50:47<13:22:06, 7.79s/it] 49%|████▉ | 6008/12188 [12:50:54<12:50:34, 7.48s/it] {'loss': 0.3571, 'grad_norm': 0.6931439827934208, 'learning_rate': 5.3584429077105955e-06, 'epoch': 0.49} + 49%|████▉ | 6008/12188 [12:50:54<12:50:34, 7.48s/it] 49%|████▉ | 6009/12188 [12:51:01<12:37:23, 7.35s/it] {'loss': 0.3238, 'grad_norm': 0.790713187084373, 'learning_rate': 5.35711760766688e-06, 'epoch': 0.49} + 49%|████▉ | 6009/12188 [12:51:01<12:37:23, 7.35s/it] 49%|████▉ | 6010/12188 [12:51:08<12:38:15, 7.36s/it] {'loss': 0.3426, 'grad_norm': 0.8015063182705268, 'learning_rate': 5.355792282404091e-06, 'epoch': 0.49} + 49%|████▉ | 6010/12188 [12:51:08<12:38:15, 7.36s/it] 49%|████▉ | 6011/12188 [12:51:15<12:33:28, 7.32s/it] {'loss': 0.3506, 'grad_norm': 0.7555985162589158, 'learning_rate': 5.3544669320158235e-06, 'epoch': 0.49} + 49%|████▉ | 6011/12188 [12:51:15<12:33:28, 7.32s/it] 49%|████▉ | 6012/12188 [12:51:23<12:37:32, 7.36s/it] {'loss': 0.3349, 'grad_norm': 0.7224424865220608, 'learning_rate': 5.35314155659567e-06, 'epoch': 0.49} + 49%|████▉ | 6012/12188 [12:51:23<12:37:32, 7.36s/it] 49%|████▉ | 6013/12188 [12:51:29<12:06:56, 7.06s/it] {'loss': 0.3392, 'grad_norm': 0.6535880746583861, 'learning_rate': 5.351816156237228e-06, 'epoch': 0.49} + 49%|████▉ | 6013/12188 [12:51:29<12:06:56, 7.06s/it] 49%|████▉ | 6014/12188 [12:51:37<12:39:12, 7.38s/it] {'loss': 0.3306, 'grad_norm': 0.7131664715374354, 'learning_rate': 5.350490731034093e-06, 'epoch': 0.49} + 49%|████▉ | 6014/12188 [12:51:37<12:39:12, 7.38s/it] 49%|████▉ | 6015/12188 [12:51:45<12:42:12, 7.41s/it] {'loss': 0.3237, 'grad_norm': 0.6359298459988126, 'learning_rate': 5.349165281079868e-06, 'epoch': 0.49} + 49%|████▉ | 6015/12188 [12:51:45<12:42:12, 7.41s/it] 49%|████▉ | 6016/12188 [12:51:53<13:00:30, 7.59s/it] {'loss': 0.3374, 'grad_norm': 0.7492323854568221, 'learning_rate': 5.347839806468149e-06, 'epoch': 0.49} + 49%|████▉ | 6016/12188 [12:51:53<13:00:30, 7.59s/it] 49%|████▉ | 6017/12188 [12:51:59<12:33:35, 7.33s/it] {'loss': 0.3747, 'grad_norm': 0.6952182991216241, 'learning_rate': 5.346514307292543e-06, 'epoch': 0.49} + 49%|████▉ | 6017/12188 [12:51:59<12:33:35, 7.33s/it] 49%|████▉ | 6018/12188 [12:52:06<12:25:26, 7.25s/it] {'loss': 0.3366, 'grad_norm': 0.6790413177039417, 'learning_rate': 5.345188783646654e-06, 'epoch': 0.49} + 49%|████▉ | 6018/12188 [12:52:06<12:25:26, 7.25s/it] 49%|████▉ | 6019/12188 [12:52:14<12:30:23, 7.30s/it] {'loss': 0.3563, 'grad_norm': 0.7100749776983348, 'learning_rate': 5.343863235624088e-06, 'epoch': 0.49} + 49%|████▉ | 6019/12188 [12:52:14<12:30:23, 7.30s/it] 49%|████▉ | 6020/12188 [12:52:21<12:30:24, 7.30s/it] {'loss': 0.3169, 'grad_norm': 0.6670286443983946, 'learning_rate': 5.342537663318451e-06, 'epoch': 0.49} + 49%|████▉ | 6020/12188 [12:52:21<12:30:24, 7.30s/it] 49%|████▉ | 6021/12188 [12:52:28<12:19:42, 7.20s/it] {'loss': 0.3216, 'grad_norm': 0.8260964332137584, 'learning_rate': 5.341212066823356e-06, 'epoch': 0.49} + 49%|████▉ | 6021/12188 [12:52:28<12:19:42, 7.20s/it] 49%|████▉ | 6022/12188 [12:52:35<12:24:07, 7.24s/it] {'loss': 0.2935, 'grad_norm': 0.6363881917118904, 'learning_rate': 5.339886446232412e-06, 'epoch': 0.49} + 49%|████▉ | 6022/12188 [12:52:35<12:24:07, 7.24s/it] 49%|████▉ | 6023/12188 [12:52:44<13:01:56, 7.61s/it] {'loss': 0.3475, 'grad_norm': 0.7044754723806042, 'learning_rate': 5.338560801639234e-06, 'epoch': 0.49} + 49%|████▉ | 6023/12188 [12:52:44<13:01:56, 7.61s/it] 49%|████▉ | 6024/12188 [12:52:51<12:34:41, 7.35s/it] {'loss': 0.3749, 'grad_norm': 0.7695122932742788, 'learning_rate': 5.337235133137436e-06, 'epoch': 0.49} + 49%|████▉ | 6024/12188 [12:52:51<12:34:41, 7.35s/it] 49%|████▉ | 6025/12188 [12:52:58<12:38:50, 7.39s/it] {'loss': 0.3241, 'grad_norm': 0.6463844295631681, 'learning_rate': 5.335909440820635e-06, 'epoch': 0.49} + 49%|████▉ | 6025/12188 [12:52:58<12:38:50, 7.39s/it] 49%|████▉ | 6026/12188 [12:53:05<12:25:06, 7.26s/it] {'loss': 0.3307, 'grad_norm': 0.7110365207689999, 'learning_rate': 5.334583724782449e-06, 'epoch': 0.49} + 49%|████▉ | 6026/12188 [12:53:05<12:25:06, 7.26s/it] 49%|████▉ | 6027/12188 [12:53:14<13:04:15, 7.64s/it] {'loss': 0.3584, 'grad_norm': 0.7488808193618336, 'learning_rate': 5.3332579851164965e-06, 'epoch': 0.49} + 49%|████▉ | 6027/12188 [12:53:14<13:04:15, 7.64s/it] 49%|████▉ | 6028/12188 [12:53:21<13:13:24, 7.73s/it] {'loss': 0.3467, 'grad_norm': 0.8584224193852777, 'learning_rate': 5.331932221916401e-06, 'epoch': 0.49} + 49%|████▉ | 6028/12188 [12:53:21<13:13:24, 7.73s/it] 49%|████▉ | 6029/12188 [12:53:29<13:00:14, 7.60s/it] {'loss': 0.3598, 'grad_norm': 0.6370878492845486, 'learning_rate': 5.330606435275785e-06, 'epoch': 0.49} + 49%|████▉ | 6029/12188 [12:53:29<13:00:14, 7.60s/it] 49%|████▉ | 6030/12188 [12:53:36<12:35:06, 7.36s/it] {'loss': 0.3565, 'grad_norm': 0.6633770087693207, 'learning_rate': 5.329280625288274e-06, 'epoch': 0.49} + 49%|████▉ | 6030/12188 [12:53:36<12:35:06, 7.36s/it] 49%|████▉ | 6031/12188 [12:53:43<12:35:23, 7.36s/it] {'loss': 0.301, 'grad_norm': 0.6111294751533995, 'learning_rate': 5.327954792047495e-06, 'epoch': 0.49} + 49%|████▉ | 6031/12188 [12:53:43<12:35:23, 7.36s/it] 49%|████▉ | 6032/12188 [12:53:53<14:07:05, 8.26s/it] {'loss': 0.3212, 'grad_norm': 0.6279158939069744, 'learning_rate': 5.326628935647076e-06, 'epoch': 0.49} + 49%|████▉ | 6032/12188 [12:53:53<14:07:05, 8.26s/it] 49%|████▉ | 6033/12188 [12:54:01<14:01:35, 8.20s/it] {'loss': 0.3208, 'grad_norm': 0.6738451226721919, 'learning_rate': 5.3253030561806464e-06, 'epoch': 0.49} + 49%|████▉ | 6033/12188 [12:54:01<14:01:35, 8.20s/it] 50%|████▉ | 6034/12188 [12:54:10<14:25:22, 8.44s/it] {'loss': 0.33, 'grad_norm': 0.760700321997078, 'learning_rate': 5.323977153741835e-06, 'epoch': 0.5} + 50%|████▉ | 6034/12188 [12:54:10<14:25:22, 8.44s/it] 50%|████▉ | 6035/12188 [12:54:18<13:47:06, 8.07s/it] {'loss': 0.3201, 'grad_norm': 0.8221194738493767, 'learning_rate': 5.322651228424279e-06, 'epoch': 0.5} + 50%|████▉ | 6035/12188 [12:54:18<13:47:06, 8.07s/it] 50%|████▉ | 6036/12188 [12:54:26<13:50:54, 8.10s/it] {'loss': 0.3229, 'grad_norm': 0.6850611954102807, 'learning_rate': 5.321325280321613e-06, 'epoch': 0.5} + 50%|████▉ | 6036/12188 [12:54:26<13:50:54, 8.10s/it] 50%|████▉ | 6037/12188 [12:54:33<13:10:08, 7.71s/it] {'loss': 0.344, 'grad_norm': 0.6375108284043189, 'learning_rate': 5.31999930952747e-06, 'epoch': 0.5} + 50%|████▉ | 6037/12188 [12:54:33<13:10:08, 7.71s/it] 50%|████▉ | 6038/12188 [12:54:39<12:37:49, 7.39s/it] {'loss': 0.3061, 'grad_norm': 0.6556045785511098, 'learning_rate': 5.3186733161354895e-06, 'epoch': 0.5} + 50%|████▉ | 6038/12188 [12:54:39<12:37:49, 7.39s/it] 50%|████▉ | 6039/12188 [12:54:46<12:23:33, 7.26s/it] {'loss': 0.3571, 'grad_norm': 0.691163575082186, 'learning_rate': 5.317347300239314e-06, 'epoch': 0.5} + 50%|████▉ | 6039/12188 [12:54:46<12:23:33, 7.26s/it] 50%|████▉ | 6040/12188 [12:54:53<12:22:20, 7.24s/it] {'loss': 0.2941, 'grad_norm': 0.7378008109128339, 'learning_rate': 5.316021261932581e-06, 'epoch': 0.5} + 50%|████▉ | 6040/12188 [12:54:53<12:22:20, 7.24s/it] 50%|████▉ | 6041/12188 [12:55:02<12:58:43, 7.60s/it] {'loss': 0.3237, 'grad_norm': 0.9292790021999193, 'learning_rate': 5.314695201308934e-06, 'epoch': 0.5} + 50%|████▉ | 6041/12188 [12:55:02<12:58:43, 7.60s/it] 50%|████▉ | 6042/12188 [12:55:10<13:02:18, 7.64s/it] {'loss': 0.3354, 'grad_norm': 0.6124287426459596, 'learning_rate': 5.313369118462017e-06, 'epoch': 0.5} + 50%|████▉ | 6042/12188 [12:55:10<13:02:18, 7.64s/it] 50%|████▉ | 6043/12188 [12:55:16<12:28:49, 7.31s/it] {'loss': 0.3311, 'grad_norm': 0.7187552392399367, 'learning_rate': 5.312043013485476e-06, 'epoch': 0.5} + 50%|████▉ | 6043/12188 [12:55:16<12:28:49, 7.31s/it] 50%|████▉ | 6044/12188 [12:55:23<12:14:59, 7.18s/it] {'loss': 0.3156, 'grad_norm': 1.077032724995278, 'learning_rate': 5.31071688647296e-06, 'epoch': 0.5} + 50%|████▉ | 6044/12188 [12:55:23<12:14:59, 7.18s/it] 50%|████▉ | 6045/12188 [12:55:30<12:13:06, 7.16s/it] {'loss': 0.3155, 'grad_norm': 0.7838118918770594, 'learning_rate': 5.309390737518115e-06, 'epoch': 0.5} + 50%|████▉ | 6045/12188 [12:55:30<12:13:06, 7.16s/it] 50%|████▉ | 6046/12188 [12:55:38<12:27:30, 7.30s/it] {'loss': 0.363, 'grad_norm': 0.6868417580825388, 'learning_rate': 5.308064566714595e-06, 'epoch': 0.5} + 50%|████▉ | 6046/12188 [12:55:38<12:27:30, 7.30s/it] 50%|████▉ | 6047/12188 [12:55:45<12:23:42, 7.27s/it] {'loss': 0.3402, 'grad_norm': 0.6960748943465731, 'learning_rate': 5.30673837415605e-06, 'epoch': 0.5} + 50%|████▉ | 6047/12188 [12:55:45<12:23:42, 7.27s/it] 50%|████▉ | 6048/12188 [12:55:52<12:23:55, 7.27s/it] {'loss': 0.3004, 'grad_norm': 0.6476635917157717, 'learning_rate': 5.305412159936133e-06, 'epoch': 0.5} + 50%|████▉ | 6048/12188 [12:55:52<12:23:55, 7.27s/it] 50%|████▉ | 6049/12188 [12:55:59<12:16:48, 7.20s/it] {'loss': 0.3004, 'grad_norm': 0.6425369787855844, 'learning_rate': 5.3040859241485e-06, 'epoch': 0.5} + 50%|████▉ | 6049/12188 [12:55:59<12:16:48, 7.20s/it] 50%|████▉ | 6050/12188 [12:56:06<12:05:15, 7.09s/it] {'loss': 0.2777, 'grad_norm': 0.6373243866723795, 'learning_rate': 5.302759666886807e-06, 'epoch': 0.5} + 50%|████▉ | 6050/12188 [12:56:06<12:05:15, 7.09s/it] 50%|████▉ | 6051/12188 [12:56:13<12:04:30, 7.08s/it] {'loss': 0.3521, 'grad_norm': 0.6353924235572437, 'learning_rate': 5.301433388244715e-06, 'epoch': 0.5} + 50%|████▉ | 6051/12188 [12:56:13<12:04:30, 7.08s/it] 50%|████▉ | 6052/12188 [12:56:20<12:09:18, 7.13s/it] {'loss': 0.3887, 'grad_norm': 0.7039078644660405, 'learning_rate': 5.300107088315878e-06, 'epoch': 0.5} + 50%|████▉ | 6052/12188 [12:56:20<12:09:18, 7.13s/it] 50%|████▉ | 6053/12188 [12:56:28<12:23:49, 7.27s/it] {'loss': 0.2903, 'grad_norm': 0.6733312497511752, 'learning_rate': 5.298780767193964e-06, 'epoch': 0.5} + 50%|████▉ | 6053/12188 [12:56:28<12:23:49, 7.27s/it] 50%|████▉ | 6054/12188 [12:56:35<12:04:58, 7.09s/it] {'loss': 0.3258, 'grad_norm': 0.6115749732579427, 'learning_rate': 5.297454424972631e-06, 'epoch': 0.5} + 50%|████▉ | 6054/12188 [12:56:35<12:04:58, 7.09s/it] 50%|████▉ | 6055/12188 [12:56:42<12:04:38, 7.09s/it] {'loss': 0.4006, 'grad_norm': 0.8446017076890843, 'learning_rate': 5.296128061745545e-06, 'epoch': 0.5} + 50%|████▉ | 6055/12188 [12:56:42<12:04:38, 7.09s/it] 50%|████▉ | 6056/12188 [12:56:49<12:04:20, 7.09s/it] {'loss': 0.3393, 'grad_norm': 0.686798132379945, 'learning_rate': 5.294801677606369e-06, 'epoch': 0.5} + 50%|████▉ | 6056/12188 [12:56:49<12:04:20, 7.09s/it] 50%|████▉ | 6057/12188 [12:56:56<12:14:56, 7.19s/it] {'loss': 0.3137, 'grad_norm': 0.6349385925853959, 'learning_rate': 5.2934752726487725e-06, 'epoch': 0.5} + 50%|████▉ | 6057/12188 [12:56:56<12:14:56, 7.19s/it] 50%|████▉ | 6058/12188 [12:57:03<12:06:17, 7.11s/it] {'loss': 0.3416, 'grad_norm': 0.6350511441674974, 'learning_rate': 5.292148846966425e-06, 'epoch': 0.5} + 50%|████▉ | 6058/12188 [12:57:03<12:06:17, 7.11s/it] 50%|████▉ | 6059/12188 [12:57:10<12:02:30, 7.07s/it] {'loss': 0.3311, 'grad_norm': 0.6640242806455906, 'learning_rate': 5.290822400652995e-06, 'epoch': 0.5} + 50%|████▉ | 6059/12188 [12:57:10<12:02:30, 7.07s/it] 50%|████▉ | 6060/12188 [12:57:17<11:54:30, 7.00s/it] {'loss': 0.3434, 'grad_norm': 0.6768722331842102, 'learning_rate': 5.289495933802155e-06, 'epoch': 0.5} + 50%|████▉ | 6060/12188 [12:57:17<11:54:30, 7.00s/it] 50%|████▉ | 6061/12188 [12:57:25<12:27:40, 7.32s/it] {'loss': 0.333, 'grad_norm': 0.6645244591261519, 'learning_rate': 5.288169446507576e-06, 'epoch': 0.5} + 50%|████▉ | 6061/12188 [12:57:25<12:27:40, 7.32s/it] 50%|████▉ | 6062/12188 [12:57:33<12:53:26, 7.58s/it] {'loss': 0.3452, 'grad_norm': 0.7775140509106834, 'learning_rate': 5.286842938862934e-06, 'epoch': 0.5} + 50%|████▉ | 6062/12188 [12:57:33<12:53:26, 7.58s/it] 50%|████▉ | 6063/12188 [12:57:40<12:33:57, 7.39s/it] {'loss': 0.3732, 'grad_norm': 0.646546074532065, 'learning_rate': 5.285516410961905e-06, 'epoch': 0.5} + 50%|████▉ | 6063/12188 [12:57:40<12:33:57, 7.39s/it] 50%|████▉ | 6064/12188 [12:57:47<12:17:31, 7.23s/it] {'loss': 0.3498, 'grad_norm': 0.6508937368436576, 'learning_rate': 5.284189862898166e-06, 'epoch': 0.5} + 50%|████▉ | 6064/12188 [12:57:47<12:17:31, 7.23s/it] 50%|████▉ | 6065/12188 [12:57:54<12:16:14, 7.21s/it] {'loss': 0.3146, 'grad_norm': 0.6286957009574038, 'learning_rate': 5.282863294765393e-06, 'epoch': 0.5} + 50%|████▉ | 6065/12188 [12:57:54<12:16:14, 7.21s/it]Invalidate trace cache @ step 2: expected module 1, but got module 364 + 50%|████▉ | 6066/12188 [12:58:01<11:54:14, 7.00s/it] {'loss': 0.6718, 'grad_norm': 0.6457312570015231, 'learning_rate': 5.281536706657271e-06, 'epoch': 0.5} + 50%|████▉ | 6066/12188 [12:58:01<11:54:14, 7.00s/it] 50%|████▉ | 6067/12188 [12:58:08<11:54:03, 7.00s/it] {'loss': 0.3529, 'grad_norm': 0.6606843219297923, 'learning_rate': 5.28021009866748e-06, 'epoch': 0.5} + 50%|████▉ | 6067/12188 [12:58:08<11:54:03, 7.00s/it] 50%|████▉ | 6068/12188 [12:58:15<11:51:29, 6.98s/it] {'loss': 0.3671, 'grad_norm': 0.6759966810409561, 'learning_rate': 5.2788834708897005e-06, 'epoch': 0.5} + 50%|████▉ | 6068/12188 [12:58:15<11:51:29, 6.98s/it] 50%|████▉ | 6069/12188 [12:58:22<12:15:03, 7.21s/it] {'loss': 0.3436, 'grad_norm': 0.7135059270032462, 'learning_rate': 5.277556823417619e-06, 'epoch': 0.5} + 50%|████▉ | 6069/12188 [12:58:22<12:15:03, 7.21s/it] 50%|████▉ | 6070/12188 [12:58:29<11:59:16, 7.05s/it] {'loss': 0.2979, 'grad_norm': 1.0043934256061646, 'learning_rate': 5.2762301563449195e-06, 'epoch': 0.5} + 50%|████▉ | 6070/12188 [12:58:29<11:59:16, 7.05s/it] 50%|████▉ | 6071/12188 [12:58:37<12:19:09, 7.25s/it] {'loss': 0.3467, 'grad_norm': 0.6689288599105845, 'learning_rate': 5.27490346976529e-06, 'epoch': 0.5} + 50%|████▉ | 6071/12188 [12:58:37<12:19:09, 7.25s/it] 50%|████▉ | 6072/12188 [12:58:43<12:03:30, 7.10s/it] {'loss': 0.36, 'grad_norm': 0.7216974845186587, 'learning_rate': 5.27357676377242e-06, 'epoch': 0.5} + 50%|████▉ | 6072/12188 [12:58:43<12:03:30, 7.10s/it] 50%|████▉ | 6073/12188 [12:58:51<12:05:51, 7.12s/it] {'loss': 0.3358, 'grad_norm': 0.6752512900699371, 'learning_rate': 5.272250038459999e-06, 'epoch': 0.5} + 50%|████▉ | 6073/12188 [12:58:51<12:05:51, 7.12s/it] 50%|████▉ | 6074/12188 [12:58:58<12:22:39, 7.29s/it] {'loss': 0.3421, 'grad_norm': 0.6881755531452278, 'learning_rate': 5.270923293921716e-06, 'epoch': 0.5} + 50%|████▉ | 6074/12188 [12:58:58<12:22:39, 7.29s/it] 50%|████▉ | 6075/12188 [12:59:06<12:31:31, 7.38s/it] {'loss': 0.3106, 'grad_norm': 0.6706942321771636, 'learning_rate': 5.269596530251267e-06, 'epoch': 0.5} + 50%|████▉ | 6075/12188 [12:59:06<12:31:31, 7.38s/it] 50%|████▉ | 6076/12188 [12:59:13<12:12:57, 7.20s/it] {'loss': 0.3386, 'grad_norm': 0.65081123660065, 'learning_rate': 5.268269747542343e-06, 'epoch': 0.5} + 50%|████▉ | 6076/12188 [12:59:13<12:12:57, 7.20s/it] 50%|████▉ | 6077/12188 [12:59:20<12:16:31, 7.23s/it] {'loss': 0.3632, 'grad_norm': 1.0453441021184628, 'learning_rate': 5.2669429458886415e-06, 'epoch': 0.5} + 50%|████▉ | 6077/12188 [12:59:20<12:16:31, 7.23s/it] 50%|████▉ | 6078/12188 [12:59:27<12:06:19, 7.13s/it] {'loss': 0.3454, 'grad_norm': 0.6282639210651904, 'learning_rate': 5.265616125383857e-06, 'epoch': 0.5} + 50%|████▉ | 6078/12188 [12:59:27<12:06:19, 7.13s/it] 50%|████▉ | 6079/12188 [12:59:34<12:12:13, 7.19s/it] {'loss': 0.2814, 'grad_norm': 0.6043044372036198, 'learning_rate': 5.264289286121687e-06, 'epoch': 0.5} + 50%|████▉ | 6079/12188 [12:59:34<12:12:13, 7.19s/it] 50%|████▉ | 6080/12188 [12:59:42<12:27:49, 7.35s/it] {'loss': 0.3288, 'grad_norm': 0.7034529729790224, 'learning_rate': 5.262962428195835e-06, 'epoch': 0.5} + 50%|████▉ | 6080/12188 [12:59:42<12:27:49, 7.35s/it] 50%|████▉ | 6081/12188 [12:59:49<12:19:12, 7.26s/it] {'loss': 0.3389, 'grad_norm': 0.7010725495055713, 'learning_rate': 5.2616355516999995e-06, 'epoch': 0.5} + 50%|████▉ | 6081/12188 [12:59:49<12:19:12, 7.26s/it] 50%|████▉ | 6082/12188 [12:59:57<12:34:25, 7.41s/it] {'loss': 0.3071, 'grad_norm': 0.612323161141405, 'learning_rate': 5.2603086567278795e-06, 'epoch': 0.5} + 50%|████▉ | 6082/12188 [12:59:57<12:34:25, 7.41s/it] 50%|████▉ | 6083/12188 [13:00:04<12:24:17, 7.31s/it] {'loss': 0.3294, 'grad_norm': 0.6226115477420574, 'learning_rate': 5.25898174337318e-06, 'epoch': 0.5} + 50%|████▉ | 6083/12188 [13:00:04<12:24:17, 7.31s/it] 50%|████▉ | 6084/12188 [13:00:11<12:08:30, 7.16s/it] {'loss': 0.3317, 'grad_norm': 0.8545483617258917, 'learning_rate': 5.257654811729606e-06, 'epoch': 0.5} + 50%|████▉ | 6084/12188 [13:00:11<12:08:30, 7.16s/it] 50%|████▉ | 6085/12188 [13:00:18<12:01:44, 7.10s/it] {'loss': 0.3168, 'grad_norm': 0.7164052387965758, 'learning_rate': 5.256327861890865e-06, 'epoch': 0.5} + 50%|████▉ | 6085/12188 [13:00:18<12:01:44, 7.10s/it] 50%|████▉ | 6086/12188 [13:00:24<11:49:17, 6.97s/it] {'loss': 0.3114, 'grad_norm': 0.8476147705197375, 'learning_rate': 5.255000893950662e-06, 'epoch': 0.5} + 50%|████▉ | 6086/12188 [13:00:24<11:49:17, 6.97s/it] 50%|████▉ | 6087/12188 [13:00:32<12:21:04, 7.29s/it] {'loss': 0.3489, 'grad_norm': 0.810483009763398, 'learning_rate': 5.2536739080027046e-06, 'epoch': 0.5} + 50%|████▉ | 6087/12188 [13:00:32<12:21:04, 7.29s/it] 50%|████▉ | 6088/12188 [13:00:39<12:11:37, 7.20s/it] {'loss': 0.317, 'grad_norm': 0.6383311921764868, 'learning_rate': 5.252346904140704e-06, 'epoch': 0.5} + 50%|████▉ | 6088/12188 [13:00:39<12:11:37, 7.20s/it] 50%|████▉ | 6089/12188 [13:00:46<12:05:54, 7.14s/it] {'loss': 0.3349, 'grad_norm': 1.002087390856705, 'learning_rate': 5.2510198824583695e-06, 'epoch': 0.5} + 50%|████▉ | 6089/12188 [13:00:46<12:05:54, 7.14s/it] 50%|████▉ | 6090/12188 [13:00:54<12:25:38, 7.34s/it] {'loss': 0.3452, 'grad_norm': 0.606586052818834, 'learning_rate': 5.2496928430494145e-06, 'epoch': 0.5} + 50%|████▉ | 6090/12188 [13:00:54<12:25:38, 7.34s/it] 50%|████▉ | 6091/12188 [13:01:01<12:04:15, 7.13s/it] {'loss': 0.3315, 'grad_norm': 0.7224670415550207, 'learning_rate': 5.248365786007552e-06, 'epoch': 0.5} + 50%|████▉ | 6091/12188 [13:01:01<12:04:15, 7.13s/it] 50%|████▉ | 6092/12188 [13:01:08<12:07:14, 7.16s/it] {'loss': 0.3436, 'grad_norm': 0.7267695598330209, 'learning_rate': 5.247038711426497e-06, 'epoch': 0.5} + 50%|████▉ | 6092/12188 [13:01:08<12:07:14, 7.16s/it] 50%|████▉ | 6093/12188 [13:01:15<11:49:47, 6.99s/it] {'loss': 0.3165, 'grad_norm': 0.6721029504995465, 'learning_rate': 5.245711619399965e-06, 'epoch': 0.5} + 50%|████▉ | 6093/12188 [13:01:15<11:49:47, 6.99s/it] 50%|█████ | 6094/12188 [13:01:23<12:43:10, 7.51s/it] {'loss': 0.3014, 'grad_norm': 0.5530515338948787, 'learning_rate': 5.244384510021673e-06, 'epoch': 0.5} + 50%|█████ | 6094/12188 [13:01:23<12:43:10, 7.51s/it] 50%|█████ | 6095/12188 [13:01:31<12:59:51, 7.68s/it] {'loss': 0.3249, 'grad_norm': 0.7304962526315347, 'learning_rate': 5.2430573833853415e-06, 'epoch': 0.5} + 50%|█████ | 6095/12188 [13:01:31<12:59:51, 7.68s/it] 50%|█████ | 6096/12188 [13:01:40<13:21:53, 7.90s/it] {'loss': 0.3327, 'grad_norm': 0.6809740755112174, 'learning_rate': 5.241730239584686e-06, 'epoch': 0.5} + 50%|█████ | 6096/12188 [13:01:40<13:21:53, 7.90s/it] 50%|█████ | 6097/12188 [13:01:47<13:02:22, 7.71s/it] {'loss': 0.3257, 'grad_norm': 0.6863533613800127, 'learning_rate': 5.240403078713429e-06, 'epoch': 0.5} + 50%|█████ | 6097/12188 [13:01:47<13:02:22, 7.71s/it] 50%|█████ | 6098/12188 [13:01:54<12:45:07, 7.54s/it] {'loss': 0.3237, 'grad_norm': 0.6644444321415502, 'learning_rate': 5.239075900865294e-06, 'epoch': 0.5} + 50%|█████ | 6098/12188 [13:01:54<12:45:07, 7.54s/it] 50%|█████ | 6099/12188 [13:02:02<12:39:22, 7.48s/it] {'loss': 0.3238, 'grad_norm': 0.6651939029240997, 'learning_rate': 5.2377487061340024e-06, 'epoch': 0.5} + 50%|█████ | 6099/12188 [13:02:02<12:39:22, 7.48s/it] 50%|█████ | 6100/12188 [13:02:09<12:26:43, 7.36s/it] {'loss': 0.3755, 'grad_norm': 0.7601981224910895, 'learning_rate': 5.23642149461328e-06, 'epoch': 0.5} + 50%|█████ | 6100/12188 [13:02:09<12:26:43, 7.36s/it] 50%|█████ | 6101/12188 [13:02:15<12:10:31, 7.20s/it] {'loss': 0.3276, 'grad_norm': 0.6735649494984945, 'learning_rate': 5.2350942663968496e-06, 'epoch': 0.5} + 50%|█████ | 6101/12188 [13:02:15<12:10:31, 7.20s/it] 50%|█████ | 6102/12188 [13:02:23<12:12:30, 7.22s/it] {'loss': 0.3105, 'grad_norm': 0.8946368250686952, 'learning_rate': 5.233767021578442e-06, 'epoch': 0.5} + 50%|█████ | 6102/12188 [13:02:23<12:12:30, 7.22s/it] 50%|█████ | 6103/12188 [13:02:30<12:21:35, 7.31s/it] {'loss': 0.324, 'grad_norm': 0.6717597790159182, 'learning_rate': 5.232439760251782e-06, 'epoch': 0.5} + 50%|█████ | 6103/12188 [13:02:30<12:21:35, 7.31s/it] 50%|█████ | 6104/12188 [13:02:37<12:06:26, 7.16s/it] {'loss': 0.3095, 'grad_norm': 0.6796383681163426, 'learning_rate': 5.2311124825106e-06, 'epoch': 0.5} + 50%|█████ | 6104/12188 [13:02:37<12:06:26, 7.16s/it] 50%|█████ | 6105/12188 [13:02:45<12:38:00, 7.48s/it] {'loss': 0.3466, 'grad_norm': 0.7823821330063024, 'learning_rate': 5.2297851884486254e-06, 'epoch': 0.5} + 50%|█████ | 6105/12188 [13:02:45<12:38:00, 7.48s/it] 50%|█████ | 6106/12188 [13:02:52<12:21:46, 7.32s/it] {'loss': 0.3372, 'grad_norm': 1.1330972701635542, 'learning_rate': 5.2284578781595885e-06, 'epoch': 0.5} + 50%|█████ | 6106/12188 [13:02:52<12:21:46, 7.32s/it] 50%|█████ | 6107/12188 [13:03:00<12:24:36, 7.35s/it] {'loss': 0.3162, 'grad_norm': 0.6431825378604005, 'learning_rate': 5.227130551737224e-06, 'epoch': 0.5} + 50%|█████ | 6107/12188 [13:03:00<12:24:36, 7.35s/it] 50%|█████ | 6108/12188 [13:03:07<12:30:37, 7.41s/it] {'loss': 0.2962, 'grad_norm': 0.7269666222040047, 'learning_rate': 5.225803209275266e-06, 'epoch': 0.5} + 50%|█████ | 6108/12188 [13:03:07<12:30:37, 7.41s/it] 50%|█████ | 6109/12188 [13:03:14<12:16:36, 7.27s/it] {'loss': 0.338, 'grad_norm': 0.7105314655865291, 'learning_rate': 5.224475850867449e-06, 'epoch': 0.5} + 50%|█████ | 6109/12188 [13:03:14<12:16:36, 7.27s/it] 50%|█████ | 6110/12188 [13:03:21<12:01:16, 7.12s/it] {'loss': 0.3407, 'grad_norm': 0.6877798325762804, 'learning_rate': 5.223148476607506e-06, 'epoch': 0.5} + 50%|█████ | 6110/12188 [13:03:21<12:01:16, 7.12s/it] 50%|█████ | 6111/12188 [13:03:28<12:03:17, 7.14s/it] {'loss': 0.343, 'grad_norm': 0.971232972345301, 'learning_rate': 5.221821086589176e-06, 'epoch': 0.5} + 50%|█████ | 6111/12188 [13:03:28<12:03:17, 7.14s/it] 50%|█████ | 6112/12188 [13:03:35<11:54:44, 7.06s/it] {'loss': 0.3254, 'grad_norm': 0.7262258997798167, 'learning_rate': 5.2204936809061975e-06, 'epoch': 0.5} + 50%|█████ | 6112/12188 [13:03:35<11:54:44, 7.06s/it] 50%|█████ | 6113/12188 [13:03:43<12:34:34, 7.45s/it] {'loss': 0.3207, 'grad_norm': 0.6388258225955606, 'learning_rate': 5.219166259652311e-06, 'epoch': 0.5} + 50%|█████ | 6113/12188 [13:03:43<12:34:34, 7.45s/it] 50%|█████ | 6114/12188 [13:03:51<12:45:58, 7.57s/it] {'loss': 0.3249, 'grad_norm': 0.6673940797577163, 'learning_rate': 5.217838822921255e-06, 'epoch': 0.5} + 50%|█████ | 6114/12188 [13:03:51<12:45:58, 7.57s/it] 50%|█████ | 6115/12188 [13:03:59<13:10:18, 7.81s/it] {'loss': 0.2824, 'grad_norm': 0.6802134076996373, 'learning_rate': 5.21651137080677e-06, 'epoch': 0.5} + 50%|█████ | 6115/12188 [13:04:00<13:10:18, 7.81s/it] 50%|█████ | 6116/12188 [13:04:07<12:49:57, 7.61s/it] {'loss': 0.3092, 'grad_norm': 0.6749672700726236, 'learning_rate': 5.215183903402601e-06, 'epoch': 0.5} + 50%|█████ | 6116/12188 [13:04:07<12:49:57, 7.61s/it] 50%|█████ | 6117/12188 [13:04:13<12:23:11, 7.35s/it] {'loss': 0.2893, 'grad_norm': 0.6427560518181888, 'learning_rate': 5.21385642080249e-06, 'epoch': 0.5} + 50%|█████ | 6117/12188 [13:04:13<12:23:11, 7.35s/it] 50%|█████ | 6118/12188 [13:04:22<12:57:35, 7.69s/it] {'loss': 0.3318, 'grad_norm': 0.6187419348946307, 'learning_rate': 5.212528923100183e-06, 'epoch': 0.5} + 50%|█████ | 6118/12188 [13:04:22<12:57:35, 7.69s/it] 50%|█████ | 6119/12188 [13:04:29<12:28:57, 7.40s/it] {'loss': 0.3029, 'grad_norm': 0.6597933244129496, 'learning_rate': 5.2112014103894226e-06, 'epoch': 0.5} + 50%|█████ | 6119/12188 [13:04:29<12:28:57, 7.40s/it] 50%|█████ | 6120/12188 [13:04:37<12:57:18, 7.69s/it] {'loss': 0.3144, 'grad_norm': 0.6134542537933947, 'learning_rate': 5.209873882763959e-06, 'epoch': 0.5} + 50%|█████ | 6120/12188 [13:04:37<12:57:18, 7.69s/it] 50%|█████ | 6121/12188 [13:04:44<12:27:25, 7.39s/it] {'loss': 0.3388, 'grad_norm': 0.6581416592121989, 'learning_rate': 5.208546340317539e-06, 'epoch': 0.5} + 50%|█████ | 6121/12188 [13:04:44<12:27:25, 7.39s/it] 50%|█████ | 6122/12188 [13:04:51<12:17:50, 7.30s/it] {'loss': 0.3539, 'grad_norm': 0.671938226723893, 'learning_rate': 5.2072187831439125e-06, 'epoch': 0.5} + 50%|█████ | 6122/12188 [13:04:51<12:17:50, 7.30s/it] 50%|█████ | 6123/12188 [13:04:58<12:05:41, 7.18s/it] {'loss': 0.2956, 'grad_norm': 0.607841869863722, 'learning_rate': 5.20589121133683e-06, 'epoch': 0.5} + 50%|█████ | 6123/12188 [13:04:58<12:05:41, 7.18s/it] 50%|█████ | 6124/12188 [13:05:05<11:57:12, 7.10s/it] {'loss': 0.3664, 'grad_norm': 0.9811062177914502, 'learning_rate': 5.204563624990038e-06, 'epoch': 0.5} + 50%|█████ | 6124/12188 [13:05:05<11:57:12, 7.10s/it] 50%|█████ | 6125/12188 [13:05:11<11:51:43, 7.04s/it] {'loss': 0.2916, 'grad_norm': 0.6680250200906155, 'learning_rate': 5.2032360241972916e-06, 'epoch': 0.5} + 50%|█████ | 6125/12188 [13:05:11<11:51:43, 7.04s/it] 50%|█████ | 6126/12188 [13:05:18<11:38:33, 6.91s/it] {'loss': 0.3366, 'grad_norm': 0.6361794479100825, 'learning_rate': 5.2019084090523445e-06, 'epoch': 0.5} + 50%|█████ | 6126/12188 [13:05:18<11:38:33, 6.91s/it] 50%|█████ | 6127/12188 [13:05:25<11:46:50, 7.00s/it] {'loss': 0.3612, 'grad_norm': 0.6635264089343025, 'learning_rate': 5.20058077964895e-06, 'epoch': 0.5} + 50%|█████ | 6127/12188 [13:05:25<11:46:50, 7.00s/it] 50%|█████ | 6128/12188 [13:05:33<12:15:02, 7.28s/it] {'loss': 0.343, 'grad_norm': 0.8000638880623595, 'learning_rate': 5.199253136080862e-06, 'epoch': 0.5} + 50%|█████ | 6128/12188 [13:05:33<12:15:02, 7.28s/it] 50%|█████ | 6129/12188 [13:05:41<12:29:48, 7.43s/it] {'loss': 0.3286, 'grad_norm': 0.6817371977583552, 'learning_rate': 5.19792547844184e-06, 'epoch': 0.5} + 50%|█████ | 6129/12188 [13:05:41<12:29:48, 7.43s/it] 50%|█████ | 6130/12188 [13:05:48<12:19:01, 7.32s/it] {'loss': 0.3014, 'grad_norm': 0.6885598833452434, 'learning_rate': 5.196597806825637e-06, 'epoch': 0.5} + 50%|█████ | 6130/12188 [13:05:48<12:19:01, 7.32s/it] 50%|█████ | 6131/12188 [13:05:55<12:01:07, 7.14s/it] {'loss': 0.3224, 'grad_norm': 0.7114739252013976, 'learning_rate': 5.195270121326012e-06, 'epoch': 0.5} + 50%|█████ | 6131/12188 [13:05:55<12:01:07, 7.14s/it] 50%|█████ | 6132/12188 [13:06:02<11:53:55, 7.07s/it] {'loss': 0.3552, 'grad_norm': 0.7254350535657679, 'learning_rate': 5.193942422036726e-06, 'epoch': 0.5} + 50%|█████ | 6132/12188 [13:06:02<11:53:55, 7.07s/it] 50%|█████ | 6133/12188 [13:06:08<11:44:32, 6.98s/it] {'loss': 0.3452, 'grad_norm': 0.7806033168023406, 'learning_rate': 5.192614709051538e-06, 'epoch': 0.5} + 50%|█████ | 6133/12188 [13:06:08<11:44:32, 6.98s/it] 50%|█████ | 6134/12188 [13:06:15<11:42:15, 6.96s/it] {'loss': 0.3847, 'grad_norm': 0.7238003794760175, 'learning_rate': 5.191286982464207e-06, 'epoch': 0.5} + 50%|█████ | 6134/12188 [13:06:15<11:42:15, 6.96s/it] 50%|█████ | 6135/12188 [13:06:23<12:02:43, 7.16s/it] {'loss': 0.3659, 'grad_norm': 0.6336882295382987, 'learning_rate': 5.189959242368499e-06, 'epoch': 0.5} + 50%|█████ | 6135/12188 [13:06:23<12:02:43, 7.16s/it] 50%|█████ | 6136/12188 [13:06:30<12:03:09, 7.17s/it] {'loss': 0.3092, 'grad_norm': 0.752440758848439, 'learning_rate': 5.188631488858173e-06, 'epoch': 0.5} + 50%|█████ | 6136/12188 [13:06:30<12:03:09, 7.17s/it] 50%|█████ | 6137/12188 [13:06:37<11:56:25, 7.10s/it] {'loss': 0.3293, 'grad_norm': 0.7384322382732299, 'learning_rate': 5.187303722026995e-06, 'epoch': 0.5} + 50%|█████ | 6137/12188 [13:06:37<11:56:25, 7.10s/it] 50%|█████ | 6138/12188 [13:06:46<12:43:51, 7.58s/it] {'loss': 0.3287, 'grad_norm': 0.7672182394032006, 'learning_rate': 5.1859759419687286e-06, 'epoch': 0.5} + 50%|█████ | 6138/12188 [13:06:46<12:43:51, 7.58s/it] 50%|█████ | 6139/12188 [13:06:53<12:20:22, 7.34s/it] {'loss': 0.3323, 'grad_norm': 0.6849687048215778, 'learning_rate': 5.184648148777139e-06, 'epoch': 0.5} + 50%|█████ | 6139/12188 [13:06:53<12:20:22, 7.34s/it] 50%|█████ | 6140/12188 [13:07:00<12:07:52, 7.22s/it] {'loss': 0.3428, 'grad_norm': 0.6765254134421824, 'learning_rate': 5.183320342545995e-06, 'epoch': 0.5} + 50%|█████ | 6140/12188 [13:07:00<12:07:52, 7.22s/it] 50%|█████ | 6141/12188 [13:07:06<11:59:17, 7.14s/it] {'loss': 0.3526, 'grad_norm': 0.7376746911460872, 'learning_rate': 5.181992523369064e-06, 'epoch': 0.5} + 50%|█████ | 6141/12188 [13:07:06<11:59:17, 7.14s/it] 50%|█████ | 6142/12188 [13:07:14<12:14:31, 7.29s/it] {'loss': 0.3248, 'grad_norm': 0.6712988919490659, 'learning_rate': 5.180664691340112e-06, 'epoch': 0.5} + 50%|█████ | 6142/12188 [13:07:14<12:14:31, 7.29s/it] 50%|█████ | 6143/12188 [13:07:23<13:02:43, 7.77s/it] {'loss': 0.291, 'grad_norm': 0.6331452828502557, 'learning_rate': 5.179336846552911e-06, 'epoch': 0.5} + 50%|█████ | 6143/12188 [13:07:23<13:02:43, 7.77s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f00209112b0> +[Try #0] Failed to fetch sample 4607574 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f00209112b0> +Problematic sample: {'image': '20240823_021250_before_screenshot_sub0.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Watches'"}, {'from': 'gpt', 'value': '\nclick(x=0.69, y=0.266)\n'}]} + 50%|█████ | 6144/12188 [13:07:31<12:59:32, 7.74s/it] {'loss': 0.2954, 'grad_norm': 0.6468665051261759, 'learning_rate': 5.178008989101229e-06, 'epoch': 0.5} + 50%|█████ | 6144/12188 [13:07:31<12:59:32, 7.74s/it] 50%|█████ | 6145/12188 [13:07:38<12:36:13, 7.51s/it] {'loss': 0.3292, 'grad_norm': 0.6748069814018698, 'learning_rate': 5.176681119078838e-06, 'epoch': 0.5} + 50%|█████ | 6145/12188 [13:07:38<12:36:13, 7.51s/it] 50%|█████ | 6146/12188 [13:07:44<12:08:40, 7.24s/it] {'loss': 0.3697, 'grad_norm': 1.0139923215753477, 'learning_rate': 5.175353236579509e-06, 'epoch': 0.5} + 50%|█████ | 6146/12188 [13:07:44<12:08:40, 7.24s/it] 50%|█████ | 6147/12188 [13:07:51<12:03:00, 7.18s/it] {'loss': 0.2978, 'grad_norm': 0.6394764480785601, 'learning_rate': 5.174025341697018e-06, 'epoch': 0.5} + 50%|█████ | 6147/12188 [13:07:51<12:03:00, 7.18s/it] 50%|█████ | 6148/12188 [13:07:58<11:56:17, 7.12s/it] {'loss': 0.3038, 'grad_norm': 0.6454529008929889, 'learning_rate': 5.172697434525136e-06, 'epoch': 0.5} + 50%|█████ | 6148/12188 [13:07:58<11:56:17, 7.12s/it] 50%|█████ | 6149/12188 [13:08:05<11:53:00, 7.08s/it] {'loss': 0.3219, 'grad_norm': 0.7035923675926891, 'learning_rate': 5.171369515157639e-06, 'epoch': 0.5} + 50%|█████ | 6149/12188 [13:08:05<11:53:00, 7.08s/it] 50%|█████ | 6150/12188 [13:08:12<11:39:39, 6.95s/it] {'loss': 0.3329, 'grad_norm': 0.6229819244659677, 'learning_rate': 5.1700415836883034e-06, 'epoch': 0.5} + 50%|█████ | 6150/12188 [13:08:12<11:39:39, 6.95s/it] 50%|█████ | 6151/12188 [13:08:19<11:35:07, 6.91s/it] {'loss': 0.328, 'grad_norm': 0.7576197160093016, 'learning_rate': 5.168713640210903e-06, 'epoch': 0.5} + 50%|█████ | 6151/12188 [13:08:19<11:35:07, 6.91s/it] 50%|█████ | 6152/12188 [13:08:25<11:26:06, 6.82s/it] {'loss': 0.3364, 'grad_norm': 0.9278884792271498, 'learning_rate': 5.167385684819216e-06, 'epoch': 0.5} + 50%|█████ | 6152/12188 [13:08:25<11:26:06, 6.82s/it] 50%|█████ | 6153/12188 [13:08:34<12:23:20, 7.39s/it] {'loss': 0.2881, 'grad_norm': 0.6605430298094092, 'learning_rate': 5.166057717607023e-06, 'epoch': 0.5} + 50%|█████ | 6153/12188 [13:08:34<12:23:20, 7.39s/it] 50%|█████ | 6154/12188 [13:08:41<12:04:41, 7.21s/it] {'loss': 0.3363, 'grad_norm': 0.6708160407303695, 'learning_rate': 5.164729738668098e-06, 'epoch': 0.5} + 50%|█████ | 6154/12188 [13:08:41<12:04:41, 7.21s/it] 51%|█████ | 6155/12188 [13:08:48<11:50:42, 7.07s/it] {'loss': 0.2832, 'grad_norm': 0.679088197898457, 'learning_rate': 5.163401748096224e-06, 'epoch': 0.5} + 51%|█████ | 6155/12188 [13:08:48<11:50:42, 7.07s/it] 51%|█████ | 6156/12188 [13:08:56<12:18:48, 7.35s/it] {'loss': 0.3135, 'grad_norm': 0.6301317710988825, 'learning_rate': 5.162073745985182e-06, 'epoch': 0.51} + 51%|█████ | 6156/12188 [13:08:56<12:18:48, 7.35s/it] 51%|█████ | 6157/12188 [13:09:07<14:19:10, 8.55s/it] {'loss': 0.307, 'grad_norm': 0.610203024306466, 'learning_rate': 5.1607457324287535e-06, 'epoch': 0.51} + 51%|█████ | 6157/12188 [13:09:07<14:19:10, 8.55s/it] 51%|█████ | 6158/12188 [13:09:14<13:36:22, 8.12s/it] {'loss': 0.3189, 'grad_norm': 0.6311895412556618, 'learning_rate': 5.159417707520719e-06, 'epoch': 0.51} + 51%|█████ | 6158/12188 [13:09:14<13:36:22, 8.12s/it] 51%|█████ | 6159/12188 [13:09:21<12:59:25, 7.76s/it] {'loss': 0.33, 'grad_norm': 0.6576134376547381, 'learning_rate': 5.158089671354861e-06, 'epoch': 0.51} + 51%|█████ | 6159/12188 [13:09:21<12:59:25, 7.76s/it] 51%|█████ | 6160/12188 [13:09:28<12:31:52, 7.48s/it] {'loss': 0.3368, 'grad_norm': 0.5872235571849912, 'learning_rate': 5.1567616240249655e-06, 'epoch': 0.51} + 51%|█████ | 6160/12188 [13:09:28<12:31:52, 7.48s/it] 51%|█████ | 6161/12188 [13:09:36<12:48:00, 7.65s/it] {'loss': 0.3559, 'grad_norm': 0.7507417306661818, 'learning_rate': 5.155433565624814e-06, 'epoch': 0.51} + 51%|█████ | 6161/12188 [13:09:36<12:48:00, 7.65s/it] 51%|█████ | 6162/12188 [13:09:43<12:29:55, 7.47s/it] {'loss': 0.3499, 'grad_norm': 0.7007223960328275, 'learning_rate': 5.154105496248196e-06, 'epoch': 0.51} + 51%|█████ | 6162/12188 [13:09:43<12:29:55, 7.47s/it] 51%|█████ | 6163/12188 [13:09:50<12:11:19, 7.28s/it] {'loss': 0.3079, 'grad_norm': 0.6541155434215719, 'learning_rate': 5.152777415988894e-06, 'epoch': 0.51} + 51%|█████ | 6163/12188 [13:09:50<12:11:19, 7.28s/it] 51%|█████ | 6164/12188 [13:09:57<12:06:22, 7.23s/it] {'loss': 0.3095, 'grad_norm': 0.7279026130041839, 'learning_rate': 5.151449324940698e-06, 'epoch': 0.51} + 51%|█████ | 6164/12188 [13:09:57<12:06:22, 7.23s/it] 51%|█████ | 6165/12188 [13:10:04<11:54:56, 7.12s/it] {'loss': 0.32, 'grad_norm': 0.7815473601294264, 'learning_rate': 5.150121223197393e-06, 'epoch': 0.51} + 51%|█████ | 6165/12188 [13:10:04<11:54:56, 7.12s/it] 51%|█████ | 6166/12188 [13:10:11<11:47:05, 7.05s/it] {'loss': 0.3422, 'grad_norm': 0.654943063189209, 'learning_rate': 5.1487931108527686e-06, 'epoch': 0.51} + 51%|█████ | 6166/12188 [13:10:11<11:47:05, 7.05s/it] 51%|█████ | 6167/12188 [13:10:18<11:54:30, 7.12s/it] {'loss': 0.365, 'grad_norm': 0.7286314511506342, 'learning_rate': 5.147464988000615e-06, 'epoch': 0.51} + 51%|█████ | 6167/12188 [13:10:18<11:54:30, 7.12s/it] 51%|█████ | 6168/12188 [13:10:25<11:57:28, 7.15s/it] {'loss': 0.2866, 'grad_norm': 0.6113384915054785, 'learning_rate': 5.146136854734719e-06, 'epoch': 0.51} + 51%|█████ | 6168/12188 [13:10:25<11:57:28, 7.15s/it] 51%|█████ | 6169/12188 [13:10:34<12:35:58, 7.54s/it] {'loss': 0.318, 'grad_norm': 0.6917692618273381, 'learning_rate': 5.144808711148872e-06, 'epoch': 0.51} + 51%|█████ | 6169/12188 [13:10:34<12:35:58, 7.54s/it] 51%|█████ | 6170/12188 [13:10:40<12:12:24, 7.30s/it] {'loss': 0.3925, 'grad_norm': 0.6516525931459465, 'learning_rate': 5.143480557336869e-06, 'epoch': 0.51} + 51%|█████ | 6170/12188 [13:10:40<12:12:24, 7.30s/it] 51%|█████ | 6171/12188 [13:10:47<12:02:35, 7.21s/it] {'loss': 0.2956, 'grad_norm': 0.8592050565309056, 'learning_rate': 5.142152393392499e-06, 'epoch': 0.51} + 51%|█████ | 6171/12188 [13:10:47<12:02:35, 7.21s/it] 51%|█████ | 6172/12188 [13:10:55<12:09:27, 7.28s/it] {'loss': 0.2993, 'grad_norm': 0.6259381974774835, 'learning_rate': 5.140824219409555e-06, 'epoch': 0.51} + 51%|█████ | 6172/12188 [13:10:55<12:09:27, 7.28s/it] 51%|█████ | 6173/12188 [13:11:02<12:16:38, 7.35s/it] {'loss': 0.3278, 'grad_norm': 0.6818489587906601, 'learning_rate': 5.13949603548183e-06, 'epoch': 0.51} + 51%|█████ | 6173/12188 [13:11:02<12:16:38, 7.35s/it] 51%|█████ | 6174/12188 [13:11:09<12:00:04, 7.18s/it] {'loss': 0.3218, 'grad_norm': 0.6438460373275238, 'learning_rate': 5.13816784170312e-06, 'epoch': 0.51} + 51%|█████ | 6174/12188 [13:11:09<12:00:04, 7.18s/it] 51%|█████ | 6175/12188 [13:11:17<12:28:11, 7.47s/it] {'loss': 0.3078, 'grad_norm': 0.6610028154511484, 'learning_rate': 5.136839638167219e-06, 'epoch': 0.51} + 51%|█████ | 6175/12188 [13:11:17<12:28:11, 7.47s/it] 51%|█████ | 6176/12188 [13:11:25<12:24:49, 7.43s/it] {'loss': 0.3285, 'grad_norm': 0.9542628258127317, 'learning_rate': 5.1355114249679225e-06, 'epoch': 0.51} + 51%|█████ | 6176/12188 [13:11:25<12:24:49, 7.43s/it] 51%|█████ | 6177/12188 [13:11:32<12:38:21, 7.57s/it] {'loss': 0.3343, 'grad_norm': 0.7669595738407505, 'learning_rate': 5.134183202199028e-06, 'epoch': 0.51} + 51%|█████ | 6177/12188 [13:11:32<12:38:21, 7.57s/it] 51%|█████ | 6178/12188 [13:11:40<12:31:24, 7.50s/it] {'loss': 0.326, 'grad_norm': 0.6710097812059521, 'learning_rate': 5.132854969954329e-06, 'epoch': 0.51} + 51%|█████ | 6178/12188 [13:11:40<12:31:24, 7.50s/it] 51%|█████ | 6179/12188 [13:11:46<12:07:43, 7.27s/it] {'loss': 0.315, 'grad_norm': 0.6466377759646041, 'learning_rate': 5.131526728327627e-06, 'epoch': 0.51} + 51%|█████ | 6179/12188 [13:11:46<12:07:43, 7.27s/it] 51%|█████ | 6180/12188 [13:11:53<11:51:06, 7.10s/it] {'loss': 0.3392, 'grad_norm': 0.7170198091970342, 'learning_rate': 5.130198477412719e-06, 'epoch': 0.51} + 51%|█████ | 6180/12188 [13:11:53<11:51:06, 7.10s/it] 51%|█████ | 6181/12188 [13:12:01<12:02:52, 7.22s/it] {'loss': 0.3671, 'grad_norm': 0.7072778987114329, 'learning_rate': 5.128870217303403e-06, 'epoch': 0.51} + 51%|█████ | 6181/12188 [13:12:01<12:02:52, 7.22s/it] 51%|█████ | 6182/12188 [13:12:08<12:15:15, 7.35s/it] {'loss': 0.3192, 'grad_norm': 0.6427575916807178, 'learning_rate': 5.12754194809348e-06, 'epoch': 0.51} + 51%|█████ | 6182/12188 [13:12:08<12:15:15, 7.35s/it] 51%|█████ | 6183/12188 [13:12:16<12:12:23, 7.32s/it] {'loss': 0.3052, 'grad_norm': 0.7516173533643669, 'learning_rate': 5.126213669876748e-06, 'epoch': 0.51} + 51%|█████ | 6183/12188 [13:12:16<12:12:23, 7.32s/it] 51%|█████ | 6184/12188 [13:12:23<12:07:07, 7.27s/it] {'loss': 0.2967, 'grad_norm': 0.9898676602378883, 'learning_rate': 5.1248853827470114e-06, 'epoch': 0.51} + 51%|█████ | 6184/12188 [13:12:23<12:07:07, 7.27s/it] 51%|█████ | 6185/12188 [13:12:30<11:58:11, 7.18s/it] {'loss': 0.3023, 'grad_norm': 0.6431237449698677, 'learning_rate': 5.12355708679807e-06, 'epoch': 0.51} + 51%|█████ | 6185/12188 [13:12:30<11:58:11, 7.18s/it] 51%|█████ | 6186/12188 [13:12:37<11:48:15, 7.08s/it] {'loss': 0.3353, 'grad_norm': 0.7381107318182555, 'learning_rate': 5.122228782123723e-06, 'epoch': 0.51} + 51%|█████ | 6186/12188 [13:12:37<11:48:15, 7.08s/it] 51%|█████ | 6187/12188 [13:12:44<11:54:03, 7.14s/it] {'loss': 0.3553, 'grad_norm': 0.7174998033190988, 'learning_rate': 5.120900468817776e-06, 'epoch': 0.51} + 51%|█████ | 6187/12188 [13:12:44<11:54:03, 7.14s/it] 51%|█████ | 6188/12188 [13:12:51<12:04:50, 7.25s/it] {'loss': 0.3152, 'grad_norm': 0.6642493760029903, 'learning_rate': 5.119572146974034e-06, 'epoch': 0.51} + 51%|█████ | 6188/12188 [13:12:51<12:04:50, 7.25s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f6b5361b3d0> +[Try #0] Failed to fetch sample 4809814 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f6b5361b3d0> +Problematic sample: {'image': '20240823_021250_before_screenshot_sub0.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Class: icon-btn'"}, {'from': 'gpt', 'value': '\nclick(x=0.802, y=0.419)\n'}]} + 51%|█████ | 6189/12188 [13:13:01<13:18:50, 7.99s/it] {'loss': 0.3455, 'grad_norm': 0.7121320457337056, 'learning_rate': 5.118243816686299e-06, 'epoch': 0.51} + 51%|█████ | 6189/12188 [13:13:01<13:18:50, 7.99s/it] 51%|█████ | 6190/12188 [13:13:09<13:04:21, 7.85s/it] {'loss': 0.3145, 'grad_norm': 0.6859361077688829, 'learning_rate': 5.1169154780483754e-06, 'epoch': 0.51} + 51%|█████ | 6190/12188 [13:13:09<13:04:21, 7.85s/it] 51%|█████ | 6191/12188 [13:13:16<12:58:49, 7.79s/it] {'loss': 0.3659, 'grad_norm': 0.7089908239378433, 'learning_rate': 5.115587131154068e-06, 'epoch': 0.51} + 51%|█████ | 6191/12188 [13:13:16<12:58:49, 7.79s/it] 51%|█████ | 6192/12188 [13:13:24<13:01:29, 7.82s/it] {'loss': 0.3283, 'grad_norm': 0.705611738136857, 'learning_rate': 5.114258776097184e-06, 'epoch': 0.51} + 51%|█████ | 6192/12188 [13:13:24<13:01:29, 7.82s/it] 51%|█████ | 6193/12188 [13:13:33<13:30:41, 8.11s/it] {'loss': 0.3047, 'grad_norm': 0.6671148688924149, 'learning_rate': 5.1129304129715286e-06, 'epoch': 0.51} + 51%|█████ | 6193/12188 [13:13:33<13:30:41, 8.11s/it] 51%|█████ | 6194/12188 [13:13:40<12:52:27, 7.73s/it] {'loss': 0.3008, 'grad_norm': 0.7301396883911623, 'learning_rate': 5.111602041870908e-06, 'epoch': 0.51} + 51%|█████ | 6194/12188 [13:13:40<12:52:27, 7.73s/it] 51%|█████ | 6195/12188 [13:13:47<12:33:59, 7.55s/it] {'loss': 0.2951, 'grad_norm': 0.5897363383155861, 'learning_rate': 5.11027366288913e-06, 'epoch': 0.51} + 51%|█████ | 6195/12188 [13:13:47<12:33:59, 7.55s/it] 51%|█████ | 6196/12188 [13:13:55<12:47:18, 7.68s/it] {'loss': 0.3355, 'grad_norm': 0.6798093046651194, 'learning_rate': 5.108945276120004e-06, 'epoch': 0.51} + 51%|█████ | 6196/12188 [13:13:55<12:47:18, 7.68s/it] 51%|█████ | 6197/12188 [13:14:02<12:30:51, 7.52s/it] {'loss': 0.3548, 'grad_norm': 0.6272955743271569, 'learning_rate': 5.107616881657338e-06, 'epoch': 0.51} + 51%|█████ | 6197/12188 [13:14:02<12:30:51, 7.52s/it] 51%|█████ | 6198/12188 [13:14:11<13:14:26, 7.96s/it] {'loss': 0.3481, 'grad_norm': 0.6130594209051445, 'learning_rate': 5.10628847959494e-06, 'epoch': 0.51} + 51%|█████ | 6198/12188 [13:14:11<13:14:26, 7.96s/it] 51%|█████ | 6199/12188 [13:14:18<12:36:22, 7.58s/it] {'loss': 0.3199, 'grad_norm': 1.110403727353935, 'learning_rate': 5.104960070026622e-06, 'epoch': 0.51} + 51%|█████ | 6199/12188 [13:14:18<12:36:22, 7.58s/it] 51%|█████ | 6200/12188 [13:14:25<12:26:19, 7.48s/it] {'loss': 0.3579, 'grad_norm': 0.7343184496706434, 'learning_rate': 5.1036316530461905e-06, 'epoch': 0.51} + 51%|█████ | 6200/12188 [13:14:25<12:26:19, 7.48s/it] 51%|█████ | 6201/12188 [13:14:32<12:01:05, 7.23s/it] {'loss': 0.3069, 'grad_norm': 0.6242034287661231, 'learning_rate': 5.102303228747458e-06, 'epoch': 0.51} + 51%|█████ | 6201/12188 [13:14:32<12:01:05, 7.23s/it] 51%|█████ | 6202/12188 [13:14:39<11:54:00, 7.16s/it] {'loss': 0.3241, 'grad_norm': 0.6458645484761398, 'learning_rate': 5.1009747972242375e-06, 'epoch': 0.51} + 51%|█████ | 6202/12188 [13:14:39<11:54:00, 7.16s/it] 51%|█████ | 6203/12188 [13:14:46<11:50:04, 7.12s/it] {'loss': 0.2891, 'grad_norm': 0.7527947975521535, 'learning_rate': 5.099646358570338e-06, 'epoch': 0.51} + 51%|█████ | 6203/12188 [13:14:46<11:50:04, 7.12s/it] 51%|█████ | 6204/12188 [13:14:52<11:39:44, 7.02s/it] {'loss': 0.3434, 'grad_norm': 0.6817048773540239, 'learning_rate': 5.098317912879572e-06, 'epoch': 0.51} + 51%|█████ | 6204/12188 [13:14:52<11:39:44, 7.02s/it] 51%|█████ | 6205/12188 [13:15:00<11:43:47, 7.06s/it] {'loss': 0.3221, 'grad_norm': 0.6483723377012615, 'learning_rate': 5.0969894602457556e-06, 'epoch': 0.51} + 51%|█████ | 6205/12188 [13:15:00<11:43:47, 7.06s/it] 51%|█████ | 6206/12188 [13:15:07<11:44:52, 7.07s/it] {'loss': 0.3762, 'grad_norm': 0.6614716431191098, 'learning_rate': 5.0956610007626974e-06, 'epoch': 0.51} + 51%|█████ | 6206/12188 [13:15:07<11:44:52, 7.07s/it] 51%|█████ | 6207/12188 [13:15:14<11:46:24, 7.09s/it] {'loss': 0.3118, 'grad_norm': 0.9211816286022725, 'learning_rate': 5.094332534524214e-06, 'epoch': 0.51} + 51%|█████ | 6207/12188 [13:15:14<11:46:24, 7.09s/it] 51%|█████ | 6208/12188 [13:15:21<12:05:29, 7.28s/it] {'loss': 0.324, 'grad_norm': 0.724486710768803, 'learning_rate': 5.093004061624118e-06, 'epoch': 0.51} + 51%|█████ | 6208/12188 [13:15:21<12:05:29, 7.28s/it] 51%|█████ | 6209/12188 [13:15:28<11:55:42, 7.18s/it] {'loss': 0.303, 'grad_norm': 0.7396572594024775, 'learning_rate': 5.091675582156224e-06, 'epoch': 0.51} + 51%|█████ | 6209/12188 [13:15:28<11:55:42, 7.18s/it] 51%|█████ | 6210/12188 [13:15:35<11:48:05, 7.11s/it] {'loss': 0.324, 'grad_norm': 0.6968157565988639, 'learning_rate': 5.0903470962143476e-06, 'epoch': 0.51} + 51%|█████ | 6210/12188 [13:15:35<11:48:05, 7.11s/it] 51%|█████ | 6211/12188 [13:15:42<11:47:18, 7.10s/it] {'loss': 0.3405, 'grad_norm': 0.6859223151100055, 'learning_rate': 5.089018603892305e-06, 'epoch': 0.51} + 51%|█████ | 6211/12188 [13:15:42<11:47:18, 7.10s/it] 51%|█████ | 6212/12188 [13:15:50<12:12:27, 7.35s/it] {'loss': 0.3202, 'grad_norm': 0.6750635162055268, 'learning_rate': 5.0876901052839115e-06, 'epoch': 0.51} + 51%|█████ | 6212/12188 [13:15:50<12:12:27, 7.35s/it] 51%|█████ | 6213/12188 [13:15:57<11:51:25, 7.14s/it] {'loss': 0.3703, 'grad_norm': 0.7263142033875682, 'learning_rate': 5.086361600482984e-06, 'epoch': 0.51} + 51%|█████ | 6213/12188 [13:15:57<11:51:25, 7.14s/it] 51%|█████ | 6214/12188 [13:16:04<11:54:03, 7.17s/it] {'loss': 0.2845, 'grad_norm': 0.5729729372028806, 'learning_rate': 5.0850330895833385e-06, 'epoch': 0.51} + 51%|█████ | 6214/12188 [13:16:04<11:54:03, 7.17s/it] 51%|█████ | 6215/12188 [13:16:12<11:57:24, 7.21s/it] {'loss': 0.2988, 'grad_norm': 0.6779079813263039, 'learning_rate': 5.083704572678794e-06, 'epoch': 0.51} + 51%|█████ | 6215/12188 [13:16:12<11:57:24, 7.21s/it] 51%|█████ | 6216/12188 [13:16:18<11:41:43, 7.05s/it] {'loss': 0.3237, 'grad_norm': 0.8180727108863056, 'learning_rate': 5.082376049863166e-06, 'epoch': 0.51} + 51%|█████ | 6216/12188 [13:16:18<11:41:43, 7.05s/it] 51%|█████ | 6217/12188 [13:16:26<11:48:03, 7.12s/it] {'loss': 0.3479, 'grad_norm': 0.8385886029679234, 'learning_rate': 5.081047521230274e-06, 'epoch': 0.51} + 51%|█████ | 6217/12188 [13:16:26<11:48:03, 7.12s/it] 51%|█████ | 6218/12188 [13:16:33<12:04:40, 7.28s/it] {'loss': 0.3906, 'grad_norm': 0.7284224010811763, 'learning_rate': 5.079718986873935e-06, 'epoch': 0.51} + 51%|█████ | 6218/12188 [13:16:33<12:04:40, 7.28s/it] 51%|█████ | 6219/12188 [13:16:41<12:13:45, 7.38s/it] {'loss': 0.3216, 'grad_norm': 0.6945855185027167, 'learning_rate': 5.0783904468879715e-06, 'epoch': 0.51} + 51%|█████ | 6219/12188 [13:16:41<12:13:45, 7.38s/it] 51%|█████ | 6220/12188 [13:16:48<12:07:02, 7.31s/it] {'loss': 0.3294, 'grad_norm': 0.6469474235374474, 'learning_rate': 5.077061901366199e-06, 'epoch': 0.51} + 51%|█████ | 6220/12188 [13:16:48<12:07:02, 7.31s/it] 51%|█████ | 6221/12188 [13:16:56<12:43:32, 7.68s/it] {'loss': 0.3226, 'grad_norm': 0.7084266807241337, 'learning_rate': 5.075733350402439e-06, 'epoch': 0.51} + 51%|█████ | 6221/12188 [13:16:56<12:43:32, 7.68s/it] 51%|█████ | 6222/12188 [13:17:06<13:48:16, 8.33s/it] {'loss': 0.3309, 'grad_norm': 0.6446478842255121, 'learning_rate': 5.074404794090512e-06, 'epoch': 0.51} + 51%|█████ | 6222/12188 [13:17:06<13:48:16, 8.33s/it] 51%|█████ | 6223/12188 [13:17:15<13:59:45, 8.45s/it] {'loss': 0.3295, 'grad_norm': 0.6269334413714738, 'learning_rate': 5.073076232524238e-06, 'epoch': 0.51} + 51%|█████ | 6223/12188 [13:17:15<13:59:45, 8.45s/it] 51%|█████ | 6224/12188 [13:17:22<13:22:16, 8.07s/it] {'loss': 0.3217, 'grad_norm': 0.731116716220615, 'learning_rate': 5.071747665797437e-06, 'epoch': 0.51} + 51%|█████ | 6224/12188 [13:17:22<13:22:16, 8.07s/it] 51%|█████ | 6225/12188 [13:17:30<13:00:08, 7.85s/it] {'loss': 0.3192, 'grad_norm': 0.7243773795701205, 'learning_rate': 5.0704190940039326e-06, 'epoch': 0.51} + 51%|█████ | 6225/12188 [13:17:30<13:00:08, 7.85s/it] 51%|█████ | 6226/12188 [13:17:36<12:30:25, 7.55s/it] {'loss': 0.2768, 'grad_norm': 0.682465446981013, 'learning_rate': 5.069090517237544e-06, 'epoch': 0.51} + 51%|█████ | 6226/12188 [13:17:36<12:30:25, 7.55s/it] 51%|█████ | 6227/12188 [13:17:43<12:07:37, 7.32s/it] {'loss': 0.3457, 'grad_norm': 0.6564719848395066, 'learning_rate': 5.0677619355920935e-06, 'epoch': 0.51} + 51%|█████ | 6227/12188 [13:17:43<12:07:37, 7.32s/it] 51%|█████ | 6228/12188 [13:17:51<12:26:55, 7.52s/it] {'loss': 0.2752, 'grad_norm': 0.5996189534031003, 'learning_rate': 5.066433349161405e-06, 'epoch': 0.51} + 51%|█████ | 6228/12188 [13:17:51<12:26:55, 7.52s/it] 51%|█████ | 6229/12188 [13:17:59<12:41:09, 7.66s/it] {'loss': 0.3568, 'grad_norm': 0.7120756382029867, 'learning_rate': 5.0651047580392995e-06, 'epoch': 0.51} + 51%|█████ | 6229/12188 [13:17:59<12:41:09, 7.66s/it] 51%|█████ | 6230/12188 [13:18:06<12:20:19, 7.46s/it] {'loss': 0.39, 'grad_norm': 0.834137465398037, 'learning_rate': 5.063776162319601e-06, 'epoch': 0.51} + 51%|█████ | 6230/12188 [13:18:06<12:20:19, 7.46s/it] 51%|█████ | 6231/12188 [13:18:14<12:37:00, 7.62s/it] {'loss': 0.3118, 'grad_norm': 0.6918539259726886, 'learning_rate': 5.062447562096131e-06, 'epoch': 0.51} + 51%|█████ | 6231/12188 [13:18:14<12:37:00, 7.62s/it] 51%|█████ | 6232/12188 [13:18:21<12:12:27, 7.38s/it] {'loss': 0.3645, 'grad_norm': 0.6992027917321748, 'learning_rate': 5.061118957462716e-06, 'epoch': 0.51} + 51%|█████ | 6232/12188 [13:18:21<12:12:27, 7.38s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 899, in process_image_unified + visual_processed = processor.preprocess(image, return_tensors="pt") + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 432, in preprocess + patches, image_grid_thw = self._preprocess( + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 252, in _preprocess + resized_height, resized_width = smart_resize( + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 69, in smart_resize + raise ValueError(f"height:{height} and width:{width} must be larger than factor:{factor}") +ValueError: height:21 and width:135 must be larger than factor:28 +[Try #0] Failed to fetch sample 2074782 in VC:s3://gui-agent/jedi/images/figma400k/figma400k_extracted/. Exception: height:21 and width:135 must be larger than factor:28 +Problematic sample: {'image': 'a4b739659e7c325d58fab0a3e55e135c875dc9a485e2b2af942dbafdf4662908.png', 'conversations': [{'from': 'human', 'value': '\nThe position of this Icon can be described as:\nThe icon is located in the top navigation bar, slightly to the right of the center. It is positioned between the RustyLoot logo on the left and a series of other icons on the right, such as user profile and settings icons.\n\nFunctional capabilities of the Icon:\nThis icon likely serves as a status indicator or a shortcut to a specific feature or section within the application, possibly related to user achievements or rewards.'}, {'from': 'gpt', 'value': '[[0, 0, 1000, 1000]]', 'recipient': 'all', 'end_turn': True}]} + 51%|█████ | 6233/12188 [13:18:29<12:16:38, 7.42s/it] {'loss': 0.331, 'grad_norm': 1.0101645571755509, 'learning_rate': 5.0597903485131795e-06, 'epoch': 0.51} + 51%|█████ | 6233/12188 [13:18:29<12:16:38, 7.42s/it] 51%|█████ | 6234/12188 [13:18:36<12:17:56, 7.44s/it] {'loss': 0.3358, 'grad_norm': 0.6906606048212824, 'learning_rate': 5.058461735341342e-06, 'epoch': 0.51} + 51%|█████ | 6234/12188 [13:18:36<12:17:56, 7.44s/it] 51%|█████ | 6235/12188 [13:18:44<12:46:28, 7.73s/it] {'loss': 0.3831, 'grad_norm': 0.7143036148928786, 'learning_rate': 5.057133118041033e-06, 'epoch': 0.51} + 51%|█████ | 6235/12188 [13:18:44<12:46:28, 7.73s/it] 51%|█████ | 6236/12188 [13:18:52<12:40:22, 7.67s/it] {'loss': 0.3466, 'grad_norm': 0.7242335836530975, 'learning_rate': 5.055804496706072e-06, 'epoch': 0.51} + 51%|█████ | 6236/12188 [13:18:52<12:40:22, 7.67s/it] 51%|█████ | 6237/12188 [13:18:59<12:09:27, 7.35s/it] {'loss': 0.3942, 'grad_norm': 0.7070541799017314, 'learning_rate': 5.05447587143029e-06, 'epoch': 0.51} + 51%|█████ | 6237/12188 [13:18:59<12:09:27, 7.35s/it] 51%|█████ | 6238/12188 [13:19:05<11:51:19, 7.17s/it] {'loss': 0.3674, 'grad_norm': 0.6821295853247645, 'learning_rate': 5.053147242307508e-06, 'epoch': 0.51} + 51%|█████ | 6238/12188 [13:19:05<11:51:19, 7.17s/it] 51%|█████ | 6239/12188 [13:19:12<11:43:01, 7.09s/it] {'loss': 0.3219, 'grad_norm': 0.6843494275280468, 'learning_rate': 5.051818609431554e-06, 'epoch': 0.51} + 51%|█████ | 6239/12188 [13:19:12<11:43:01, 7.09s/it] 51%|█████ | 6240/12188 [13:19:20<12:02:38, 7.29s/it] {'loss': 0.3468, 'grad_norm': 0.7109406089346123, 'learning_rate': 5.050489972896253e-06, 'epoch': 0.51} + 51%|█████ | 6240/12188 [13:19:20<12:02:38, 7.29s/it] 51%|█████ | 6241/12188 [13:19:28<12:27:21, 7.54s/it] {'loss': 0.3237, 'grad_norm': 0.768381150568973, 'learning_rate': 5.049161332795431e-06, 'epoch': 0.51} + 51%|█████ | 6241/12188 [13:19:28<12:27:21, 7.54s/it] 51%|█████ | 6242/12188 [13:19:35<12:06:09, 7.33s/it] {'loss': 0.2992, 'grad_norm': 0.8030191724350119, 'learning_rate': 5.047832689222914e-06, 'epoch': 0.51} + 51%|█████ | 6242/12188 [13:19:35<12:06:09, 7.33s/it] 51%|█████ | 6243/12188 [13:19:42<12:09:11, 7.36s/it] {'loss': 0.3215, 'grad_norm': 0.6491892115890736, 'learning_rate': 5.046504042272531e-06, 'epoch': 0.51} + 51%|█████ | 6243/12188 [13:19:42<12:09:11, 7.36s/it] 51%|█████ | 6244/12188 [13:19:50<12:32:47, 7.60s/it] {'loss': 0.2979, 'grad_norm': 0.642599302460327, 'learning_rate': 5.0451753920381056e-06, 'epoch': 0.51} + 51%|█████ | 6244/12188 [13:19:51<12:32:47, 7.60s/it] 51%|█████ | 6245/12188 [13:19:57<12:09:22, 7.36s/it] {'loss': 0.3284, 'grad_norm': 0.6780806420679877, 'learning_rate': 5.043846738613466e-06, 'epoch': 0.51} + 51%|█████ | 6245/12188 [13:19:57<12:09:22, 7.36s/it] 51%|█████ | 6246/12188 [13:20:04<11:46:48, 7.14s/it] {'loss': 0.3117, 'grad_norm': 0.8205128788453121, 'learning_rate': 5.0425180820924415e-06, 'epoch': 0.51} + 51%|█████ | 6246/12188 [13:20:04<11:46:48, 7.14s/it] 51%|█████▏ | 6247/12188 [13:20:11<11:39:09, 7.06s/it] {'loss': 0.3284, 'grad_norm': 0.6159462650171361, 'learning_rate': 5.04118942256886e-06, 'epoch': 0.51} + 51%|█████▏ | 6247/12188 [13:20:11<11:39:09, 7.06s/it] 51%|█████▏ | 6248/12188 [13:20:19<12:01:08, 7.28s/it] {'loss': 0.2981, 'grad_norm': 0.8531120567255412, 'learning_rate': 5.039860760136546e-06, 'epoch': 0.51} + 51%|█████▏ | 6248/12188 [13:20:19<12:01:08, 7.28s/it] 51%|█████▏ | 6249/12188 [13:20:26<12:11:43, 7.39s/it] {'loss': 0.3255, 'grad_norm': 0.6213923323638715, 'learning_rate': 5.038532094889327e-06, 'epoch': 0.51} + 51%|█████▏ | 6249/12188 [13:20:26<12:11:43, 7.39s/it] 51%|█████▏ | 6250/12188 [13:20:34<12:24:53, 7.53s/it] {'loss': 0.3914, 'grad_norm': 0.6827982171350587, 'learning_rate': 5.037203426921036e-06, 'epoch': 0.51} + 51%|█████▏ | 6250/12188 [13:20:34<12:24:53, 7.53s/it] 51%|█████▏ | 6251/12188 [13:20:41<12:10:22, 7.38s/it] {'loss': 0.3141, 'grad_norm': 0.6146004958505239, 'learning_rate': 5.035874756325498e-06, 'epoch': 0.51} + 51%|█████▏ | 6251/12188 [13:20:41<12:10:22, 7.38s/it] 51%|█████▏ | 6252/12188 [13:20:48<11:55:36, 7.23s/it] {'loss': 0.2765, 'grad_norm': 0.7532862560973611, 'learning_rate': 5.034546083196542e-06, 'epoch': 0.51} + 51%|█████▏ | 6252/12188 [13:20:48<11:55:36, 7.23s/it] 51%|█████▏ | 6253/12188 [13:20:55<11:35:06, 7.03s/it] {'loss': 0.3355, 'grad_norm': 0.6448313375069078, 'learning_rate': 5.033217407627998e-06, 'epoch': 0.51} + 51%|█████▏ | 6253/12188 [13:20:55<11:35:06, 7.03s/it] 51%|█████▏ | 6254/12188 [13:21:02<11:33:06, 7.01s/it] {'loss': 0.3032, 'grad_norm': 0.6473600323359883, 'learning_rate': 5.031888729713693e-06, 'epoch': 0.51} + 51%|█████▏ | 6254/12188 [13:21:02<11:33:06, 7.01s/it] 51%|█████▏ | 6255/12188 [13:21:09<11:36:35, 7.04s/it] {'loss': 0.3696, 'grad_norm': 0.6991865539790592, 'learning_rate': 5.0305600495474586e-06, 'epoch': 0.51} + 51%|█████▏ | 6255/12188 [13:21:09<11:36:35, 7.04s/it] 51%|█████▏ | 6256/12188 [13:21:16<11:34:46, 7.03s/it] {'loss': 0.3458, 'grad_norm': 0.6864863230702278, 'learning_rate': 5.029231367223122e-06, 'epoch': 0.51} + 51%|█████▏ | 6256/12188 [13:21:16<11:34:46, 7.03s/it] 51%|█████▏ | 6257/12188 [13:21:23<11:37:49, 7.06s/it] {'loss': 0.3153, 'grad_norm': 0.7014872669767792, 'learning_rate': 5.027902682834512e-06, 'epoch': 0.51} + 51%|█████▏ | 6257/12188 [13:21:23<11:37:49, 7.06s/it] 51%|█████▏ | 6258/12188 [13:21:30<11:40:09, 7.08s/it] {'loss': 0.3879, 'grad_norm': 0.7295244391843319, 'learning_rate': 5.02657399647546e-06, 'epoch': 0.51} + 51%|█████▏ | 6258/12188 [13:21:30<11:40:09, 7.08s/it] 51%|█████▏ | 6259/12188 [13:21:37<11:25:35, 6.94s/it] {'loss': 0.3071, 'grad_norm': 0.6534739534666046, 'learning_rate': 5.025245308239795e-06, 'epoch': 0.51} + 51%|█████▏ | 6259/12188 [13:21:37<11:25:35, 6.94s/it] 51%|█████▏ | 6260/12188 [13:21:44<11:33:24, 7.02s/it] {'loss': 0.3068, 'grad_norm': 0.6978070273663871, 'learning_rate': 5.0239166182213486e-06, 'epoch': 0.51} + 51%|█████▏ | 6260/12188 [13:21:44<11:33:24, 7.02s/it] 51%|█████▏ | 6261/12188 [13:21:52<12:06:16, 7.35s/it] {'loss': 0.3056, 'grad_norm': 0.9725471909719373, 'learning_rate': 5.022587926513946e-06, 'epoch': 0.51} + 51%|█████▏ | 6261/12188 [13:21:52<12:06:16, 7.35s/it] 51%|█████▏ | 6262/12188 [13:21:59<11:54:29, 7.23s/it] {'loss': 0.3625, 'grad_norm': 0.6218270088833704, 'learning_rate': 5.021259233211424e-06, 'epoch': 0.51} + 51%|█████▏ | 6262/12188 [13:21:59<11:54:29, 7.23s/it] 51%|█████▏ | 6263/12188 [13:22:06<11:49:49, 7.19s/it] {'loss': 0.3255, 'grad_norm': 0.8256569135673905, 'learning_rate': 5.019930538407607e-06, 'epoch': 0.51} + 51%|█████▏ | 6263/12188 [13:22:06<11:49:49, 7.19s/it] 51%|█████▏ | 6264/12188 [13:22:13<11:53:36, 7.23s/it] {'loss': 0.3526, 'grad_norm': 0.6987161452173056, 'learning_rate': 5.018601842196326e-06, 'epoch': 0.51} + 51%|█████▏ | 6264/12188 [13:22:13<11:53:36, 7.23s/it] 51%|█████▏ | 6265/12188 [13:22:21<12:01:10, 7.31s/it] {'loss': 0.3315, 'grad_norm': 0.778872577205239, 'learning_rate': 5.017273144671415e-06, 'epoch': 0.51} + 51%|█████▏ | 6265/12188 [13:22:21<12:01:10, 7.31s/it] 51%|█████▏ | 6266/12188 [13:22:28<11:59:43, 7.29s/it] {'loss': 0.3479, 'grad_norm': 0.7352479083218652, 'learning_rate': 5.015944445926702e-06, 'epoch': 0.51} + 51%|█████▏ | 6266/12188 [13:22:28<11:59:43, 7.29s/it] 51%|█████▏ | 6267/12188 [13:22:35<12:01:11, 7.31s/it] {'loss': 0.3366, 'grad_norm': 0.7055458171348938, 'learning_rate': 5.014615746056017e-06, 'epoch': 0.51} + 51%|█████▏ | 6267/12188 [13:22:35<12:01:11, 7.31s/it] 51%|█████▏ | 6268/12188 [13:22:43<12:09:25, 7.39s/it] {'loss': 0.311, 'grad_norm': 0.6644579199081649, 'learning_rate': 5.013287045153192e-06, 'epoch': 0.51} + 51%|█████▏ | 6268/12188 [13:22:43<12:09:25, 7.39s/it] 51%|███��█▏ | 6269/12188 [13:22:50<11:53:37, 7.23s/it] {'loss': 0.3352, 'grad_norm': 0.6713774660761762, 'learning_rate': 5.011958343312058e-06, 'epoch': 0.51} + 51%|█████▏ | 6269/12188 [13:22:50<11:53:37, 7.23s/it] 51%|█████▏ | 6270/12188 [13:22:57<11:45:53, 7.16s/it] {'loss': 0.359, 'grad_norm': 0.6676976407555815, 'learning_rate': 5.010629640626444e-06, 'epoch': 0.51} + 51%|█████▏ | 6270/12188 [13:22:57<11:45:53, 7.16s/it] 51%|█████▏ | 6271/12188 [13:23:04<11:37:58, 7.08s/it] {'loss': 0.3221, 'grad_norm': 0.9587958720083599, 'learning_rate': 5.0093009371901826e-06, 'epoch': 0.51} + 51%|█████▏ | 6271/12188 [13:23:04<11:37:58, 7.08s/it] 51%|█████▏ | 6272/12188 [13:23:11<11:39:23, 7.09s/it] {'loss': 0.3432, 'grad_norm': 0.6589971149427153, 'learning_rate': 5.007972233097103e-06, 'epoch': 0.51} + 51%|█████▏ | 6272/12188 [13:23:11<11:39:23, 7.09s/it] 51%|█████▏ | 6273/12188 [13:23:18<11:32:51, 7.03s/it] {'loss': 0.324, 'grad_norm': 0.7016904098763822, 'learning_rate': 5.0066435284410365e-06, 'epoch': 0.51} + 51%|█████▏ | 6273/12188 [13:23:18<11:32:51, 7.03s/it] 51%|█████▏ | 6274/12188 [13:23:25<11:42:46, 7.13s/it] {'loss': 0.3209, 'grad_norm': 0.8720232162755597, 'learning_rate': 5.005314823315816e-06, 'epoch': 0.51} + 51%|█████▏ | 6274/12188 [13:23:25<11:42:46, 7.13s/it] 51%|█████▏ | 6275/12188 [13:23:33<12:14:58, 7.46s/it] {'loss': 0.364, 'grad_norm': 0.8086267527476155, 'learning_rate': 5.00398611781527e-06, 'epoch': 0.51} + 51%|█████▏ | 6275/12188 [13:23:33<12:14:58, 7.46s/it] 51%|█████▏ | 6276/12188 [13:23:41<12:16:02, 7.47s/it] {'loss': 0.3058, 'grad_norm': 0.6901219185681543, 'learning_rate': 5.002657412033233e-06, 'epoch': 0.51} + 51%|█████▏ | 6276/12188 [13:23:41<12:16:02, 7.47s/it] 52%|█████▏ | 6277/12188 [13:23:48<11:59:07, 7.30s/it] {'loss': 0.3287, 'grad_norm': 0.6324082962090889, 'learning_rate': 5.001328706063532e-06, 'epoch': 0.51} + 52%|█████▏ | 6277/12188 [13:23:48<11:59:07, 7.30s/it] 52%|█████▏ | 6278/12188 [13:23:55<12:02:16, 7.33s/it] {'loss': 0.3665, 'grad_norm': 0.9034650092530822, 'learning_rate': 5e-06, 'epoch': 0.52} + 52%|█████▏ | 6278/12188 [13:23:55<12:02:16, 7.33s/it] 52%|█████▏ | 6279/12188 [13:24:02<11:47:56, 7.19s/it] {'loss': 0.3869, 'grad_norm': 0.739017032251046, 'learning_rate': 4.998671293936468e-06, 'epoch': 0.52} + 52%|█████▏ | 6279/12188 [13:24:02<11:47:56, 7.19s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7febbb83de40> +[Try #0] Failed to fetch sample 4336098 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7febbb83de40> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Wikimedia Foundation'"}, {'from': 'gpt', 'value': '\nclick(x=0.9215, y=0.64)\n'}]} + 52%|█████▏ | 6280/12188 [13:24:09<11:52:21, 7.23s/it] {'loss': 0.2691, 'grad_norm': 0.6165443663891559, 'learning_rate': 4.997342587966769e-06, 'epoch': 0.52} + 52%|█████▏ | 6280/12188 [13:24:09<11:52:21, 7.23s/it] 52%|█████▏ | 6281/12188 [13:24:17<12:00:51, 7.32s/it] {'loss': 0.3172, 'grad_norm': 0.7064045367105056, 'learning_rate': 4.9960138821847305e-06, 'epoch': 0.52} + 52%|█████▏ | 6281/12188 [13:24:17<12:00:51, 7.32s/it] 52%|█████▏ | 6282/12188 [13:24:23<11:42:19, 7.14s/it] {'loss': 0.3295, 'grad_norm': 0.8055243615107298, 'learning_rate': 4.994685176684185e-06, 'epoch': 0.52} + 52%|█████▏ | 6282/12188 [13:24:23<11:42:19, 7.14s/it] 52%|█████▏ | 6283/12188 [13:24:30<11:35:37, 7.07s/it] {'loss': 0.3043, 'grad_norm': 0.7320038235611215, 'learning_rate': 4.993356471558965e-06, 'epoch': 0.52} + 52%|█████▏ | 6283/12188 [13:24:30<11:35:37, 7.07s/it] 52%|█████▏ | 6284/12188 [13:24:38<11:40:03, 7.11s/it] {'loss': 0.3277, 'grad_norm': 0.6684841915904202, 'learning_rate': 4.992027766902898e-06, 'epoch': 0.52} + 52%|█████▏ | 6284/12188 [13:24:38<11:40:03, 7.11s/it] 52%|█████▏ | 6285/12188 [13:24:46<12:21:48, 7.54s/it] {'loss': 0.3365, 'grad_norm': 0.6564097209868054, 'learning_rate': 4.99069906280982e-06, 'epoch': 0.52} + 52%|█████▏ | 6285/12188 [13:24:46<12:21:48, 7.54s/it] 52%|█████▏ | 6286/12188 [13:24:53<12:13:14, 7.45s/it] {'loss': 0.3306, 'grad_norm': 0.651512792248995, 'learning_rate': 4.989370359373557e-06, 'epoch': 0.52} + 52%|█████▏ | 6286/12188 [13:24:53<12:13:14, 7.45s/it] 52%|█████▏ | 6287/12188 [13:25:00<11:41:19, 7.13s/it] {'loss': 0.3132, 'grad_norm': 0.8571551357555078, 'learning_rate': 4.988041656687944e-06, 'epoch': 0.52} + 52%|█████▏ | 6287/12188 [13:25:00<11:41:19, 7.13s/it] 52%|█████▏ | 6288/12188 [13:25:07<11:39:26, 7.11s/it] {'loss': 0.3208, 'grad_norm': 0.7146762298809404, 'learning_rate': 4.986712954846809e-06, 'epoch': 0.52} + 52%|█████▏ | 6288/12188 [13:25:07<11:39:26, 7.11s/it] 52%|█████▏ | 6289/12188 [13:25:14<11:45:36, 7.18s/it] {'loss': 0.3071, 'grad_norm': 0.6814279219341846, 'learning_rate': 4.985384253943984e-06, 'epoch': 0.52} + 52%|█████▏ | 6289/12188 [13:25:14<11:45:36, 7.18s/it] 52%|█████▏ | 6290/12188 [13:25:21<11:48:36, 7.21s/it] {'loss': 0.3688, 'grad_norm': 0.8345226437784572, 'learning_rate': 4.9840555540733e-06, 'epoch': 0.52} + 52%|█████▏ | 6290/12188 [13:25:21<11:48:36, 7.21s/it] 52%|█████▏ | 6291/12188 [13:25:28<11:34:36, 7.07s/it] {'loss': 0.3574, 'grad_norm': 0.6076427478298173, 'learning_rate': 4.982726855328586e-06, 'epoch': 0.52} + 52%|█████▏ | 6291/12188 [13:25:28<11:34:36, 7.07s/it] 52%|█████▏ | 6292/12188 [13:25:36<12:09:05, 7.42s/it] {'loss': 0.3792, 'grad_norm': 0.7878642144527467, 'learning_rate': 4.981398157803674e-06, 'epoch': 0.52} + 52%|█████▏ | 6292/12188 [13:25:36<12:09:05, 7.42s/it] 52%|█████▏ | 6293/12188 [13:25:43<11:40:33, 7.13s/it] {'loss': 0.3199, 'grad_norm': 0.7653276010091864, 'learning_rate': 4.980069461592395e-06, 'epoch': 0.52} + 52%|█████▏ | 6293/12188 [13:25:43<11:40:33, 7.13s/it] 52%|█████▏ | 6294/12188 [13:25:50<11:55:15, 7.28s/it] {'loss': 0.3165, 'grad_norm': 0.664061097255933, 'learning_rate': 4.978740766788578e-06, 'epoch': 0.52} + 52%|█████▏ | 6294/12188 [13:25:51<11:55:15, 7.28s/it] 52%|█████▏ | 6295/12188 [13:25:57<11:43:37, 7.16s/it] {'loss': 0.3535, 'grad_norm': 0.6399794163433625, 'learning_rate': 4.977412073486055e-06, 'epoch': 0.52} + 52%|█████▏ | 6295/12188 [13:25:57<11:43:37, 7.16s/it] 52%|█████▏ | 6296/12188 [13:26:04<11:28:47, 7.01s/it] {'loss': 0.3359, 'grad_norm': 0.7518753587977639, 'learning_rate': 4.976083381778654e-06, 'epoch': 0.52} + 52%|█████▏ | 6296/12188 [13:26:04<11:28:47, 7.01s/it] 52%|█████▏ | 6297/12188 [13:26:11<11:34:59, 7.08s/it] {'loss': 0.3142, 'grad_norm': 0.673333460748579, 'learning_rate': 4.974754691760205e-06, 'epoch': 0.52} + 52%|█████▏ | 6297/12188 [13:26:11<11:34:59, 7.08s/it] 52%|█████▏ | 6298/12188 [13:26:18<11:35:28, 7.08s/it] {'loss': 0.3245, 'grad_norm': 0.7170494221395433, 'learning_rate': 4.973426003524541e-06, 'epoch': 0.52} + 52%|█████▏ | 6298/12188 [13:26:18<11:35:28, 7.08s/it] 52%|█████▏ | 6299/12188 [13:26:25<11:33:51, 7.07s/it] {'loss': 0.3562, 'grad_norm': 0.6217869748416132, 'learning_rate': 4.972097317165489e-06, 'epoch': 0.52} + 52%|█████▏ | 6299/12188 [13:26:25<11:33:51, 7.07s/it] 52%|█████▏ | 6300/12188 [13:26:33<11:41:08, 7.14s/it] {'loss': 0.3154, 'grad_norm': 0.752618322253961, 'learning_rate': 4.9707686327768805e-06, 'epoch': 0.52} + 52%|█████▏ | 6300/12188 [13:26:33<11:41:08, 7.14s/it] 52%|█████▏ | 6301/12188 [13:26:41<12:06:37, 7.41s/it] {'loss': 0.2773, 'grad_norm': 0.6328261260313488, 'learning_rate': 4.969439950452543e-06, 'epoch': 0.52} + 52%|█████▏ | 6301/12188 [13:26:41<12:06:37, 7.41s/it] 52%|█████▏ | 6302/12188 [13:26:48<11:54:23, 7.28s/it] {'loss': 0.3389, 'grad_norm': 0.7757745420528865, 'learning_rate': 4.968111270286308e-06, 'epoch': 0.52} + 52%|█████▏ | 6302/12188 [13:26:48<11:54:23, 7.28s/it] 52%|█████▏ | 6303/12188 [13:26:55<12:02:24, 7.37s/it] {'loss': 0.3156, 'grad_norm': 0.7710245049494594, 'learning_rate': 4.966782592372003e-06, 'epoch': 0.52} + 52%|█████▏ | 6303/12188 [13:26:55<12:02:24, 7.37s/it] 52%|█████▏ | 6304/12188 [13:27:02<11:46:46, 7.21s/it] {'loss': 0.3219, 'grad_norm': 0.7154113956241509, 'learning_rate': 4.965453916803459e-06, 'epoch': 0.52} + 52%|█████▏ | 6304/12188 [13:27:02<11:46:46, 7.21s/it] 52%|█████▏ | 6305/12188 [13:27:10<12:06:13, 7.41s/it] {'loss': 0.3432, 'grad_norm': 0.6573657399873225, 'learning_rate': 4.964125243674502e-06, 'epoch': 0.52} + 52%|█████▏ | 6305/12188 [13:27:10<12:06:13, 7.41s/it] 52%|█████▏ | 6306/12188 [13:27:17<11:54:09, 7.28s/it] {'loss': 0.3824, 'grad_norm': 0.7014073976857707, 'learning_rate': 4.962796573078966e-06, 'epoch': 0.52} + 52%|█████▏ | 6306/12188 [13:27:17<11:54:09, 7.28s/it] 52%|█████▏ | 6307/12188 [13:27:24<11:38:33, 7.13s/it] {'loss': 0.326, 'grad_norm': 0.6476866273111834, 'learning_rate': 4.961467905110673e-06, 'epoch': 0.52} + 52%|█████▏ | 6307/12188 [13:27:24<11:38:33, 7.13s/it] 52%|█████▏ | 6308/12188 [13:27:30<11:26:05, 7.00s/it] {'loss': 0.3437, 'grad_norm': 0.637243367970202, 'learning_rate': 4.960139239863458e-06, 'epoch': 0.52} + 52%|█████▏ | 6308/12188 [13:27:30<11:26:05, 7.00s/it] 52%|█████▏ | 6309/12188 [13:27:38<11:36:48, 7.11s/it] {'loss': 0.326, 'grad_norm': 0.5901485027418826, 'learning_rate': 4.958810577431142e-06, 'epoch': 0.52} + 52%|█████▏ | 6309/12188 [13:27:38<11:36:48, 7.11s/it] 52%|█████▏ | 6310/12188 [13:27:45<11:26:58, 7.01s/it] {'loss': 0.3224, 'grad_norm': 0.6084238453345372, 'learning_rate': 4.957481917907559e-06, 'epoch': 0.52} + 52%|█████▏ | 6310/12188 [13:27:45<11:26:58, 7.01s/it] 52%|█████▏ | 6311/12188 [13:27:51<11:21:32, 6.96s/it] {'loss': 0.3297, 'grad_norm': 0.7538280705024395, 'learning_rate': 4.956153261386536e-06, 'epoch': 0.52} + 52%|█████▏ | 6311/12188 [13:27:51<11:21:32, 6.96s/it] 52%|█████▏ | 6312/12188 [13:27:59<11:43:57, 7.19s/it] {'loss': 0.3247, 'grad_norm': 0.6288244443165543, 'learning_rate': 4.954824607961895e-06, 'epoch': 0.52} + 52%|█████▏ | 6312/12188 [13:27:59<11:43:57, 7.19s/it] 52%|█████▏ | 6313/12188 [13:28:06<11:39:59, 7.15s/it] {'loss': 0.3396, 'grad_norm': 0.7070583202430462, 'learning_rate': 4.953495957727472e-06, 'epoch': 0.52} + 52%|█████▏ | 6313/12188 [13:28:06<11:39:59, 7.15s/it] 52%|█████▏ | 6314/12188 [13:28:14<11:46:20, 7.22s/it] {'loss': 0.3838, 'grad_norm': 0.7218404667711075, 'learning_rate': 4.952167310777087e-06, 'epoch': 0.52} + 52%|█████▏ | 6314/12188 [13:28:14<11:46:20, 7.22s/it] 52%|█████▏ | 6315/12188 [13:28:22<12:33:16, 7.70s/it] {'loss': 0.3088, 'grad_norm': 0.6236963148593145, 'learning_rate': 4.95083866720457e-06, 'epoch': 0.52} + 52%|█████▏ | 6315/12188 [13:28:22<12:33:16, 7.70s/it] 52%|█████▏ | 6316/12188 [13:28:29<12:13:23, 7.49s/it] {'loss': 0.3399, 'grad_norm': 0.6849636057744553, 'learning_rate': 4.949510027103749e-06, 'epoch': 0.52} + 52%|█████▏ | 6316/12188 [13:28:29<12:13:23, 7.49s/it] 52%|█████▏ | 6317/12188 [13:28:36<11:51:13, 7.27s/it] {'loss': 0.2916, 'grad_norm': 0.6489221616527864, 'learning_rate': 4.948181390568447e-06, 'epoch': 0.52} + 52%|█████▏ | 6317/12188 [13:28:36<11:51:13, 7.27s/it] 52%|█████▏ | 6318/12188 [13:28:56<17:57:25, 11.01s/it] {'loss': 0.3278, 'grad_norm': 0.7550642511114839, 'learning_rate': 4.946852757692494e-06, 'epoch': 0.52} + 52%|█████▏ | 6318/12188 [13:28:56<17:57:25, 11.01s/it] 52%|█████▏ | 6319/12188 [13:29:03<15:52:37, 9.74s/it] {'loss': 0.2805, 'grad_norm': 0.6464516370877077, 'learning_rate': 4.945524128569712e-06, 'epoch': 0.52} + 52%|█████▏ | 6319/12188 [13:29:03<15:52:37, 9.74s/it] 52%|█████▏ | 6320/12188 [13:29:10<14:40:25, 9.00s/it] {'loss': 0.279, 'grad_norm': 0.6392095533866036, 'learning_rate': 4.9441955032939275e-06, 'epoch': 0.52} + 52%|█████▏ | 6320/12188 [13:29:10<14:40:25, 9.00s/it] 52%|█████▏ | 6321/12188 [13:29:33<21:23:21, 13.12s/it] {'loss': 0.3509, 'grad_norm': 0.6864196083079667, 'learning_rate': 4.94286688195897e-06, 'epoch': 0.52} + 52%|█████▏ | 6321/12188 [13:29:33<21:23:21, 13.12s/it] 52%|█████▏ | 6322/12188 [13:29:39<18:11:43, 11.17s/it] {'loss': 0.3471, 'grad_norm': 0.6718820991223714, 'learning_rate': 4.941538264658659e-06, 'epoch': 0.52} + 52%|█████▏ | 6322/12188 [13:29:39<18:11:43, 11.17s/it] 52%|█████▏ | 6323/12188 [13:29:47<16:40:44, 10.24s/it] {'loss': 0.38, 'grad_norm': 0.6652029419513638, 'learning_rate': 4.940209651486822e-06, 'epoch': 0.52} + 52%|█████▏ | 6323/12188 [13:29:47<16:40:44, 10.24s/it] 52%|█████▏ | 6324/12188 [13:29:55<15:26:47, 9.48s/it] {'loss': 0.3133, 'grad_norm': 0.7021689227344364, 'learning_rate': 4.938881042537286e-06, 'epoch': 0.52} + 52%|█████▏ | 6324/12188 [13:29:55<15:26:47, 9.48s/it] 52%|█████▏ | 6325/12188 [13:30:02<14:09:26, 8.69s/it] {'loss': 0.2822, 'grad_norm': 0.6169948946190759, 'learning_rate': 4.937552437903869e-06, 'epoch': 0.52} + 52%|█████▏ | 6325/12188 [13:30:02<14:09:26, 8.69s/it] 52%|█████▏ | 6326/12188 [13:30:10<13:40:43, 8.40s/it] {'loss': 0.3325, 'grad_norm': 0.701789398738632, 'learning_rate': 4.9362238376804015e-06, 'epoch': 0.52} + 52%|█████▏ | 6326/12188 [13:30:10<13:40:43, 8.40s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f2caea32c00> +[Try #0] Failed to fetch sample 4378969 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f2caea32c00> +Problematic sample: {'image': '20240823_021250_before_screenshot_sub0.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'eBay Home'"}, {'from': 'gpt', 'value': '\nclick(x=0.253, y=0.1945)\n'}]} + 52%|█████▏ | 6327/12188 [13:30:17<13:08:08, 8.07s/it] {'loss': 0.3166, 'grad_norm': 0.6309096926400584, 'learning_rate': 4.934895241960702e-06, 'epoch': 0.52} + 52%|█████▏ | 6327/12188 [13:30:17<13:08:08, 8.07s/it] 52%|█████▏ | 6328/12188 [13:30:24<12:34:42, 7.73s/it] {'loss': 0.3396, 'grad_norm': 0.6661901582167571, 'learning_rate': 4.933566650838595e-06, 'epoch': 0.52} + 52%|█████▏ | 6328/12188 [13:30:24<12:34:42, 7.73s/it] 52%|█████▏ | 6329/12188 [13:30:31<12:19:36, 7.57s/it] {'loss': 0.3293, 'grad_norm': 0.7968513977758986, 'learning_rate': 4.932238064407907e-06, 'epoch': 0.52} + 52%|█████▏ | 6329/12188 [13:30:31<12:19:36, 7.57s/it] 52%|█████▏ | 6330/12188 [13:30:38<11:58:43, 7.36s/it] {'loss': 0.3132, 'grad_norm': 0.6419164698388885, 'learning_rate': 4.930909482762458e-06, 'epoch': 0.52} + 52%|█████▏ | 6330/12188 [13:30:38<11:58:43, 7.36s/it] 52%|█████▏ | 6331/12188 [13:30:45<11:53:26, 7.31s/it] {'loss': 0.2994, 'grad_norm': 0.7189913715014496, 'learning_rate': 4.92958090599607e-06, 'epoch': 0.52} + 52%|█████▏ | 6331/12188 [13:30:45<11:53:26, 7.31s/it] 52%|█████▏ | 6332/12188 [13:30:53<11:56:23, 7.34s/it] {'loss': 0.3371, 'grad_norm': 0.6951910135068428, 'learning_rate': 4.9282523342025645e-06, 'epoch': 0.52} + 52%|█████▏ | 6332/12188 [13:30:53<11:56:23, 7.34s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f5f8b76df30> +[Try #0] Failed to fetch sample 4673213 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f5f8b76df30> +Problematic sample: {'image': '20240823_045930_before_screenshot_sub3.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'common/thinArrow'"}, {'from': 'gpt', 'value': '\nclick(x=0.184, y=0.9515)\n'}]} + 52%|█████▏ | 6333/12188 [13:31:01<12:15:42, 7.54s/it] {'loss': 0.3108, 'grad_norm': 0.6342317546682834, 'learning_rate': 4.926923767475763e-06, 'epoch': 0.52} + 52%|█████▏ | 6333/12188 [13:31:01<12:15:42, 7.54s/it] 52%|█████▏ | 6334/12188 [13:31:08<12:04:22, 7.42s/it] {'loss': 0.322, 'grad_norm': 0.757014494470952, 'learning_rate': 4.92559520590949e-06, 'epoch': 0.52} + 52%|█████▏ | 6334/12188 [13:31:08<12:04:22, 7.42s/it] 52%|█████▏ | 6335/12188 [13:31:15<11:45:55, 7.24s/it] {'loss': 0.3314, 'grad_norm': 0.9938035981200666, 'learning_rate': 4.9242666495975615e-06, 'epoch': 0.52} + 52%|█████▏ | 6335/12188 [13:31:15<11:45:55, 7.24s/it] 52%|█████▏ | 6336/12188 [13:31:22<11:47:00, 7.25s/it] {'loss': 0.357, 'grad_norm': 0.6736032170948458, 'learning_rate': 4.922938098633803e-06, 'epoch': 0.52} + 52%|█████▏ | 6336/12188 [13:31:22<11:47:00, 7.25s/it] 52%|█████▏ | 6337/12188 [13:31:42<17:50:52, 10.98s/it] {'loss': 0.3101, 'grad_norm': 0.6678352462565611, 'learning_rate': 4.92160955311203e-06, 'epoch': 0.52} + 52%|█████▏ | 6337/12188 [13:31:42<17:50:52, 10.98s/it] 52%|█████▏ | 6338/12188 [13:31:49<16:20:52, 10.06s/it] {'loss': 0.3097, 'grad_norm': 0.64403409561554, 'learning_rate': 4.920281013126065e-06, 'epoch': 0.52} + 52%|█████▏ | 6338/12188 [13:31:49<16:20:52, 10.06s/it] 52%|█████▏ | 6339/12188 [13:32:12<22:21:54, 13.77s/it] {'loss': 0.3137, 'grad_norm': 0.6596206512378826, 'learning_rate': 4.918952478769729e-06, 'epoch': 0.52} + 52%|█████▏ | 6339/12188 [13:32:12<22:21:54, 13.77s/it] 52%|█████▏ | 6340/12188 [13:32:33<25:56:47, 15.97s/it] {'loss': 0.3372, 'grad_norm': 0.693519761800403, 'learning_rate': 4.917623950136836e-06, 'epoch': 0.52} + 52%|█████▏ | 6340/12188 [13:32:33<25:56:47, 15.97s/it] 52%|█████▏ | 6341/12188 [13:32:43<22:50:07, 14.06s/it] {'loss': 0.3222, 'grad_norm': 0.6603338032978666, 'learning_rate': 4.916295427321206e-06, 'epoch': 0.52} + 52%|█████▏ | 6341/12188 [13:32:43<22:50:07, 14.06s/it] 52%|█████▏ | 6342/12188 [13:32:50<19:21:45, 11.92s/it] {'loss': 0.3387, 'grad_norm': 0.6911408422880482, 'learning_rate': 4.914966910416662e-06, 'epoch': 0.52} + 52%|█████▏ | 6342/12188 [13:32:50<19:21:45, 11.92s/it] 52%|█████▏ | 6343/12188 [13:32:56<16:49:18, 10.36s/it] {'loss': 0.3105, 'grad_norm': 0.6372374205318857, 'learning_rate': 4.913638399517017e-06, 'epoch': 0.52} + 52%|█████▏ | 6343/12188 [13:32:56<16:49:18, 10.36s/it] 52%|█████▏ | 6344/12188 [13:33:37<31:39:16, 19.50s/it] {'loss': 0.3053, 'grad_norm': 0.6352722677233603, 'learning_rate': 4.912309894716089e-06, 'epoch': 0.52} + 52%|█████▏ | 6344/12188 [13:33:37<31:39:16, 19.50s/it] 52%|█████▏ | 6345/12188 [13:34:04<35:08:20, 21.65s/it] {'loss': 0.3227, 'grad_norm': 0.6819068592537884, 'learning_rate': 4.910981396107696e-06, 'epoch': 0.52} + 52%|█████▏ | 6345/12188 [13:34:04<35:08:20, 21.65s/it] 52%|█████▏ | 6346/12188 [13:34:29<37:03:26, 22.84s/it] {'loss': 0.3586, 'grad_norm': 0.7162950137761486, 'learning_rate': 4.909652903785652e-06, 'epoch': 0.52} + 52%|█████▏ | 6346/12188 [13:34:29<37:03:26, 22.84s/it] 52%|█████▏ | 6347/12188 [13:34:36<29:23:28, 18.11s/it] {'loss': 0.3012, 'grad_norm': 0.7020961943452084, 'learning_rate': 4.908324417843779e-06, 'epoch': 0.52} + 52%|█████▏ | 6347/12188 [13:34:36<29:23:28, 18.11s/it] 52%|█████▏ | 6348/12188 [13:34:59<31:29:01, 19.41s/it] {'loss': 0.3273, 'grad_norm': 0.6784077769535696, 'learning_rate': 4.906995938375884e-06, 'epoch': 0.52} + 52%|█████▏ | 6348/12188 [13:34:59<31:29:01, 19.41s/it] 52%|█████▏ | 6349/12188 [13:35:07<25:52:19, 15.95s/it] {'loss': 0.3519, 'grad_norm': 0.6296269709300761, 'learning_rate': 4.905667465475789e-06, 'epoch': 0.52} + 52%|█████▏ | 6349/12188 [13:35:07<25:52:19, 15.95s/it] 52%|█████▏ | 6350/12188 [13:35:28<28:32:33, 17.60s/it] {'loss': 0.3322, 'grad_norm': 0.7541140198850645, 'learning_rate': 4.904338999237303e-06, 'epoch': 0.52} + 52%|█████▏ | 6350/12188 [13:35:28<28:32:33, 17.60s/it] 52%|█████▏ | 6351/12188 [13:35:35<23:27:36, 14.47s/it] {'loss': 0.302, 'grad_norm': 0.689158457438998, 'learning_rate': 4.903010539754246e-06, 'epoch': 0.52} + 52%|█████▏ | 6351/12188 [13:35:35<23:27:36, 14.47s/it] 52%|█████▏ | 6352/12188 [13:35:55<25:59:52, 16.04s/it] {'loss': 0.3239, 'grad_norm': 0.6152989466676231, 'learning_rate': 4.901682087120429e-06, 'epoch': 0.52} + 52%|█████▏ | 6352/12188 [13:35:55<25:59:52, 16.04s/it] 52%|█████▏ | 6353/12188 [13:36:03<21:50:43, 13.48s/it] {'loss': 0.3458, 'grad_norm': 0.6654498422021737, 'learning_rate': 4.900353641429663e-06, 'epoch': 0.52} + 52%|█████▏ | 6353/12188 [13:36:03<21:50:43, 13.48s/it] 52%|█████▏ | 6354/12188 [13:36:21<24:04:49, 14.86s/it] {'loss': 0.302, 'grad_norm': 0.6425409938862446, 'learning_rate': 4.899025202775765e-06, 'epoch': 0.52} + 52%|█████▏ | 6354/12188 [13:36:21<24:04:49, 14.86s/it] 52%|█████▏ | 6355/12188 [13:36:40<26:20:44, 16.26s/it] {'loss': 0.3154, 'grad_norm': 0.7024515261389408, 'learning_rate': 4.897696771252543e-06, 'epoch': 0.52} + 52%|█████▏ | 6355/12188 [13:36:40<26:20:44, 16.26s/it] 52%|█████▏ | 6356/12188 [13:37:05<30:27:16, 18.80s/it] {'loss': 0.3307, 'grad_norm': 0.6598882299864881, 'learning_rate': 4.8963683469538095e-06, 'epoch': 0.52} + 52%|█████▏ | 6356/12188 [13:37:05<30:27:16, 18.80s/it] 52%|█████▏ | 6357/12188 [13:37:12<24:43:10, 15.26s/it] {'loss': 0.3035, 'grad_norm': 0.7969777381462393, 'learning_rate': 4.895039929973381e-06, 'epoch': 0.52} + 52%|█████▏ | 6357/12188 [13:37:12<24:43:10, 15.26s/it] 52%|█████▏ | 6358/12188 [13:37:55<38:24:55, 23.72s/it] {'loss': 0.3377, 'grad_norm': 0.7548871745875803, 'learning_rate': 4.893711520405061e-06, 'epoch': 0.52} + 52%|█████▏ | 6358/12188 [13:37:55<38:24:55, 23.72s/it] 52%|█████▏ | 6359/12188 [13:38:16<36:53:57, 22.79s/it] {'loss': 0.2833, 'grad_norm': 0.6078093195995952, 'learning_rate': 4.892383118342663e-06, 'epoch': 0.52} + 52%|█████▏ | 6359/12188 [13:38:16<36:53:57, 22.79s/it] 52%|█████▏ | 6360/12188 [13:38:23<29:06:32, 17.98s/it] {'loss': 0.3262, 'grad_norm': 0.7518478963485022, 'learning_rate': 4.891054723879998e-06, 'epoch': 0.52} + 52%|█████▏ | 6360/12188 [13:38:23<29:06:32, 17.98s/it] 52%|█████▏ | 6361/12188 [13:38:30<23:47:03, 14.69s/it] {'loss': 0.342, 'grad_norm': 0.6482356783980824, 'learning_rate': 4.889726337110871e-06, 'epoch': 0.52} + 52%|█████▏ | 6361/12188 [13:38:30<23:47:03, 14.69s/it] 52%|█████▏ | 6362/12188 [13:38:37<20:10:08, 12.46s/it] {'loss': 0.3086, 'grad_norm': 0.7061376133433687, 'learning_rate': 4.888397958129095e-06, 'epoch': 0.52} + 52%|█████▏ | 6362/12188 [13:38:37<20:10:08, 12.46s/it] 52%|█████▏ | 6363/12188 [13:38:45<18:06:52, 11.20s/it] {'loss': 0.3416, 'grad_norm': 0.702450328092684, 'learning_rate': 4.887069587028473e-06, 'epoch': 0.52} + 52%|█████▏ | 6363/12188 [13:38:45<18:06:52, 11.20s/it] 52%|█████▏ | 6364/12188 [13:38:52<15:58:16, 9.87s/it] {'loss': 0.3235, 'grad_norm': 0.6740275121571017, 'learning_rate': 4.8857412239028175e-06, 'epoch': 0.52} + 52%|█████▏ | 6364/12188 [13:38:52<15:58:16, 9.87s/it] 52%|█████▏ | 6365/12188 [13:39:12<20:40:38, 12.78s/it] {'loss': 0.317, 'grad_norm': 0.8964045097714142, 'learning_rate': 4.884412868845933e-06, 'epoch': 0.52} + 52%|█████▏ | 6365/12188 [13:39:12<20:40:38, 12.78s/it] 52%|█████▏ | 6366/12188 [13:39:37<26:52:16, 16.62s/it] {'loss': 0.3235, 'grad_norm': 0.7341031871500424, 'learning_rate': 4.883084521951626e-06, 'epoch': 0.52} + 52%|█████▏ | 6366/12188 [13:39:37<26:52:16, 16.62s/it] 52%|█████▏ | 6367/12188 [13:39:44<22:16:49, 13.78s/it] {'loss': 0.3277, 'grad_norm': 0.7362464078370211, 'learning_rate': 4.881756183313703e-06, 'epoch': 0.52} + 52%|█████▏ | 6367/12188 [13:39:44<22:16:49, 13.78s/it] 52%|█████▏ | 6368/12188 [13:40:03<24:28:51, 15.14s/it] {'loss': 0.3305, 'grad_norm': 0.6641655652038256, 'learning_rate': 4.880427853025967e-06, 'epoch': 0.52} + 52%|█████▏ | 6368/12188 [13:40:03<24:28:51, 15.14s/it] 52%|█████▏ | 6369/12188 [13:40:09<20:24:34, 12.63s/it] {'loss': 0.2948, 'grad_norm': 0.6678186090981628, 'learning_rate': 4.879099531182223e-06, 'epoch': 0.52} + 52%|█████▏ | 6369/12188 [13:40:09<20:24:34, 12.63s/it] 52%|█████▏ | 6370/12188 [13:40:40<29:09:23, 18.04s/it] {'loss': 0.3661, 'grad_norm': 0.7494080055314063, 'learning_rate': 4.877771217876279e-06, 'epoch': 0.52} + 52%|█████▏ | 6370/12188 [13:40:40<29:09:23, 18.04s/it] 52%|█████▏ | 6371/12188 [13:41:12<36:01:57, 22.30s/it] {'loss': 0.2807, 'grad_norm': 0.61908503674112, 'learning_rate': 4.876442913201932e-06, 'epoch': 0.52} + 52%|█████▏ | 6371/12188 [13:41:12<36:01:57, 22.30s/it] 52%|█████▏ | 6372/12188 [13:41:33<35:10:01, 21.77s/it] {'loss': 0.3129, 'grad_norm': 0.6093869483284496, 'learning_rate': 4.87511461725299e-06, 'epoch': 0.52} + 52%|█████▏ | 6372/12188 [13:41:33<35:10:01, 21.77s/it] 52%|█████▏ | 6373/12188 [13:41:40<27:57:53, 17.31s/it] {'loss': 0.3254, 'grad_norm': 0.657294667694786, 'learning_rate': 4.873786330123253e-06, 'epoch': 0.52} + 52%|█████▏ | 6373/12188 [13:41:40<27:57:53, 17.31s/it] 52%|█████▏ | 6374/12188 [13:42:11<34:44:42, 21.51s/it] {'loss': 0.3258, 'grad_norm': 0.6756210517533582, 'learning_rate': 4.872458051906521e-06, 'epoch': 0.52} + 52%|█████▏ | 6374/12188 [13:42:11<34:44:42, 21.51s/it] 52%|█████▏ | 6375/12188 [13:42:18<27:51:34, 17.25s/it] {'loss': 0.3471, 'grad_norm': 0.6909801876245921, 'learning_rate': 4.8711297826965996e-06, 'epoch': 0.52} + 52%|█████▏ | 6375/12188 [13:42:18<27:51:34, 17.25s/it] 52%|█████▏ | 6376/12188 [13:42:25<22:45:00, 14.09s/it] {'loss': 0.3342, 'grad_norm': 0.7064117982188312, 'learning_rate': 4.869801522587282e-06, 'epoch': 0.52} + 52%|█████▏ | 6376/12188 [13:42:25<22:45:00, 14.09s/it] 52%|█████▏ | 6377/12188 [13:43:34<49:25:07, 30.62s/it] {'loss': 0.3307, 'grad_norm': 0.7598319086873879, 'learning_rate': 4.868473271672374e-06, 'epoch': 0.52} + 52%|█████▏ | 6377/12188 [13:43:34<49:25:07, 30.62s/it] 52%|█████▏ | 6378/12188 [13:43:53<43:46:08, 27.12s/it] {'loss': 0.3222, 'grad_norm': 0.6514840329763004, 'learning_rate': 4.867145030045672e-06, 'epoch': 0.52} + 52%|█████▏ | 6378/12188 [13:43:53<43:46:08, 27.12s/it] 52%|█████▏ | 6379/12188 [13:44:01<34:28:28, 21.36s/it] {'loss': 0.3252, 'grad_norm': 0.6268027955429467, 'learning_rate': 4.865816797800974e-06, 'epoch': 0.52} + 52%|█████▏ | 6379/12188 [13:44:01<34:28:28, 21.36s/it] 52%|█████▏ | 6380/12188 [13:44:09<27:48:48, 17.24s/it] {'loss': 0.2722, 'grad_norm': 0.7017156524124344, 'learning_rate': 4.86448857503208e-06, 'epoch': 0.52} + 52%|█████▏ | 6380/12188 [13:44:09<27:48:48, 17.24s/it] 52%|█████▏ | 6381/12188 [13:44:29<29:13:38, 18.12s/it] {'loss': 0.322, 'grad_norm': 0.6696823561115274, 'learning_rate': 4.8631603618327824e-06, 'epoch': 0.52} + 52%|█████▏ | 6381/12188 [13:44:29<29:13:38, 18.12s/it] 52%|█████▏ | 6382/12188 [13:44:36<23:41:18, 14.69s/it] {'loss': 0.3235, 'grad_norm': 0.6366954617613553, 'learning_rate': 4.8618321582968806e-06, 'epoch': 0.52} + 52%|█████▏ | 6382/12188 [13:44:36<23:41:18, 14.69s/it] 52%|█████▏ | 6383/12188 [13:44:44<20:30:18, 12.72s/it] {'loss': 0.3222, 'grad_norm': 0.7129473512633275, 'learning_rate': 4.860503964518171e-06, 'epoch': 0.52} + 52%|█████▏ | 6383/12188 [13:44:44<20:30:18, 12.72s/it] 52%|█████▏ | 6384/12188 [13:44:51<17:37:37, 10.93s/it] {'loss': 0.3072, 'grad_norm': 0.7019331808949657, 'learning_rate': 4.859175780590446e-06, 'epoch': 0.52} + 52%|█████▏ | 6384/12188 [13:44:51<17:37:37, 10.93s/it] 52%|█████▏ | 6385/12188 [13:45:16<24:43:46, 15.34s/it] {'loss': 0.3453, 'grad_norm': 0.882216062870113, 'learning_rate': 4.8578476066075035e-06, 'epoch': 0.52} + 52%|█████▏ | 6385/12188 [13:45:16<24:43:46, 15.34s/it] 52%|█████▏ | 6386/12188 [13:45:23<20:39:17, 12.82s/it] {'loss': 0.3303, 'grad_norm': 0.6619102282699266, 'learning_rate': 4.856519442663134e-06, 'epoch': 0.52} + 52%|█████▏ | 6386/12188 [13:45:23<20:39:17, 12.82s/it] 52%|█████▏ | 6387/12188 [13:45:33<19:01:10, 11.80s/it] {'loss': 0.309, 'grad_norm': 0.6584526859196712, 'learning_rate': 4.855191288851129e-06, 'epoch': 0.52} + 52%|█████▏ | 6387/12188 [13:45:33<19:01:10, 11.80s/it] 52%|█████▏ | 6388/12188 [13:45:51<22:28:23, 13.95s/it] {'loss': 0.344, 'grad_norm': 0.7261233000103132, 'learning_rate': 4.853863145265284e-06, 'epoch': 0.52} + 52%|█████▏ | 6388/12188 [13:45:51<22:28:23, 13.95s/it] 52%|█████▏ | 6389/12188 [13:45:59<19:33:12, 12.14s/it] {'loss': 0.3054, 'grad_norm': 0.7276654752376331, 'learning_rate': 4.852535011999388e-06, 'epoch': 0.52} + 52%|█████▏ | 6389/12188 [13:45:59<19:33:12, 12.14s/it] 52%|█████▏ | 6390/12188 [13:46:06<17:03:49, 10.60s/it] {'loss': 0.2971, 'grad_norm': 0.7114448045587529, 'learning_rate': 4.851206889147234e-06, 'epoch': 0.52} + 52%|█████▏ | 6390/12188 [13:46:06<17:03:49, 10.60s/it] 52%|█████▏ | 6391/12188 [13:46:14<15:31:18, 9.64s/it] {'loss': 0.2946, 'grad_norm': 0.6645113191519174, 'learning_rate': 4.849878776802609e-06, 'epoch': 0.52} + 52%|█████▏ | 6391/12188 [13:46:14<15:31:18, 9.64s/it] 52%|█████▏ | 6392/12188 [13:46:20<14:03:47, 8.73s/it] {'loss': 0.3117, 'grad_norm': 0.7028239825751901, 'learning_rate': 4.848550675059303e-06, 'epoch': 0.52} + 52%|█████▏ | 6392/12188 [13:46:20<14:03:47, 8.73s/it] 52%|█████▏ | 6393/12188 [13:46:28<13:36:26, 8.45s/it] {'loss': 0.2907, 'grad_norm': 0.6322231255375499, 'learning_rate': 4.847222584011107e-06, 'epoch': 0.52} + 52%|█████▏ | 6393/12188 [13:46:28<13:36:26, 8.45s/it] 52%|█████▏ | 6394/12188 [13:46:47<18:40:48, 11.61s/it] {'loss': 0.3305, 'grad_norm': 0.6500429532246079, 'learning_rate': 4.845894503751805e-06, 'epoch': 0.52} + 52%|█████▏ | 6394/12188 [13:46:47<18:40:48, 11.61s/it] 52%|█████▏ | 6395/12188 [13:46:54<16:22:38, 10.18s/it] {'loss': 0.2809, 'grad_norm': 0.6808992291614967, 'learning_rate': 4.844566434375186e-06, 'epoch': 0.52} + 52%|█████▏ | 6395/12188 [13:46:54<16:22:38, 10.18s/it] 52%|█████▏ | 6396/12188 [13:47:01<14:56:50, 9.29s/it] {'loss': 0.304, 'grad_norm': 1.7701173490131832, 'learning_rate': 4.843238375975037e-06, 'epoch': 0.52} + 52%|█████▏ | 6396/12188 [13:47:01<14:56:50, 9.29s/it] 52%|█████▏ | 6397/12188 [13:47:09<14:12:23, 8.83s/it] {'loss': 0.3552, 'grad_norm': 0.6536991270006799, 'learning_rate': 4.84191032864514e-06, 'epoch': 0.52} + 52%|█████▏ | 6397/12188 [13:47:09<14:12:23, 8.83s/it] 52%|█████▏ | 6398/12188 [13:47:16<13:30:26, 8.40s/it] {'loss': 0.3394, 'grad_norm': 0.6519010251564082, 'learning_rate': 4.840582292479283e-06, 'epoch': 0.52} + 52%|█████▏ | 6398/12188 [13:47:16<13:30:26, 8.40s/it] 53%|█████▎ | 6399/12188 [13:47:24<13:09:50, 8.19s/it] {'loss': 0.3551, 'grad_norm': 0.625415893996803, 'learning_rate': 4.839254267571247e-06, 'epoch': 0.53} + 53%|█████▎ | 6399/12188 [13:47:24<13:09:50, 8.19s/it] 53%|█████▎ | 6400/12188 [13:47:31<12:42:53, 7.91s/it] {'loss': 0.2994, 'grad_norm': 0.6804610950058226, 'learning_rate': 4.8379262540148185e-06, 'epoch': 0.53} + 53%|█████▎ | 6400/12188 [13:47:31<12:42:53, 7.91s/it] 53%|█████▎ | 6401/12188 [13:47:52<18:51:34, 11.73s/it] {'loss': 0.3268, 'grad_norm': 0.6509451454120031, 'learning_rate': 4.836598251903777e-06, 'epoch': 0.53} + 53%|█████▎ | 6401/12188 [13:47:52<18:51:34, 11.73s/it] 53%|█████▎ | 6402/12188 [13:47:59<16:35:05, 10.32s/it] {'loss': 0.3102, 'grad_norm': 0.6725567493374132, 'learning_rate': 4.835270261331903e-06, 'epoch': 0.53} + 53%|█████▎ | 6402/12188 [13:47:59<16:35:05, 10.32s/it] 53%|█████▎ | 6403/12188 [13:48:06<15:01:43, 9.35s/it] {'loss': 0.3142, 'grad_norm': 0.6365206690782139, 'learning_rate': 4.83394228239298e-06, 'epoch': 0.53} + 53%|█████▎ | 6403/12188 [13:48:06<15:01:43, 9.35s/it] 53%|█████▎ | 6404/12188 [13:48:15<14:40:04, 9.13s/it] {'loss': 0.3073, 'grad_norm': 0.6417063370056676, 'learning_rate': 4.832614315180784e-06, 'epoch': 0.53} + 53%|█████▎ | 6404/12188 [13:48:15<14:40:04, 9.13s/it] 53%|█████▎ | 6405/12188 [13:48:22<13:53:17, 8.65s/it] {'loss': 0.3328, 'grad_norm': 0.7313683857154759, 'learning_rate': 4.8312863597890976e-06, 'epoch': 0.53} + 53%|█████▎ | 6405/12188 [13:48:22<13:53:17, 8.65s/it] 53%|█████▎ | 6406/12188 [13:48:30<13:27:08, 8.38s/it] {'loss': 0.3005, 'grad_norm': 0.615524609236577, 'learning_rate': 4.829958416311698e-06, 'epoch': 0.53} + 53%|█████▎ | 6406/12188 [13:48:30<13:27:08, 8.38s/it] 53%|█████▎ | 6407/12188 [13:48:38<13:07:14, 8.17s/it] {'loss': 0.3149, 'grad_norm': 0.6963599218588755, 'learning_rate': 4.828630484842362e-06, 'epoch': 0.53} + 53%|█████▎ | 6407/12188 [13:48:38<13:07:14, 8.17s/it] 53%|█████▎ | 6408/12188 [13:48:45<12:29:33, 7.78s/it] {'loss': 0.3809, 'grad_norm': 0.7160141157120016, 'learning_rate': 4.827302565474866e-06, 'epoch': 0.53} + 53%|█████▎ | 6408/12188 [13:48:45<12:29:33, 7.78s/it] 53%|█████▎ | 6409/12188 [13:48:51<12:03:33, 7.51s/it] {'loss': 0.2878, 'grad_norm': 0.6645684251276942, 'learning_rate': 4.825974658302984e-06, 'epoch': 0.53} + 53%|█████▎ | 6409/12188 [13:48:51<12:03:33, 7.51s/it] 53%|█████▎ | 6410/12188 [13:48:59<11:57:43, 7.45s/it] {'loss': 0.3479, 'grad_norm': 0.6274811153207897, 'learning_rate': 4.8246467634204916e-06, 'epoch': 0.53} + 53%|█████▎ | 6410/12188 [13:48:59<11:57:43, 7.45s/it] 53%|█████▎ | 6411/12188 [13:49:06<12:05:52, 7.54s/it] {'loss': 0.2831, 'grad_norm': 0.7657754740911851, 'learning_rate': 4.823318880921165e-06, 'epoch': 0.53} + 53%|█████▎ | 6411/12188 [13:49:06<12:05:52, 7.54s/it] 53%|█████▎ | 6412/12188 [13:49:15<12:38:21, 7.88s/it] {'loss': 0.2879, 'grad_norm': 0.6163848627368349, 'learning_rate': 4.821991010898773e-06, 'epoch': 0.53} + 53%|█████▎ | 6412/12188 [13:49:15<12:38:21, 7.88s/it] 53%|█████▎ | 6413/12188 [13:49:22<12:14:42, 7.63s/it] {'loss': 0.3251, 'grad_norm': 0.7123570557573818, 'learning_rate': 4.820663153447091e-06, 'epoch': 0.53} + 53%|█████▎ | 6413/12188 [13:49:22<12:14:42, 7.63s/it] 53%|█████▎ | 6414/12188 [13:49:29<11:54:08, 7.42s/it] {'loss': 0.3327, 'grad_norm': 0.7338435296933814, 'learning_rate': 4.819335308659891e-06, 'epoch': 0.53} + 53%|█████▎ | 6414/12188 [13:49:29<11:54:08, 7.42s/it] 53%|█████▎ | 6415/12188 [13:49:36<11:28:45, 7.16s/it] {'loss': 0.2974, 'grad_norm': 0.6856323477542919, 'learning_rate': 4.818007476630938e-06, 'epoch': 0.53} + 53%|█████▎ | 6415/12188 [13:49:36<11:28:45, 7.16s/it] 53%|█████▎ | 6416/12188 [13:49:43<11:38:33, 7.26s/it] {'loss': 0.303, 'grad_norm': 0.702840335745122, 'learning_rate': 4.8166796574540065e-06, 'epoch': 0.53} + 53%|█████▎ | 6416/12188 [13:49:43<11:38:33, 7.26s/it] 53%|█████▎ | 6417/12188 [13:49:50<11:18:02, 7.05s/it] {'loss': 0.3496, 'grad_norm': 0.6640547801932544, 'learning_rate': 4.815351851222862e-06, 'epoch': 0.53} + 53%|█████▎ | 6417/12188 [13:49:50<11:18:02, 7.05s/it] 53%|█████▎ | 6418/12188 [13:49:58<11:39:07, 7.27s/it] {'loss': 0.3195, 'grad_norm': 0.6682107735190768, 'learning_rate': 4.814024058031272e-06, 'epoch': 0.53} + 53%|█████▎ | 6418/12188 [13:49:58<11:39:07, 7.27s/it] 53%|█████▎ | 6419/12188 [13:50:04<11:27:50, 7.15s/it] {'loss': 0.3512, 'grad_norm': 0.6431278805081704, 'learning_rate': 4.812696277973007e-06, 'epoch': 0.53} + 53%|█████▎ | 6419/12188 [13:50:04<11:27:50, 7.15s/it] 53%|█████▎ | 6420/12188 [13:50:12<11:34:17, 7.22s/it] {'loss': 0.3327, 'grad_norm': 0.7081851912923351, 'learning_rate': 4.811368511141828e-06, 'epoch': 0.53} + 53%|█████▎ | 6420/12188 [13:50:12<11:34:17, 7.22s/it] 53%|█████▎ | 6421/12188 [13:50:20<12:02:16, 7.51s/it] {'loss': 0.3266, 'grad_norm': 0.707561699292353, 'learning_rate': 4.810040757631503e-06, 'epoch': 0.53} + 53%|█████▎ | 6421/12188 [13:50:20<12:02:16, 7.51s/it] 53%|█████▎ | 6422/12188 [13:50:28<12:11:22, 7.61s/it] {'loss': 0.3123, 'grad_norm': 0.7785197868042092, 'learning_rate': 4.8087130175357936e-06, 'epoch': 0.53} + 53%|█████▎ | 6422/12188 [13:50:28<12:11:22, 7.61s/it] 53%|█████▎ | 6423/12188 [13:50:34<11:38:46, 7.27s/it] {'loss': 0.2971, 'grad_norm': 0.6024005872219144, 'learning_rate': 4.807385290948463e-06, 'epoch': 0.53} + 53%|█████▎ | 6423/12188 [13:50:34<11:38:46, 7.27s/it] 53%|█████▎ | 6424/12188 [13:50:42<12:00:57, 7.50s/it] {'loss': 0.348, 'grad_norm': 0.6142711236342112, 'learning_rate': 4.806057577963276e-06, 'epoch': 0.53} + 53%|█████▎ | 6424/12188 [13:50:42<12:00:57, 7.50s/it] 53%|█████▎ | 6425/12188 [13:50:50<11:55:44, 7.45s/it] {'loss': 0.3416, 'grad_norm': 0.9379075418931083, 'learning_rate': 4.8047298786739884e-06, 'epoch': 0.53} + 53%|█████▎ | 6425/12188 [13:50:50<11:55:44, 7.45s/it] 53%|█████▎ | 6426/12188 [13:50:57<11:53:10, 7.43s/it] {'loss': 0.3437, 'grad_norm': 0.6953238841419617, 'learning_rate': 4.803402193174365e-06, 'epoch': 0.53} + 53%|█████▎ | 6426/12188 [13:50:57<11:53:10, 7.43s/it] 53%|█████▎ | 6427/12188 [13:51:04<11:52:34, 7.42s/it] {'loss': 0.2941, 'grad_norm': 0.6938141037203264, 'learning_rate': 4.802074521558162e-06, 'epoch': 0.53} + 53%|█████▎ | 6427/12188 [13:51:04<11:52:34, 7.42s/it] 53%|█████▎ | 6428/12188 [13:51:12<11:42:54, 7.32s/it] {'loss': 0.3465, 'grad_norm': 0.7172131393005544, 'learning_rate': 4.800746863919138e-06, 'epoch': 0.53} + 53%|█████▎ | 6428/12188 [13:51:12<11:42:54, 7.32s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f6c02f1b060> +[Try #0] Failed to fetch sample 4739944 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f6c02f1b060> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Search this list'"}, {'from': 'gpt', 'value': '\nclick(x=0.565, y=0.0715)\n'}]} + 53%|█████▎ | 6429/12188 [13:51:19<11:35:14, 7.24s/it] {'loss': 0.3182, 'grad_norm': 0.6955343225795614, 'learning_rate': 4.799419220351053e-06, 'epoch': 0.53} + 53%|█████▎ | 6429/12188 [13:51:19<11:35:14, 7.24s/it] 53%|█████▎ | 6430/12188 [13:51:27<12:03:08, 7.54s/it] {'loss': 0.3217, 'grad_norm': 0.6074139835570509, 'learning_rate': 4.798091590947657e-06, 'epoch': 0.53} + 53%|█████▎ | 6430/12188 [13:51:27<12:03:08, 7.54s/it] 53%|█████▎ | 6431/12188 [13:51:34<12:00:53, 7.51s/it] {'loss': 0.2989, 'grad_norm': 0.716395281215371, 'learning_rate': 4.796763975802708e-06, 'epoch': 0.53} + 53%|█████▎ | 6431/12188 [13:51:34<12:00:53, 7.51s/it] 53%|█████▎ | 6432/12188 [13:51:41<11:40:02, 7.30s/it] {'loss': 0.3189, 'grad_norm': 0.6560262884724528, 'learning_rate': 4.795436375009964e-06, 'epoch': 0.53} + 53%|█████▎ | 6432/12188 [13:51:41<11:40:02, 7.30s/it] 53%|█████▎ | 6433/12188 [13:51:48<11:32:54, 7.22s/it] {'loss': 0.3411, 'grad_norm': 0.663509697600703, 'learning_rate': 4.794108788663172e-06, 'epoch': 0.53} + 53%|█████▎ | 6433/12188 [13:51:48<11:32:54, 7.22s/it] 53%|█████▎ | 6434/12188 [13:51:55<11:25:21, 7.15s/it] {'loss': 0.3217, 'grad_norm': 0.842701441298497, 'learning_rate': 4.792781216856088e-06, 'epoch': 0.53} + 53%|█████▎ | 6434/12188 [13:51:55<11:25:21, 7.15s/it] 53%|█████▎ | 6435/12188 [13:52:02<11:21:09, 7.10s/it] {'loss': 0.3277, 'grad_norm': 0.709771154165741, 'learning_rate': 4.7914536596824616e-06, 'epoch': 0.53} + 53%|█████▎ | 6435/12188 [13:52:02<11:21:09, 7.10s/it] 53%|█████▎ | 6436/12188 [13:52:09<11:09:50, 6.99s/it] {'loss': 0.3161, 'grad_norm': 0.6592317606034984, 'learning_rate': 4.790126117236041e-06, 'epoch': 0.53} + 53%|█████▎ | 6436/12188 [13:52:09<11:09:50, 6.99s/it] 53%|█████▎ | 6437/12188 [13:52:17<11:52:57, 7.44s/it] {'loss': 0.3362, 'grad_norm': 0.6472412099415618, 'learning_rate': 4.788798589610579e-06, 'epoch': 0.53} + 53%|█████▎ | 6437/12188 [13:52:17<11:52:57, 7.44s/it] 53%|█████▎ | 6438/12188 [13:52:25<11:48:55, 7.40s/it] {'loss': 0.3103, 'grad_norm': 0.676737385965375, 'learning_rate': 4.787471076899819e-06, 'epoch': 0.53} + 53%|█████▎ | 6438/12188 [13:52:25<11:48:55, 7.40s/it] 53%|█████▎ | 6439/12188 [13:52:32<11:45:11, 7.36s/it] {'loss': 0.338, 'grad_norm': 0.6695112928863937, 'learning_rate': 4.7861435791975124e-06, 'epoch': 0.53} + 53%|█████▎ | 6439/12188 [13:52:32<11:45:11, 7.36s/it] 53%|█████▎ | 6440/12188 [13:52:39<11:30:51, 7.21s/it] {'loss': 0.3553, 'grad_norm': 0.6286101399178482, 'learning_rate': 4.784816096597401e-06, 'epoch': 0.53} + 53%|█████▎ | 6440/12188 [13:52:39<11:30:51, 7.21s/it] 53%|█████▎ | 6441/12188 [13:52:46<11:31:31, 7.22s/it] {'loss': 0.2978, 'grad_norm': 0.7030778956387379, 'learning_rate': 4.783488629193231e-06, 'epoch': 0.53} + 53%|█████▎ | 6441/12188 [13:52:46<11:31:31, 7.22s/it] 53%|█████▎ | 6442/12188 [13:52:53<11:29:20, 7.20s/it] {'loss': 0.3334, 'grad_norm': 0.7620871776306524, 'learning_rate': 4.782161177078748e-06, 'epoch': 0.53} + 53%|█████▎ | 6442/12188 [13:52:53<11:29:20, 7.20s/it] 53%|█████▎ | 6443/12188 [13:53:01<11:51:14, 7.43s/it] {'loss': 0.3274, 'grad_norm': 0.7411217968885891, 'learning_rate': 4.78083374034769e-06, 'epoch': 0.53} + 53%|█████▎ | 6443/12188 [13:53:01<11:51:14, 7.43s/it] 53%|█████▎ | 6444/12188 [13:53:08<11:45:22, 7.37s/it] {'loss': 0.3532, 'grad_norm': 0.7322676438938259, 'learning_rate': 4.779506319093804e-06, 'epoch': 0.53} + 53%|█████▎ | 6444/12188 [13:53:08<11:45:22, 7.37s/it] 53%|█████▎ | 6445/12188 [13:53:16<11:43:37, 7.35s/it] {'loss': 0.302, 'grad_norm': 0.7935104611200501, 'learning_rate': 4.778178913410826e-06, 'epoch': 0.53} + 53%|█████▎ | 6445/12188 [13:53:16<11:43:37, 7.35s/it] 53%|█████▎ | 6446/12188 [13:53:23<11:49:14, 7.41s/it] {'loss': 0.3022, 'grad_norm': 0.6204027958975259, 'learning_rate': 4.776851523392495e-06, 'epoch': 0.53} + 53%|█████▎ | 6446/12188 [13:53:23<11:49:14, 7.41s/it] 53%|█████▎ | 6447/12188 [13:53:30<11:33:04, 7.24s/it] {'loss': 0.3323, 'grad_norm': 0.7408853705289103, 'learning_rate': 4.7755241491325536e-06, 'epoch': 0.53} + 53%|█████▎ | 6447/12188 [13:53:30<11:33:04, 7.24s/it] 53%|█████▎ | 6448/12188 [13:53:37<11:31:46, 7.23s/it] {'loss': 0.3191, 'grad_norm': 0.5895372929375333, 'learning_rate': 4.774196790724735e-06, 'epoch': 0.53} + 53%|█████▎ | 6448/12188 [13:53:37<11:31:46, 7.23s/it] 53%|█████▎ | 6449/12188 [13:53:44<11:13:11, 7.04s/it] {'loss': 0.3158, 'grad_norm': 0.7025894922565487, 'learning_rate': 4.772869448262776e-06, 'epoch': 0.53} + 53%|█████▎ | 6449/12188 [13:53:44<11:13:11, 7.04s/it] 53%|█████▎ | 6450/12188 [13:53:51<11:14:16, 7.05s/it] {'loss': 0.324, 'grad_norm': 0.6337893609081927, 'learning_rate': 4.771542121840413e-06, 'epoch': 0.53} + 53%|█████▎ | 6450/12188 [13:53:51<11:14:16, 7.05s/it] 53%|█████▎ | 6451/12188 [13:53:58<11:27:27, 7.19s/it] {'loss': 0.3016, 'grad_norm': 0.6056857251647226, 'learning_rate': 4.770214811551377e-06, 'epoch': 0.53} + 53%|█████▎ | 6451/12188 [13:53:58<11:27:27, 7.19s/it] 53%|█████▎ | 6452/12188 [13:54:06<11:32:44, 7.25s/it] {'loss': 0.3214, 'grad_norm': 0.6091318508832806, 'learning_rate': 4.7688875174894035e-06, 'epoch': 0.53} + 53%|█████▎ | 6452/12188 [13:54:06<11:32:44, 7.25s/it] 53%|█████▎ | 6453/12188 [13:54:13<11:27:05, 7.19s/it] {'loss': 0.3523, 'grad_norm': 0.8672441463235769, 'learning_rate': 4.76756023974822e-06, 'epoch': 0.53} + 53%|█████▎ | 6453/12188 [13:54:13<11:27:05, 7.19s/it] 53%|█████▎ | 6454/12188 [13:54:20<11:19:47, 7.11s/it] {'loss': 0.3208, 'grad_norm': 0.6125493616888918, 'learning_rate': 4.76623297842156e-06, 'epoch': 0.53} + 53%|█████▎ | 6454/12188 [13:54:20<11:19:47, 7.11s/it] 53%|█████▎ | 6455/12188 [13:54:27<11:30:25, 7.23s/it] {'loss': 0.2997, 'grad_norm': 0.6982249271655526, 'learning_rate': 4.764905733603151e-06, 'epoch': 0.53} + 53%|█████▎ | 6455/12188 [13:54:27<11:30:25, 7.23s/it] 53%|█████▎ | 6456/12188 [13:54:38<13:08:43, 8.26s/it] {'loss': 0.3105, 'grad_norm': 0.6417057356670137, 'learning_rate': 4.763578505386722e-06, 'epoch': 0.53} + 53%|█████▎ | 6456/12188 [13:54:38<13:08:43, 8.26s/it] 53%|█████▎ | 6457/12188 [13:54:45<12:35:14, 7.91s/it] {'loss': 0.3518, 'grad_norm': 0.678905644299935, 'learning_rate': 4.762251293866e-06, 'epoch': 0.53} + 53%|█████▎ | 6457/12188 [13:54:45<12:35:14, 7.91s/it] 53%|█████▎ | 6458/12188 [13:54:53<12:41:56, 7.98s/it] {'loss': 0.317, 'grad_norm': 0.6803009456785772, 'learning_rate': 4.760924099134708e-06, 'epoch': 0.53} + 53%|█████▎ | 6458/12188 [13:54:53<12:41:56, 7.98s/it] 53%|█████▎ | 6459/12188 [13:55:00<12:03:29, 7.58s/it] {'loss': 0.3127, 'grad_norm': 0.6239532405047689, 'learning_rate': 4.759596921286571e-06, 'epoch': 0.53} + 53%|█████▎ | 6459/12188 [13:55:00<12:03:29, 7.58s/it] 53%|█████▎ | 6460/12188 [13:55:08<12:20:12, 7.75s/it] {'loss': 0.2847, 'grad_norm': 0.6602102923728154, 'learning_rate': 4.758269760415317e-06, 'epoch': 0.53} + 53%|█████▎ | 6460/12188 [13:55:08<12:20:12, 7.75s/it] 53%|█████▎ | 6461/12188 [13:55:15<12:00:20, 7.55s/it] {'loss': 0.3183, 'grad_norm': 0.6936822315608264, 'learning_rate': 4.75694261661466e-06, 'epoch': 0.53} + 53%|█████▎ | 6461/12188 [13:55:15<12:00:20, 7.55s/it] 53%|█████▎ | 6462/12188 [13:55:22<11:39:48, 7.33s/it] {'loss': 0.3271, 'grad_norm': 0.6688800452140071, 'learning_rate': 4.755615489978328e-06, 'epoch': 0.53} + 53%|█████▎ | 6462/12188 [13:55:22<11:39:48, 7.33s/it] 53%|█████▎ | 6463/12188 [13:55:29<11:26:49, 7.20s/it] {'loss': 0.3488, 'grad_norm': 0.705572487552552, 'learning_rate': 4.754288380600037e-06, 'epoch': 0.53} + 53%|█████▎ | 6463/12188 [13:55:29<11:26:49, 7.20s/it] 53%|█████▎ | 6464/12188 [13:55:37<11:48:12, 7.42s/it] {'loss': 0.3581, 'grad_norm': 0.6460349626106814, 'learning_rate': 4.752961288573504e-06, 'epoch': 0.53} + 53%|█████▎ | 6464/12188 [13:55:37<11:48:12, 7.42s/it] 53%|█████▎ | 6465/12188 [13:55:44<11:44:27, 7.39s/it] {'loss': 0.3, 'grad_norm': 0.6225280767979392, 'learning_rate': 4.75163421399245e-06, 'epoch': 0.53} + 53%|█████▎ | 6465/12188 [13:55:44<11:44:27, 7.39s/it] 53%|█████▎ | 6466/12188 [13:55:51<11:41:40, 7.36s/it] {'loss': 0.3001, 'grad_norm': 0.6577282656142559, 'learning_rate': 4.750307156950586e-06, 'epoch': 0.53} + 53%|█████▎ | 6466/12188 [13:55:51<11:41:40, 7.36s/it] 53%|█████▎ | 6467/12188 [13:55:58<11:32:45, 7.27s/it] {'loss': 0.3113, 'grad_norm': 0.6517177485691309, 'learning_rate': 4.7489801175416305e-06, 'epoch': 0.53} + 53%|█████▎ | 6467/12188 [13:55:58<11:32:45, 7.27s/it] 53%|█████▎ | 6468/12188 [13:56:05<11:23:38, 7.17s/it] {'loss': 0.341, 'grad_norm': 0.7370052328886373, 'learning_rate': 4.747653095859298e-06, 'epoch': 0.53} + 53%|█████▎ | 6468/12188 [13:56:05<11:23:38, 7.17s/it] 53%|█████▎ | 6469/12188 [13:56:12<11:08:23, 7.01s/it] {'loss': 0.3708, 'grad_norm': 0.7229813876197587, 'learning_rate': 4.746326091997297e-06, 'epoch': 0.53} + 53%|█████▎ | 6469/12188 [13:56:12<11:08:23, 7.01s/it] 53%|█████▎ | 6470/12188 [13:56:19<11:12:12, 7.05s/it] {'loss': 0.3044, 'grad_norm': 0.6779431675034842, 'learning_rate': 4.744999106049341e-06, 'epoch': 0.53} + 53%|█████▎ | 6470/12188 [13:56:19<11:12:12, 7.05s/it] 53%|█████▎ | 6471/12188 [13:56:27<11:24:47, 7.19s/it] {'loss': 0.3136, 'grad_norm': 0.6884349835089357, 'learning_rate': 4.743672138109136e-06, 'epoch': 0.53} + 53%|█████▎ | 6471/12188 [13:56:27<11:24:47, 7.19s/it] 53%|█████▎ | 6472/12188 [13:56:33<11:11:33, 7.05s/it] {'loss': 0.3163, 'grad_norm': 0.6184939685569363, 'learning_rate': 4.742345188270393e-06, 'epoch': 0.53} + 53%|█████▎ | 6472/12188 [13:56:33<11:11:33, 7.05s/it] 53%|█████▎ | 6473/12188 [13:56:40<11:02:44, 6.96s/it] {'loss': 0.3012, 'grad_norm': 0.6301402176384234, 'learning_rate': 4.741018256626821e-06, 'epoch': 0.53} + 53%|█████▎ | 6473/12188 [13:56:40<11:02:44, 6.96s/it] 53%|█████▎ | 6474/12188 [13:56:48<11:32:00, 7.27s/it] {'loss': 0.3291, 'grad_norm': 0.7017685512897761, 'learning_rate': 4.739691343272121e-06, 'epoch': 0.53} + 53%|█████▎ | 6474/12188 [13:56:48<11:32:00, 7.27s/it] 53%|█████▎ | 6475/12188 [13:56:55<11:26:41, 7.21s/it] {'loss': 0.3226, 'grad_norm': 0.6581269562647446, 'learning_rate': 4.738364448300004e-06, 'epoch': 0.53} + 53%|█████▎ | 6475/12188 [13:56:55<11:26:41, 7.21s/it] 53%|█████▎ | 6476/12188 [13:57:04<12:16:00, 7.73s/it] {'loss': 0.3251, 'grad_norm': 0.6736222748257695, 'learning_rate': 4.737037571804166e-06, 'epoch': 0.53} + 53%|█████▎ | 6476/12188 [13:57:04<12:16:00, 7.73s/it] 53%|█████▎ | 6477/12188 [13:57:11<11:59:46, 7.56s/it] {'loss': 0.3327, 'grad_norm': 0.7475652488462076, 'learning_rate': 4.735710713878313e-06, 'epoch': 0.53} + 53%|█████▎ | 6477/12188 [13:57:11<11:59:46, 7.56s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f134f50ea20> +[Try #0] Failed to fetch sample 4860323 in VC:s3://gui/OS-Atlas/desktop_domain/linux_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f134f50ea20> +Problematic sample: {'image': 'output_20240912_163313_original_screenshot_sub3.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Additional Printer Settings…'"}, {'from': 'gpt', 'value': '\nclick(x=0.1227, y=0.1972)\n'}]} + 53%|█████▎ | 6478/12188 [13:57:19<11:52:22, 7.49s/it] {'loss': 0.3462, 'grad_norm': 0.7146624250264249, 'learning_rate': 4.734383874616145e-06, 'epoch': 0.53} + 53%|█████▎ | 6478/12188 [13:57:19<11:52:22, 7.49s/it] 53%|█████▎ | 6479/12188 [13:57:25<11:31:36, 7.27s/it] {'loss': 0.3432, 'grad_norm': 0.6966893663007123, 'learning_rate': 4.73305705411136e-06, 'epoch': 0.53} + 53%|█████▎ | 6479/12188 [13:57:25<11:31:36, 7.27s/it] 53%|█████▎ | 6480/12188 [13:57:32<11:28:22, 7.24s/it] {'loss': 0.3322, 'grad_norm': 0.6488278528807537, 'learning_rate': 4.731730252457657e-06, 'epoch': 0.53} + 53%|█████▎ | 6480/12188 [13:57:32<11:28:22, 7.24s/it] 53%|█████▎ | 6481/12188 [13:57:40<11:25:45, 7.21s/it] {'loss': 0.3172, 'grad_norm': 0.6617696136848813, 'learning_rate': 4.730403469748735e-06, 'epoch': 0.53} + 53%|█████▎ | 6481/12188 [13:57:40<11:25:45, 7.21s/it] 53%|█████▎ | 6482/12188 [13:58:02<18:36:14, 11.74s/it] {'loss': 0.3917, 'grad_norm': 1.0078933072343452, 'learning_rate': 4.729076706078285e-06, 'epoch': 0.53} + 53%|█████▎ | 6482/12188 [13:58:02<18:36:14, 11.74s/it] 53%|█████▎ | 6483/12188 [13:58:50<36:05:48, 22.78s/it] {'loss': 0.3124, 'grad_norm': 0.6818596879176274, 'learning_rate': 4.727749961540003e-06, 'epoch': 0.53} + 53%|█████▎ | 6483/12188 [13:58:50<36:05:48, 22.78s/it] 53%|█████▎ | 6484/12188 [13:59:32<44:57:30, 28.37s/it] {'loss': 0.3217, 'grad_norm': 0.809636544880875, 'learning_rate': 4.726423236227581e-06, 'epoch': 0.53} + 53%|█████▎ | 6484/12188 [13:59:32<44:57:30, 28.37s/it] 53%|█████▎ | 6485/12188 [14:00:09<49:12:17, 31.06s/it] {'loss': 0.3148, 'grad_norm': 1.1610846092119096, 'learning_rate': 4.72509653023471e-06, 'epoch': 0.53} + 53%|█████▎ | 6485/12188 [14:00:09<49:12:17, 31.06s/it] 53%|█████▎ | 6486/12188 [14:00:18<38:22:36, 24.23s/it] {'loss': 0.3295, 'grad_norm': 0.6721915097275346, 'learning_rate': 4.723769843655083e-06, 'epoch': 0.53} + 53%|█████▎ | 6486/12188 [14:00:18<38:22:36, 24.23s/it] 53%|█████▎ | 6487/12188 [14:00:40<37:34:15, 23.72s/it] {'loss': 0.3311, 'grad_norm': 0.7532798096731711, 'learning_rate': 4.722443176582383e-06, 'epoch': 0.53} + 53%|█████▎ | 6487/12188 [14:00:40<37:34:15, 23.72s/it] 53%|█████▎ | 6488/12188 [14:01:14<42:15:13, 26.69s/it] {'loss': 0.3459, 'grad_norm': 0.6648754405508817, 'learning_rate': 4.721116529110302e-06, 'epoch': 0.53} + 53%|█████▎ | 6488/12188 [14:01:14<42:15:13, 26.69s/it] 53%|█████▎ | 6489/12188 [14:02:05<53:59:27, 34.11s/it] {'loss': 0.3442, 'grad_norm': 0.7693017134025625, 'learning_rate': 4.719789901332521e-06, 'epoch': 0.53} + 53%|█████▎ | 6489/12188 [14:02:05<53:59:27, 34.11s/it] 53%|█████▎ | 6490/12188 [14:02:12<41:06:03, 25.97s/it] {'loss': 0.3164, 'grad_norm': 0.7285608214125033, 'learning_rate': 4.718463293342729e-06, 'epoch': 0.53} + 53%|█████▎ | 6490/12188 [14:02:12<41:06:03, 25.97s/it] 53%|█████▎ | 6491/12188 [14:02:57<50:05:05, 31.65s/it] {'loss': 0.3215, 'grad_norm': 0.8108392229016047, 'learning_rate': 4.7171367052346076e-06, 'epoch': 0.53} + 53%|█████▎ | 6491/12188 [14:02:57<50:05:05, 31.65s/it] 53%|█████▎ | 6492/12188 [14:03:18<45:03:16, 28.48s/it] {'loss': 0.3741, 'grad_norm': 0.6796359953806506, 'learning_rate': 4.715810137101836e-06, 'epoch': 0.53} + 53%|█████▎ | 6492/12188 [14:03:18<45:03:16, 28.48s/it] 53%|█████▎ | 6493/12188 [14:03:40<42:06:42, 26.62s/it] {'loss': 0.3303, 'grad_norm': 1.10414478984892, 'learning_rate': 4.714483589038098e-06, 'epoch': 0.53} + 53%|█████▎ | 6493/12188 [14:03:40<42:06:42, 26.62s/it] 53%|█████▎ | 6494/12188 [14:04:02<39:34:32, 25.02s/it] {'loss': 0.3229, 'grad_norm': 0.6663845799007458, 'learning_rate': 4.713157061137067e-06, 'epoch': 0.53} + 53%|█████▎ | 6494/12188 [14:04:02<39:34:32, 25.02s/it] 53%|█████▎ | 6495/12188 [14:04:23<37:48:23, 23.91s/it] {'loss': 0.316, 'grad_norm': 0.651591871167615, 'learning_rate': 4.711830553492424e-06, 'epoch': 0.53} + 53%|█████▎ | 6495/12188 [14:04:23<37:48:23, 23.91s/it] 53%|█████▎ | 6496/12188 [14:04:30<30:00:25, 18.98s/it] {'loss': 0.304, 'grad_norm': 0.6107987405653001, 'learning_rate': 4.710504066197847e-06, 'epoch': 0.53} + 53%|█████▎ | 6496/12188 [14:04:30<30:00:25, 18.98s/it] 53%|█████▎ | 6497/12188 [14:04:37<24:18:04, 15.37s/it] {'loss': 0.3392, 'grad_norm': 0.7442249931363393, 'learning_rate': 4.709177599347006e-06, 'epoch': 0.53} + 53%|█████▎ | 6497/12188 [14:04:37<24:18:04, 15.37s/it] 53%|█████▎ | 6498/12188 [14:05:06<30:25:57, 19.25s/it] {'loss': 0.3421, 'grad_norm': 0.65793924813501, 'learning_rate': 4.707851153033575e-06, 'epoch': 0.53} + 53%|█████▎ | 6498/12188 [14:05:06<30:25:57, 19.25s/it] 53%|█████▎ | 6499/12188 [14:05:12<24:30:34, 15.51s/it] {'loss': 0.3172, 'grad_norm': 0.6970254263873301, 'learning_rate': 4.706524727351229e-06, 'epoch': 0.53} + 53%|█████▎ | 6499/12188 [14:05:12<24:30:34, 15.51s/it] 53%|█████▎ | 6500/12188 [14:05:19<20:22:25, 12.89s/it] {'loss': 0.3254, 'grad_norm': 0.7515797034929957, 'learning_rate': 4.705198322393632e-06, 'epoch': 0.53} + 53%|█████▎ | 6500/12188 [14:05:19<20:22:25, 12.89s/it] 53%|█████▎ | 6501/12188 [14:05:40<24:02:43, 15.22s/it] {'loss': 0.325, 'grad_norm': 0.7248466507801031, 'learning_rate': 4.7038719382544585e-06, 'epoch': 0.53} + 53%|█████▎ | 6501/12188 [14:05:40<24:02:43, 15.22s/it] 53%|█████▎ | 6502/12188 [14:05:47<19:59:07, 12.65s/it] {'loss': 0.3282, 'grad_norm': 0.7208976210227123, 'learning_rate': 4.702545575027371e-06, 'epoch': 0.53} + 53%|█████▎ | 6502/12188 [14:05:47<19:59:07, 12.65s/it] 53%|█████▎ | 6503/12188 [14:05:54<17:35:00, 11.13s/it] {'loss': 0.2974, 'grad_norm': 0.7295543088469668, 'learning_rate': 4.701219232806038e-06, 'epoch': 0.53} + 53%|█████▎ | 6503/12188 [14:05:54<17:35:00, 11.13s/it] 53%|█████▎ | 6504/12188 [14:06:01<15:41:11, 9.94s/it] {'loss': 0.3058, 'grad_norm': 0.714143762944556, 'learning_rate': 4.699892911684123e-06, 'epoch': 0.53} + 53%|█████▎ | 6504/12188 [14:06:01<15:41:11, 9.94s/it] 53%|█████▎ | 6505/12188 [14:06:10<14:52:29, 9.42s/it] {'loss': 0.3171, 'grad_norm': 0.879078116163491, 'learning_rate': 4.698566611755287e-06, 'epoch': 0.53} + 53%|█████▎ | 6505/12188 [14:06:10<14:52:29, 9.42s/it] 53%|█████▎ | 6506/12188 [14:06:17<13:50:33, 8.77s/it] {'loss': 0.3098, 'grad_norm': 0.7111569102057732, 'learning_rate': 4.697240333113195e-06, 'epoch': 0.53} + 53%|█████▎ | 6506/12188 [14:06:17<13:50:33, 8.77s/it] 53%|█████▎ | 6507/12188 [14:06:24<13:11:26, 8.36s/it] {'loss': 0.3087, 'grad_norm': 0.6382265217833307, 'learning_rate': 4.695914075851502e-06, 'epoch': 0.53} + 53%|█████▎ | 6507/12188 [14:06:24<13:11:26, 8.36s/it] 53%|█████▎ | 6508/12188 [14:06:31<12:20:19, 7.82s/it] {'loss': 0.2996, 'grad_norm': 0.6500691072382634, 'learning_rate': 4.694587840063868e-06, 'epoch': 0.53} + 53%|█████▎ | 6508/12188 [14:06:31<12:20:19, 7.82s/it] 53%|█████▎ | 6509/12188 [14:06:37<11:42:38, 7.42s/it] {'loss': 0.2822, 'grad_norm': 0.6669166899945345, 'learning_rate': 4.693261625843952e-06, 'epoch': 0.53} + 53%|█████▎ | 6509/12188 [14:06:37<11:42:38, 7.42s/it] 53%|█████▎ | 6510/12188 [14:06:44<11:29:30, 7.29s/it] {'loss': 0.3334, 'grad_norm': 0.6764701344101464, 'learning_rate': 4.691935433285407e-06, 'epoch': 0.53} + 53%|█████▎ | 6510/12188 [14:06:44<11:29:30, 7.29s/it] 53%|█████▎ | 6511/12188 [14:06:51<11:25:36, 7.25s/it] {'loss': 0.3078, 'grad_norm': 0.8404237818386513, 'learning_rate': 4.690609262481886e-06, 'epoch': 0.53} + 53%|█████▎ | 6511/12188 [14:06:51<11:25:36, 7.25s/it] 53%|█████▎ | 6512/12188 [14:06:58<11:19:08, 7.18s/it] {'loss': 0.3397, 'grad_norm': 0.7661198897625121, 'learning_rate': 4.689283113527042e-06, 'epoch': 0.53} + 53%|█████▎ | 6512/12188 [14:06:58<11:19:08, 7.18s/it] 53%|█████▎ | 6513/12188 [14:07:08<12:16:00, 7.78s/it] {'loss': 0.288, 'grad_norm': 0.6550702307412601, 'learning_rate': 4.6879569865145245e-06, 'epoch': 0.53} + 53%|█████▎ | 6513/12188 [14:07:08<12:16:00, 7.78s/it] 53%|█████▎ | 6514/12188 [14:07:14<11:52:16, 7.53s/it] {'loss': 0.3292, 'grad_norm': 0.6691834909071003, 'learning_rate': 4.686630881537985e-06, 'epoch': 0.53} + 53%|█████▎ | 6514/12188 [14:07:15<11:52:16, 7.53s/it] 53%|█████▎ | 6515/12188 [14:07:22<11:37:21, 7.38s/it] {'loss': 0.3123, 'grad_norm': 0.7010073174193141, 'learning_rate': 4.685304798691068e-06, 'epoch': 0.53} + 53%|█████▎ | 6515/12188 [14:07:22<11:37:21, 7.38s/it] 53%|█████▎ | 6516/12188 [14:07:28<11:19:21, 7.19s/it] {'loss': 0.3479, 'grad_norm': 0.6515762964319776, 'learning_rate': 4.6839787380674206e-06, 'epoch': 0.53} + 53%|█████▎ | 6516/12188 [14:07:28<11:19:21, 7.19s/it] 53%|█████▎ | 6517/12188 [14:07:36<11:23:33, 7.23s/it] {'loss': 0.315, 'grad_norm': 0.6485335477155159, 'learning_rate': 4.682652699760687e-06, 'epoch': 0.53} + 53%|█████▎ | 6517/12188 [14:07:36<11:23:33, 7.23s/it] 53%|█████▎ | 6518/12188 [14:07:43<11:29:48, 7.30s/it] {'loss': 0.3246, 'grad_norm': 0.729533470897309, 'learning_rate': 4.68132668386451e-06, 'epoch': 0.53} + 53%|█████▎ | 6518/12188 [14:07:43<11:29:48, 7.30s/it] 53%|█████▎ | 6519/12188 [14:07:50<11:19:11, 7.19s/it] {'loss': 0.2955, 'grad_norm': 0.7350256596165342, 'learning_rate': 4.680000690472532e-06, 'epoch': 0.53} + 53%|█████▎ | 6519/12188 [14:07:50<11:19:11, 7.19s/it] 53%|█████▎ | 6520/12188 [14:07:58<11:44:40, 7.46s/it] {'loss': 0.3118, 'grad_norm': 0.711989495172929, 'learning_rate': 4.678674719678389e-06, 'epoch': 0.53} + 53%|█████▎ | 6520/12188 [14:07:58<11:44:40, 7.46s/it] 54%|█████▎ | 6521/12188 [14:08:05<11:41:27, 7.43s/it] {'loss': 0.3387, 'grad_norm': 0.7244283047526591, 'learning_rate': 4.677348771575721e-06, 'epoch': 0.54} + 54%|█████▎ | 6521/12188 [14:08:05<11:41:27, 7.43s/it] 54%|█████▎ | 6522/12188 [14:08:13<11:40:38, 7.42s/it] {'loss': 0.3385, 'grad_norm': 0.7815322995558449, 'learning_rate': 4.676022846258166e-06, 'epoch': 0.54} + 54%|█████▎ | 6522/12188 [14:08:13<11:40:38, 7.42s/it] 54%|█████▎ | 6523/12188 [14:08:20<11:40:40, 7.42s/it] {'loss': 0.3742, 'grad_norm': 0.6570595883675087, 'learning_rate': 4.674696943819355e-06, 'epoch': 0.54} + 54%|█████▎ | 6523/12188 [14:08:20<11:40:40, 7.42s/it] 54%|█████▎ | 6524/12188 [14:08:29<12:09:02, 7.72s/it] {'loss': 0.3134, 'grad_norm': 0.7111641548414062, 'learning_rate': 4.6733710643529265e-06, 'epoch': 0.54} + 54%|█████▎ | 6524/12188 [14:08:29<12:09:02, 7.72s/it] 54%|█████▎ | 6525/12188 [14:08:36<11:45:18, 7.47s/it] {'loss': 0.3367, 'grad_norm': 0.6547611261046177, 'learning_rate': 4.672045207952506e-06, 'epoch': 0.54} + 54%|█████▎ | 6525/12188 [14:08:36<11:45:18, 7.47s/it] 54%|█████▎ | 6526/12188 [14:08:43<11:46:21, 7.49s/it] {'loss': 0.301, 'grad_norm': 1.0412867820418297, 'learning_rate': 4.6707193747117256e-06, 'epoch': 0.54} + 54%|█████▎ | 6526/12188 [14:08:43<11:46:21, 7.49s/it] 54%|█████▎ | 6527/12188 [14:08:50<11:21:49, 7.23s/it] {'loss': 0.3129, 'grad_norm': 0.9507639819477764, 'learning_rate': 4.669393564724216e-06, 'epoch': 0.54} + 54%|█████▎ | 6527/12188 [14:08:50<11:21:49, 7.23s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f09a23af0b0> +[Try #0] Failed to fetch sample 4753471 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f09a23af0b0> +Problematic sample: {'image': '20240823_021250_before_screenshot_sub0.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Wristwatches'"}, {'from': 'gpt', 'value': '\nclick(x=0.7655, y=0.266)\n'}]} + 54%|█████▎ | 6528/12188 [14:08:57<11:18:00, 7.19s/it] {'loss': 0.3181, 'grad_norm': 0.6806636921318503, 'learning_rate': 4.6680677780836005e-06, 'epoch': 0.54} + 54%|█████▎ | 6528/12188 [14:08:57<11:18:00, 7.19s/it] 54%|█████▎ | 6529/12188 [14:09:03<11:03:58, 7.04s/it] {'loss': 0.337, 'grad_norm': 0.6765024457244816, 'learning_rate': 4.666742014883506e-06, 'epoch': 0.54} + 54%|█████▎ | 6529/12188 [14:09:03<11:03:58, 7.04s/it] 54%|█████▎ | 6530/12188 [14:09:10<10:59:53, 7.00s/it] {'loss': 0.3472, 'grad_norm': 0.7837383895325591, 'learning_rate': 4.665416275217554e-06, 'epoch': 0.54} + 54%|���████▎ | 6530/12188 [14:09:10<10:59:53, 7.00s/it] 54%|█████▎ | 6531/12188 [14:09:17<11:02:44, 7.03s/it] {'loss': 0.3277, 'grad_norm': 0.6243802344764031, 'learning_rate': 4.664090559179367e-06, 'epoch': 0.54} + 54%|█████▎ | 6531/12188 [14:09:18<11:02:44, 7.03s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7fb3679774c0> +[Try #0] Failed to fetch sample 4717633 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7fb3679774c0> +Problematic sample: {'image': '20240827_145511_before_screenshot_sub3.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Action Center, No new notifications'"}, {'from': 'gpt', 'value': '\nclick(x=0.9775, y=0.972)\n'}]} + 54%|█████▎ | 6532/12188 [14:09:24<10:49:47, 6.89s/it] {'loss': 0.3217, 'grad_norm': 0.7030767575038154, 'learning_rate': 4.6627648668625666e-06, 'epoch': 0.54} + 54%|█████▎ | 6532/12188 [14:09:24<10:49:47, 6.89s/it] 54%|█████▎ | 6533/12188 [14:09:31<10:42:34, 6.82s/it] {'loss': 0.3468, 'grad_norm': 0.6322518123806369, 'learning_rate': 4.6614391983607675e-06, 'epoch': 0.54} + 54%|█████▎ | 6533/12188 [14:09:31<10:42:34, 6.82s/it] 54%|█████▎ | 6534/12188 [14:09:38<11:02:02, 7.03s/it] {'loss': 0.3459, 'grad_norm': 0.6966745329001286, 'learning_rate': 4.660113553767588e-06, 'epoch': 0.54} + 54%|█████▎ | 6534/12188 [14:09:38<11:02:02, 7.03s/it] 54%|█████▎ | 6535/12188 [14:09:47<11:41:02, 7.44s/it] {'loss': 0.3711, 'grad_norm': 0.7485775039415573, 'learning_rate': 4.6587879331766465e-06, 'epoch': 0.54} + 54%|█████▎ | 6535/12188 [14:09:47<11:41:02, 7.44s/it] 54%|█████▎ | 6536/12188 [14:09:53<11:22:27, 7.24s/it] {'loss': 0.3291, 'grad_norm': 0.672108897422256, 'learning_rate': 4.6574623366815494e-06, 'epoch': 0.54} + 54%|█████▎ | 6536/12188 [14:09:53<11:22:27, 7.24s/it] 54%|█████▎ | 6537/12188 [14:10:00<11:16:36, 7.18s/it] {'loss': 0.3142, 'grad_norm': 0.6180321161867419, 'learning_rate': 4.656136764375914e-06, 'epoch': 0.54} + 54%|█████▎ | 6537/12188 [14:10:00<11:16:36, 7.18s/it] 54%|█████▎ | 6538/12188 [14:10:08<11:19:54, 7.22s/it] {'loss': 0.3124, 'grad_norm': 0.6491090521966368, 'learning_rate': 4.654811216353347e-06, 'epoch': 0.54} + 54%|█████▎ | 6538/12188 [14:10:08<11:19:54, 7.22s/it] 54%|█████▎ | 6539/12188 [14:10:14<11:03:27, 7.05s/it] {'loss': 0.3202, 'grad_norm': 0.6842240603278845, 'learning_rate': 4.653485692707457e-06, 'epoch': 0.54} + 54%|█████▎ | 6539/12188 [14:10:14<11:03:27, 7.05s/it] 54%|█████▎ | 6540/12188 [14:10:22<11:12:04, 7.14s/it] {'loss': 0.3081, 'grad_norm': 0.6129491544914593, 'learning_rate': 4.652160193531852e-06, 'epoch': 0.54} + 54%|█████▎ | 6540/12188 [14:10:22<11:12:04, 7.14s/it] 54%|███��█▎ | 6541/12188 [14:10:29<11:09:17, 7.11s/it] {'loss': 0.3336, 'grad_norm': 0.6478515178515564, 'learning_rate': 4.650834718920134e-06, 'epoch': 0.54} + 54%|█████▎ | 6541/12188 [14:10:29<11:09:17, 7.11s/it] 54%|█████▎ | 6542/12188 [14:10:36<11:05:22, 7.07s/it] {'loss': 0.3212, 'grad_norm': 0.67484862092707, 'learning_rate': 4.649509268965908e-06, 'epoch': 0.54} + 54%|█████▎ | 6542/12188 [14:10:36<11:05:22, 7.07s/it] 54%|█████▎ | 6543/12188 [14:10:43<10:57:12, 6.99s/it] {'loss': 0.3317, 'grad_norm': 4.4803802416506855, 'learning_rate': 4.648183843762773e-06, 'epoch': 0.54} + 54%|█████▎ | 6543/12188 [14:10:43<10:57:12, 6.99s/it] 54%|█████▎ | 6544/12188 [14:10:50<11:22:46, 7.26s/it] {'loss': 0.3203, 'grad_norm': 0.726755809089181, 'learning_rate': 4.646858443404331e-06, 'epoch': 0.54} + 54%|█████▎ | 6544/12188 [14:10:50<11:22:46, 7.26s/it] 54%|█████▎ | 6545/12188 [14:10:58<11:19:01, 7.22s/it] {'loss': 0.3412, 'grad_norm': 0.6387678064555097, 'learning_rate': 4.645533067984178e-06, 'epoch': 0.54} + 54%|█████▎ | 6545/12188 [14:10:58<11:19:01, 7.22s/it] 54%|█████▎ | 6546/12188 [14:11:05<11:23:22, 7.27s/it] {'loss': 0.2924, 'grad_norm': 0.7488566910173665, 'learning_rate': 4.64420771759591e-06, 'epoch': 0.54} + 54%|█████▎ | 6546/12188 [14:11:05<11:23:22, 7.27s/it] 54%|█████▎ | 6547/12188 [14:11:13<11:33:55, 7.38s/it] {'loss': 0.3257, 'grad_norm': 0.7414996183187145, 'learning_rate': 4.6428823923331225e-06, 'epoch': 0.54} + 54%|█████▎ | 6547/12188 [14:11:13<11:33:55, 7.38s/it] 54%|█████▎ | 6548/12188 [14:11:20<11:27:37, 7.32s/it] {'loss': 0.3606, 'grad_norm': 0.7456565504621376, 'learning_rate': 4.641557092289405e-06, 'epoch': 0.54} + 54%|█████▎ | 6548/12188 [14:11:20<11:27:37, 7.32s/it] 54%|█████▎ | 6549/12188 [14:11:27<11:23:36, 7.27s/it] {'loss': 0.3527, 'grad_norm': 0.8825089360456805, 'learning_rate': 4.6402318175583495e-06, 'epoch': 0.54} + 54%|█████▎ | 6549/12188 [14:11:27<11:23:36, 7.27s/it] 54%|█████▎ | 6550/12188 [14:11:34<11:17:24, 7.21s/it] {'loss': 0.2902, 'grad_norm': 0.6386515963420552, 'learning_rate': 4.638906568233548e-06, 'epoch': 0.54} + 54%|█████▎ | 6550/12188 [14:11:34<11:17:24, 7.21s/it] 54%|█████▎ | 6551/12188 [14:11:41<11:24:55, 7.29s/it] {'loss': 0.2994, 'grad_norm': 0.8115718639219985, 'learning_rate': 4.637581344408581e-06, 'epoch': 0.54} + 54%|█████▎ | 6551/12188 [14:11:42<11:24:55, 7.29s/it] 54%|█████▍ | 6552/12188 [14:11:48<11:14:14, 7.18s/it] {'loss': 0.2908, 'grad_norm': 0.7312280253803255, 'learning_rate': 4.636256146177039e-06, 'epoch': 0.54} + 54%|█████▍ | 6552/12188 [14:11:48<11:14:14, 7.18s/it] 54%|█████▍ | 6553/12188 [14:11:57<11:42:55, 7.48s/it] {'loss': 0.3019, 'grad_norm': 0.6575124584852915, 'learning_rate': 4.634930973632504e-06, 'epoch': 0.54} + 54%|█████▍ | 6553/12188 [14:11:57<11:42:55, 7.48s/it] 54%|█████▍ | 6554/12188 [14:12:04<11:29:02, 7.34s/it] {'loss': 0.3425, 'grad_norm': 0.7336090946429098, 'learning_rate': 4.633605826868556e-06, 'epoch': 0.54} + 54%|█████▍ | 6554/12188 [14:12:04<11:29:02, 7.34s/it] 54%|█████▍ | 6555/12188 [14:12:12<11:58:42, 7.66s/it] {'loss': 0.2908, 'grad_norm': 0.6883905358299838, 'learning_rate': 4.632280705978777e-06, 'epoch': 0.54} + 54%|█████▍ | 6555/12188 [14:12:12<11:58:42, 7.66s/it] 54%|█████▍ | 6556/12188 [14:12:19<11:32:54, 7.38s/it] {'loss': 0.3061, 'grad_norm': 1.0709887603175448, 'learning_rate': 4.630955611056742e-06, 'epoch': 0.54} + 54%|█████▍ | 6556/12188 [14:12:19<11:32:54, 7.38s/it] 54%|█████▍ | 6557/12188 [14:12:25<11:12:32, 7.17s/it] {'loss': 0.3204, 'grad_norm': 0.6883446794617726, 'learning_rate': 4.6296305421960286e-06, 'epoch': 0.54} + 54%|█████▍ | 6557/12188 [14:12:25<11:12:32, 7.17s/it] 54%|█████▍ | 6558/12188 [14:12:33<11:23:55, 7.29s/it] {'loss': 0.3582, 'grad_norm': 1.1456171379238453, 'learning_rate': 4.6283054994902135e-06, 'epoch': 0.54} + 54%|█████▍ | 6558/12188 [14:12:33<11:23:55, 7.29s/it] 54%|█████▍ | 6559/12188 [14:12:40<11:07:31, 7.12s/it] {'loss': 0.3285, 'grad_norm': 0.7795959618777809, 'learning_rate': 4.626980483032866e-06, 'epoch': 0.54} + 54%|█████▍ | 6559/12188 [14:12:40<11:07:31, 7.12s/it] 54%|█████▍ | 6560/12188 [14:12:51<12:52:45, 8.24s/it] {'loss': 0.3006, 'grad_norm': 0.6073082827983459, 'learning_rate': 4.625655492917559e-06, 'epoch': 0.54} + 54%|█████▍ | 6560/12188 [14:12:51<12:52:45, 8.24s/it] 54%|█████▍ | 6561/12188 [14:12:59<12:46:31, 8.17s/it] {'loss': 0.3397, 'grad_norm': 0.7089134435193638, 'learning_rate': 4.624330529237857e-06, 'epoch': 0.54} + 54%|█████▍ | 6561/12188 [14:12:59<12:46:31, 8.17s/it] 54%|█████▍ | 6562/12188 [14:13:05<12:11:13, 7.80s/it] {'loss': 0.3846, 'grad_norm': 0.6551710636253657, 'learning_rate': 4.6230055920873295e-06, 'epoch': 0.54} + 54%|█████▍ | 6562/12188 [14:13:06<12:11:13, 7.80s/it] 54%|█████▍ | 6563/12188 [14:13:14<12:18:32, 7.88s/it] {'loss': 0.3452, 'grad_norm': 0.6850136017563303, 'learning_rate': 4.621680681559543e-06, 'epoch': 0.54} + 54%|█████▍ | 6563/12188 [14:13:14<12:18:32, 7.88s/it] 54%|█████▍ | 6564/12188 [14:13:21<11:55:11, 7.63s/it] {'loss': 0.3129, 'grad_norm': 0.6970733164515353, 'learning_rate': 4.6203557977480575e-06, 'epoch': 0.54} + 54%|█████▍ | 6564/12188 [14:13:21<11:55:11, 7.63s/it] 54%|█████▍ | 6565/12188 [14:13:28<11:39:15, 7.46s/it] {'loss': 0.324, 'grad_norm': 0.7425674882342087, 'learning_rate': 4.619030940746438e-06, 'epoch': 0.54} + 54%|█████▍ | 6565/12188 [14:13:28<11:39:15, 7.46s/it] 54%|█████▍ | 6566/12188 [14:13:35<11:22:10, 7.28s/it] {'loss': 0.331, 'grad_norm': 0.8505817339332054, 'learning_rate': 4.6177061106482404e-06, 'epoch': 0.54} + 54%|█████▍ | 6566/12188 [14:13:35<11:22:10, 7.28s/it] 54%|█████▍ | 6567/12188 [14:13:41<11:10:47, 7.16s/it] {'loss': 0.3174, 'grad_norm': 0.6994951237472067, 'learning_rate': 4.616381307547021e-06, 'epoch': 0.54} + 54%|█████▍ | 6567/12188 [14:13:41<11:10:47, 7.16s/it] 54%|█████▍ | 6568/12188 [14:13:48<11:02:00, 7.07s/it] {'loss': 0.3248, 'grad_norm': 0.6965895524513226, 'learning_rate': 4.61505653153634e-06, 'epoch': 0.54} + 54%|█████▍ | 6568/12188 [14:13:48<11:02:00, 7.07s/it] 54%|█████▍ | 6569/12188 [14:13:55<11:04:53, 7.10s/it] {'loss': 0.3008, 'grad_norm': 0.7480340637392122, 'learning_rate': 4.613731782709746e-06, 'epoch': 0.54} + 54%|█████▍ | 6569/12188 [14:13:55<11:04:53, 7.10s/it] 54%|█████▍ | 6570/12188 [14:14:03<11:26:36, 7.33s/it] {'loss': 0.3428, 'grad_norm': 0.730880122438488, 'learning_rate': 4.612407061160793e-06, 'epoch': 0.54} + 54%|█████▍ | 6570/12188 [14:14:03<11:26:36, 7.33s/it] 54%|█████▍ | 6571/12188 [14:14:10<11:20:33, 7.27s/it] {'loss': 0.3063, 'grad_norm': 0.7724574919827628, 'learning_rate': 4.611082366983033e-06, 'epoch': 0.54} + 54%|█████▍ | 6571/12188 [14:14:10<11:20:33, 7.27s/it] 54%|█████▍ | 6572/12188 [14:14:18<11:18:14, 7.25s/it] {'loss': 0.3285, 'grad_norm': 0.6688265468478232, 'learning_rate': 4.60975770027001e-06, 'epoch': 0.54} + 54%|█████▍ | 6572/12188 [14:14:18<11:18:14, 7.25s/it] 54%|█████▍ | 6573/12188 [14:14:25<11:29:53, 7.37s/it] {'loss': 0.3257, 'grad_norm': 0.760268734959991, 'learning_rate': 4.608433061115272e-06, 'epoch': 0.54} + 54%|█████▍ | 6573/12188 [14:14:25<11:29:53, 7.37s/it] 54%|█████▍ | 6574/12188 [14:14:32<11:16:23, 7.23s/it] {'loss': 0.3367, 'grad_norm': 1.0790434683636105, 'learning_rate': 4.607108449612361e-06, 'epoch': 0.54} + 54%|█████▍ | 6574/12188 [14:14:32<11:16:23, 7.23s/it] 54%|█████▍ | 6575/12188 [14:14:40<11:41:43, 7.50s/it] {'loss': 0.3351, 'grad_norm': 0.6800845461920116, 'learning_rate': 4.605783865854819e-06, 'epoch': 0.54} + 54%|█████▍ | 6575/12188 [14:14:40<11:41:43, 7.50s/it] 54%|█████▍ | 6576/12188 [14:14:47<11:26:47, 7.34s/it] {'loss': 0.3228, 'grad_norm': 0.8670793073886153, 'learning_rate': 4.60445930993619e-06, 'epoch': 0.54} + 54%|█████▍ | 6576/12188 [14:14:47<11:26:47, 7.34s/it] 54%|█████▍ | 6577/12188 [14:14:54<11:14:51, 7.22s/it] {'loss': 0.3014, 'grad_norm': 0.8218375835160058, 'learning_rate': 4.603134781950007e-06, 'epoch': 0.54} + 54%|█████▍ | 6577/12188 [14:14:54<11:14:51, 7.22s/it] 54%|█████▍ | 6578/12188 [14:15:02<11:32:53, 7.41s/it] {'loss': 0.331, 'grad_norm': 0.657048397441641, 'learning_rate': 4.60181028198981e-06, 'epoch': 0.54} + 54%|█████▍ | 6578/12188 [14:15:02<11:32:53, 7.41s/it]Invalidate trace cache @ step 2: expected module 1, but got module 364 + 54%|█████▍ | 6579/12188 [14:15:10<11:40:31, 7.49s/it] {'loss': 0.6581, 'grad_norm': 0.7033570566290194, 'learning_rate': 4.600485810149128e-06, 'epoch': 0.54} + 54%|█████▍ | 6579/12188 [14:15:10<11:40:31, 7.49s/it] 54%|█████▍ | 6580/12188 [14:15:17<11:31:02, 7.39s/it] {'loss': 0.3325, 'grad_norm': 0.7051134799529278, 'learning_rate': 4.5991613665214985e-06, 'epoch': 0.54} + 54%|█████▍ | 6580/12188 [14:15:17<11:31:02, 7.39s/it] 54%|█████▍ | 6581/12188 [14:15:24<11:19:22, 7.27s/it] {'loss': 0.2983, 'grad_norm': 0.6797760777122236, 'learning_rate': 4.59783695120045e-06, 'epoch': 0.54} + 54%|█████▍ | 6581/12188 [14:15:24<11:19:22, 7.27s/it] 54%|█████▍ | 6582/12188 [14:15:32<11:55:09, 7.65s/it] {'loss': 0.3232, 'grad_norm': 0.6543910861638469, 'learning_rate': 4.5965125642795075e-06, 'epoch': 0.54} + 54%|█████▍ | 6582/12188 [14:15:32<11:55:09, 7.65s/it] 54%|█████▍ | 6583/12188 [14:15:39<11:26:58, 7.35s/it] {'loss': 0.3177, 'grad_norm': 1.0036728124221554, 'learning_rate': 4.5951882058522015e-06, 'epoch': 0.54} + 54%|█████▍ | 6583/12188 [14:15:39<11:26:58, 7.35s/it] 54%|█████▍ | 6584/12188 [14:15:47<11:36:52, 7.46s/it] {'loss': 0.3096, 'grad_norm': 0.6023475955121932, 'learning_rate': 4.5938638760120524e-06, 'epoch': 0.54} + 54%|█████▍ | 6584/12188 [14:15:47<11:36:52, 7.46s/it] 54%|█████▍ | 6585/12188 [14:15:54<11:19:14, 7.27s/it] {'loss': 0.3265, 'grad_norm': 0.6636462637436831, 'learning_rate': 4.592539574852584e-06, 'epoch': 0.54} + 54%|█████▍ | 6585/12188 [14:15:54<11:19:14, 7.27s/it] 54%|█████▍ | 6586/12188 [14:16:01<11:31:11, 7.40s/it] {'loss': 0.3298, 'grad_norm': 0.661711445246962, 'learning_rate': 4.591215302467318e-06, 'epoch': 0.54} + 54%|█████▍ | 6586/12188 [14:16:01<11:31:11, 7.40s/it] 54%|█████▍ | 6587/12188 [14:16:10<11:51:45, 7.62s/it] {'loss': 0.3418, 'grad_norm': 0.7273373996880295, 'learning_rate': 4.589891058949769e-06, 'epoch': 0.54} + 54%|█████▍ | 6587/12188 [14:16:10<11:51:45, 7.62s/it] 54%|█████▍ | 6588/12188 [14:16:16<11:28:36, 7.38s/it] {'loss': 0.2985, 'grad_norm': 0.7810569987425767, 'learning_rate': 4.5885668443934544e-06, 'epoch': 0.54} + 54%|█████▍ | 6588/12188 [14:16:16<11:28:36, 7.38s/it] 54%|█████▍ | 6589/12188 [14:16:23<11:10:55, 7.19s/it] {'loss': 0.3831, 'grad_norm': 1.1424902343710133, 'learning_rate': 4.58724265889189e-06, 'epoch': 0.54} + 54%|█████▍ | 6589/12188 [14:16:23<11:10:55, 7.19s/it] 54%|█████▍ | 6590/12188 [14:16:31<11:23:22, 7.32s/it] {'loss': 0.3396, 'grad_norm': 0.8354976116993118, 'learning_rate': 4.585918502538583e-06, 'epoch': 0.54} + 54%|█████▍ | 6590/12188 [14:16:31<11:23:22, 7.32s/it] 54%|█████▍ | 6591/12188 [14:16:38<11:18:50, 7.28s/it] {'loss': 0.3104, 'grad_norm': 0.6796246701671677, 'learning_rate': 4.584594375427049e-06, 'epoch': 0.54} + 54%|█████▍ | 6591/12188 [14:16:38<11:18:50, 7.28s/it] 54%|█████▍ | 6592/12188 [14:16:45<11:21:41, 7.31s/it] {'loss': 0.3233, 'grad_norm': 1.23516468452257, 'learning_rate': 4.583270277650791e-06, 'epoch': 0.54} + 54%|█████▍ | 6592/12188 [14:16:45<11:21:41, 7.31s/it] 54%|█████▍ | 6593/12188 [14:16:53<11:21:45, 7.31s/it] {'loss': 0.3337, 'grad_norm': 0.8085945991237256, 'learning_rate': 4.581946209303318e-06, 'epoch': 0.54} + 54%|█████▍ | 6593/12188 [14:16:53<11:21:45, 7.31s/it] 54%|█████▍ | 6594/12188 [14:16:59<11:06:15, 7.15s/it] {'loss': 0.3398, 'grad_norm': 0.7133299997737675, 'learning_rate': 4.580622170478131e-06, 'epoch': 0.54} + 54%|█████▍ | 6594/12188 [14:16:59<11:06:15, 7.15s/it] 54%|█████▍ | 6595/12188 [14:17:08<11:50:39, 7.62s/it] {'loss': 0.3169, 'grad_norm': 0.6575642525959369, 'learning_rate': 4.579298161268732e-06, 'epoch': 0.54} + 54%|█████▍ | 6595/12188 [14:17:08<11:50:39, 7.62s/it] 54%|█████▍ | 6596/12188 [14:17:16<12:05:37, 7.79s/it] {'loss': 0.323, 'grad_norm': 0.6837877709936927, 'learning_rate': 4.5779741817686226e-06, 'epoch': 0.54} + 54%|█████▍ | 6596/12188 [14:17:16<12:05:37, 7.79s/it] 54%|█████▍ | 6597/12188 [14:17:23<11:38:20, 7.49s/it] {'loss': 0.332, 'grad_norm': 0.6941174723096086, 'learning_rate': 4.576650232071296e-06, 'epoch': 0.54} + 54%|█████▍ | 6597/12188 [14:17:23<11:38:20, 7.49s/it] 54%|█████▍ | 6598/12188 [14:17:31<12:03:43, 7.77s/it] {'loss': 0.3179, 'grad_norm': 0.7722575688044551, 'learning_rate': 4.575326312270251e-06, 'epoch': 0.54} + 54%|█████▍ | 6598/12188 [14:17:31<12:03:43, 7.77s/it] 54%|█████▍ | 6599/12188 [14:17:39<11:46:18, 7.58s/it] {'loss': 0.3227, 'grad_norm': 0.8621510285309543, 'learning_rate': 4.57400242245898e-06, 'epoch': 0.54} + 54%|█████▍ | 6599/12188 [14:17:39<11:46:18, 7.58s/it] 54%|█████▍ | 6600/12188 [14:17:46<11:43:10, 7.55s/it] {'loss': 0.3319, 'grad_norm': 0.6503589754631622, 'learning_rate': 4.5726785627309736e-06, 'epoch': 0.54} + 54%|█████▍ | 6600/12188 [14:17:46<11:43:10, 7.55s/it] 54%|█████▍ | 6601/12188 [14:17:54<11:47:56, 7.60s/it] {'loss': 0.3159, 'grad_norm': 0.6422732477065941, 'learning_rate': 4.57135473317972e-06, 'epoch': 0.54} + 54%|█████▍ | 6601/12188 [14:17:54<11:47:56, 7.60s/it] 54%|█████▍ | 6602/12188 [14:18:01<11:38:18, 7.50s/it] {'loss': 0.3438, 'grad_norm': 0.9469102030730522, 'learning_rate': 4.570030933898708e-06, 'epoch': 0.54} + 54%|█████▍ | 6602/12188 [14:18:01<11:38:18, 7.50s/it] 54%|█████▍ | 6603/12188 [14:18:08<11:15:39, 7.26s/it] {'loss': 0.3023, 'grad_norm': 0.616392985354242, 'learning_rate': 4.568707164981417e-06, 'epoch': 0.54} + 54%|█████▍ | 6603/12188 [14:18:08<11:15:39, 7.26s/it] 54%|█████▍ | 6604/12188 [14:18:15<11:28:30, 7.40s/it] {'loss': 0.3018, 'grad_norm': 0.6457926439133652, 'learning_rate': 4.567383426521337e-06, 'epoch': 0.54} + 54%|█████▍ | 6604/12188 [14:18:16<11:28:30, 7.40s/it] 54%|█████▍ | 6605/12188 [14:18:23<11:34:24, 7.46s/it] {'loss': 0.2994, 'grad_norm': 0.6529895197234888, 'learning_rate': 4.566059718611942e-06, 'epoch': 0.54} + 54%|█████▍ | 6605/12188 [14:18:23<11:34:24, 7.46s/it] 54%|█████▍ | 6606/12188 [14:18:30<11:20:07, 7.31s/it] {'loss': 0.3364, 'grad_norm': 0.7317614076614148, 'learning_rate': 4.564736041346714e-06, 'epoch': 0.54} + 54%|█████▍ | 6606/12188 [14:18:30<11:20:07, 7.31s/it] 54%|█████▍ | 6607/12188 [14:18:37<11:03:07, 7.13s/it] {'loss': 0.3427, 'grad_norm': 0.6580076291083967, 'learning_rate': 4.563412394819126e-06, 'epoch': 0.54} + 54%|█████▍ | 6607/12188 [14:18:37<11:03:07, 7.13s/it] 54%|█████▍ | 6608/12188 [14:18:44<11:05:30, 7.16s/it] {'loss': 0.3396, 'grad_norm': 0.6977790868885471, 'learning_rate': 4.5620887791226546e-06, 'epoch': 0.54} + 54%|█████▍ | 6608/12188 [14:18:44<11:05:30, 7.16s/it] 54%|█████▍ | 6609/12188 [14:18:51<11:02:30, 7.13s/it] {'loss': 0.3381, 'grad_norm': 0.7756527046015252, 'learning_rate': 4.56076519435077e-06, 'epoch': 0.54} + 54%|█████▍ | 6609/12188 [14:18:51<11:02:30, 7.13s/it] 54%|█████▍ | 6610/12188 [14:18:58<11:00:55, 7.11s/it] {'loss': 0.3317, 'grad_norm': 0.8152142357488587, 'learning_rate': 4.559441640596941e-06, 'epoch': 0.54} + 54%|█████▍ | 6610/12188 [14:18:58<11:00:55, 7.11s/it] 54%|█████▍ | 6611/12188 [14:19:05<11:08:08, 7.19s/it] {'loss': 0.3102, 'grad_norm': 0.7048124926049529, 'learning_rate': 4.558118117954634e-06, 'epoch': 0.54} + 54%|█████▍ | 6611/12188 [14:19:05<11:08:08, 7.19s/it] 54%|█████▍ | 6612/12188 [14:19:12<10:49:21, 6.99s/it] {'loss': 0.3187, 'grad_norm': 0.8137955297289524, 'learning_rate': 4.5567946265173185e-06, 'epoch': 0.54} + 54%|█████▍ | 6612/12188 [14:19:12<10:49:21, 6.99s/it] 54%|█████▍ | 6613/12188 [14:19:21<11:33:12, 7.46s/it] {'loss': 0.3079, 'grad_norm': 0.679643449161701, 'learning_rate': 4.555471166378452e-06, 'epoch': 0.54} + 54%|█████▍ | 6613/12188 [14:19:21<11:33:12, 7.46s/it] 54%|█████▍ | 6614/12188 [14:19:27<11:10:49, 7.22s/it] {'loss': 0.2909, 'grad_norm': 0.7122022694526992, 'learning_rate': 4.5541477376315e-06, 'epoch': 0.54} + 54%|█████▍ | 6614/12188 [14:19:27<11:10:49, 7.22s/it] 54%|█████▍ | 6615/12188 [14:19:36<12:01:48, 7.77s/it] {'loss': 0.3066, 'grad_norm': 0.899945066050088, 'learning_rate': 4.5528243403699175e-06, 'epoch': 0.54} + 54%|█████▍ | 6615/12188 [14:19:36<12:01:48, 7.77s/it] 54%|█████▍ | 6616/12188 [14:19:44<11:50:56, 7.66s/it] {'loss': 0.3192, 'grad_norm': 0.8288936387967779, 'learning_rate': 4.5515009746871605e-06, 'epoch': 0.54} + 54%|█████▍ | 6616/12188 [14:19:44<11:50:56, 7.66s/it] 54%|█████▍ | 6617/12188 [14:19:52<12:08:23, 7.84s/it] {'loss': 0.3096, 'grad_norm': 0.6461095103289181, 'learning_rate': 4.550177640676686e-06, 'epoch': 0.54} + 54%|█████▍ | 6617/12188 [14:19:52<12:08:23, 7.84s/it] 54%|█████▍ | 6618/12188 [14:19:59<11:55:52, 7.71s/it] {'loss': 0.304, 'grad_norm': 0.9888061342687005, 'learning_rate': 4.548854338431942e-06, 'epoch': 0.54} + 54%|█████▍ | 6618/12188 [14:19:59<11:55:52, 7.71s/it] 54%|█████▍ | 6619/12188 [14:20:07<11:47:21, 7.62s/it] {'loss': 0.3494, 'grad_norm': 0.9851580699458711, 'learning_rate': 4.5475310680463815e-06, 'epoch': 0.54} + 54%|█████▍ | 6619/12188 [14:20:07<11:47:21, 7.62s/it] 54%|█████▍ | 6620/12188 [14:20:15<12:16:03, 7.93s/it] {'loss': 0.3334, 'grad_norm': 0.6952172240892996, 'learning_rate': 4.546207829613448e-06, 'epoch': 0.54} + 54%|█████▍ | 6620/12188 [14:20:15<12:16:03, 7.93s/it] 54%|█████▍ | 6621/12188 [14:20:23<11:53:42, 7.69s/it] {'loss': 0.312, 'grad_norm': 0.7383570424046088, 'learning_rate': 4.544884623226589e-06, 'epoch': 0.54} + 54%|█████▍ | 6621/12188 [14:20:23<11:53:42, 7.69s/it] 54%|█████▍ | 6622/12188 [14:20:30<11:38:40, 7.53s/it] {'loss': 0.3298, 'grad_norm': 1.114491713813638, 'learning_rate': 4.543561448979249e-06, 'epoch': 0.54} + 54%|█████▍ | 6622/12188 [14:20:30<11:38:40, 7.53s/it] 54%|█████▍ | 6623/12188 [14:20:37<11:18:54, 7.32s/it] {'loss': 0.3605, 'grad_norm': 0.9719120930088114, 'learning_rate': 4.542238306964863e-06, 'epoch': 0.54} + 54%|█████▍ | 6623/12188 [14:20:37<11:18:54, 7.32s/it] 54%|█████▍ | 6624/12188 [14:20:43<11:02:24, 7.14s/it] {'loss': 0.3392, 'grad_norm': 0.9356547788043873, 'learning_rate': 4.540915197276873e-06, 'epoch': 0.54} + 54%|█████▍ | 6624/12188 [14:20:43<11:02:24, 7.14s/it] 54%|█████▍ | 6625/12188 [14:20:50<10:55:03, 7.07s/it] {'loss': 0.3542, 'grad_norm': 1.3961063832115832, 'learning_rate': 4.539592120008716e-06, 'epoch': 0.54} + 54%|█████▍ | 6625/12188 [14:20:50<10:55:03, 7.07s/it] 54%|█████▍ | 6626/12188 [14:20:57<10:46:54, 6.98s/it] {'loss': 0.341, 'grad_norm': 0.6554546061804806, 'learning_rate': 4.538269075253822e-06, 'epoch': 0.54} + 54%|█████▍ | 6626/12188 [14:20:57<10:46:54, 6.98s/it] 54%|█████▍ | 6627/12188 [14:21:04<10:43:35, 6.94s/it] {'loss': 0.3164, 'grad_norm': 1.2053652059598776, 'learning_rate': 4.5369460631056255e-06, 'epoch': 0.54} + 54%|█████▍ | 6627/12188 [14:21:04<10:43:35, 6.94s/it] 54%|█████▍ | 6628/12188 [14:21:12<11:04:47, 7.17s/it] {'loss': 0.3176, 'grad_norm': 0.7000678570825389, 'learning_rate': 4.535623083657554e-06, 'epoch': 0.54} + 54%|█████▍ | 6628/12188 [14:21:12<11:04:47, 7.17s/it] 54%|█████▍ | 6629/12188 [14:21:19<11:18:37, 7.32s/it] {'loss': 0.3071, 'grad_norm': 0.6400194719206977, 'learning_rate': 4.534300137003032e-06, 'epoch': 0.54} + 54%|█████▍ | 6629/12188 [14:21:19<11:18:37, 7.32s/it] 54%|█████▍ | 6630/12188 [14:21:26<11:08:41, 7.22s/it] {'loss': 0.3218, 'grad_norm': 0.8617863792600595, 'learning_rate': 4.532977223235489e-06, 'epoch': 0.54} + 54%|█████▍ | 6630/12188 [14:21:26<11:08:41, 7.22s/it] 54%|█████▍ | 6631/12188 [14:21:34<11:12:47, 7.26s/it] {'loss': 0.3369, 'grad_norm': 0.685124944521495, 'learning_rate': 4.531654342448342e-06, 'epoch': 0.54} + 54%|█████▍ | 6631/12188 [14:21:34<11:12:47, 7.26s/it] 54%|█████▍ | 6632/12188 [14:21:41<11:08:28, 7.22s/it] {'loss': 0.3061, 'grad_norm': 0.7418338856400996, 'learning_rate': 4.530331494735014e-06, 'epoch': 0.54} + 54%|█████▍ | 6632/12188 [14:21:41<11:08:28, 7.22s/it] 54%|█████▍ | 6633/12188 [14:21:48<11:05:00, 7.18s/it] {'loss': 0.2882, 'grad_norm': 0.9614572526358146, 'learning_rate': 4.529008680188921e-06, 'epoch': 0.54} + 54%|█████▍ | 6633/12188 [14:21:48<11:05:00, 7.18s/it] 54%|█████▍ | 6634/12188 [14:21:55<10:53:41, 7.06s/it] {'loss': 0.3072, 'grad_norm': 0.6761184297497992, 'learning_rate': 4.5276858989034785e-06, 'epoch': 0.54} + 54%|█████▍ | 6634/12188 [14:21:55<10:53:41, 7.06s/it] 54%|█████▍ | 6635/12188 [14:22:02<10:53:35, 7.06s/it] {'loss': 0.3204, 'grad_norm': 0.6617353268971472, 'learning_rate': 4.526363150972098e-06, 'epoch': 0.54} + 54%|█████▍ | 6635/12188 [14:22:02<10:53:35, 7.06s/it] 54%|█████▍ | 6636/12188 [14:22:11<11:52:08, 7.70s/it] {'loss': 0.2978, 'grad_norm': 0.6431886015653834, 'learning_rate': 4.525040436488192e-06, 'epoch': 0.54} + 54%|█████▍ | 6636/12188 [14:22:11<11:52:08, 7.70s/it] 54%|█████▍ | 6637/12188 [14:22:18<11:27:26, 7.43s/it] {'loss': 0.3148, 'grad_norm': 0.6921977004461656, 'learning_rate': 4.523717755545169e-06, 'epoch': 0.54} + 54%|█████▍ | 6637/12188 [14:22:18<11:27:26, 7.43s/it] 54%|█████▍ | 6638/12188 [14:22:26<12:00:50, 7.79s/it] {'loss': 0.304, 'grad_norm': 0.6389362104460716, 'learning_rate': 4.5223951082364295e-06, 'epoch': 0.54} + 54%|█████▍ | 6638/12188 [14:22:26<12:00:50, 7.79s/it] 54%|█████▍ | 6639/12188 [14:22:33<11:37:19, 7.54s/it] {'loss': 0.3308, 'grad_norm': 0.6537827326513636, 'learning_rate': 4.52107249465538e-06, 'epoch': 0.54} + 54%|█████▍ | 6639/12188 [14:22:33<11:37:19, 7.54s/it] 54%|█████▍ | 6640/12188 [14:22:41<11:34:25, 7.51s/it] {'loss': 0.3246, 'grad_norm': 0.6661523847902422, 'learning_rate': 4.5197499148954245e-06, 'epoch': 0.54} + 54%|█████▍ | 6640/12188 [14:22:41<11:34:25, 7.51s/it] 54%|█████▍ | 6641/12188 [14:22:47<11:03:48, 7.18s/it] {'loss': 0.3222, 'grad_norm': 1.0730574770050287, 'learning_rate': 4.518427369049955e-06, 'epoch': 0.54} + 54%|█████▍ | 6641/12188 [14:22:47<11:03:48, 7.18s/it] 54%|█████▍ | 6642/12188 [14:22:55<11:25:25, 7.42s/it] {'loss': 0.3254, 'grad_norm': 0.6608812420357539, 'learning_rate': 4.517104857212373e-06, 'epoch': 0.54} + 54%|█████▍ | 6642/12188 [14:22:55<11:25:25, 7.42s/it] 55%|█████▍ | 6643/12188 [14:23:03<11:29:24, 7.46s/it] {'loss': 0.3266, 'grad_norm': 0.739663139133177, 'learning_rate': 4.515782379476071e-06, 'epoch': 0.55} + 55%|█████▍ | 6643/12188 [14:23:03<11:29:24, 7.46s/it] 55%|█████▍ | 6644/12188 [14:23:10<11:20:29, 7.36s/it] {'loss': 0.3537, 'grad_norm': 0.7093361686896349, 'learning_rate': 4.514459935934438e-06, 'epoch': 0.55} + 55%|█████▍ | 6644/12188 [14:23:10<11:20:29, 7.36s/it] 55%|█████▍ | 6645/12188 [14:23:17<11:25:13, 7.42s/it] {'loss': 0.2995, 'grad_norm': 0.8330795751905851, 'learning_rate': 4.513137526680865e-06, 'epoch': 0.55} + 55%|█████▍ | 6645/12188 [14:23:17<11:25:13, 7.42s/it] 55%|█████▍ | 6646/12188 [14:23:25<11:23:00, 7.39s/it] {'loss': 0.3136, 'grad_norm': 0.9528699593062254, 'learning_rate': 4.511815151808737e-06, 'epoch': 0.55} + 55%|█████▍ | 6646/12188 [14:23:25<11:23:00, 7.39s/it] 55%|█████▍ | 6647/12188 [14:23:32<11:23:16, 7.40s/it] {'loss': 0.301, 'grad_norm': 0.8866474588382535, 'learning_rate': 4.510492811411438e-06, 'epoch': 0.55} + 55%|█████▍ | 6647/12188 [14:23:32<11:23:16, 7.40s/it] 55%|█████▍ | 6648/12188 [14:23:40<11:34:04, 7.52s/it] {'loss': 0.3511, 'grad_norm': 0.6826434203615505, 'learning_rate': 4.509170505582353e-06, 'epoch': 0.55} + 55%|█████▍ | 6648/12188 [14:23:40<11:34:04, 7.52s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7faf471712b0> +[Try #0] Failed to fetch sample 4862380 in VC:s3://gui/OS-Atlas/desktop_domain/linux_images. Exception: cannot identify image file <_io.BytesIO object at 0x7faf471712b0> +Problematic sample: {'image': 'output_20240912_163920_original_screenshot_sub3.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'How do I enable restricted codecs to play DVDs? Most commercial DVDs are encrypted and will not play without decryption software.'"}, {'from': 'gpt', 'value': '\nclick(x=0.2156, y=0.5618)\n'}]} + 55%|█████▍ | 6649/12188 [14:23:47<11:22:35, 7.39s/it] {'loss': 0.3189, 'grad_norm': 0.8345956901151385, 'learning_rate': 4.507848234414856e-06, 'epoch': 0.55} + 55%|█████▍ | 6649/12188 [14:23:47<11:22:35, 7.39s/it] 55%|█████▍ | 6650/12188 [14:23:54<11:16:48, 7.33s/it] {'loss': 0.3262, 'grad_norm': 0.7823195407302365, 'learning_rate': 4.506525998002327e-06, 'epoch': 0.55} + 55%|█████▍ | 6650/12188 [14:23:54<11:16:48, 7.33s/it] 55%|█████▍ | 6651/12188 [14:24:01<11:17:54, 7.35s/it] {'loss': 0.3309, 'grad_norm': 0.711771287877461, 'learning_rate': 4.505203796438138e-06, 'epoch': 0.55} + 55%|█████▍ | 6651/12188 [14:24:01<11:17:54, 7.35s/it] 55%|█████▍ | 6652/12188 [14:24:10<11:45:51, 7.65s/it] {'loss': 0.3357, 'grad_norm': 0.7251761480061573, 'learning_rate': 4.503881629815662e-06, 'epoch': 0.55} + 55%|█████▍ | 6652/12188 [14:24:10<11:45:51, 7.65s/it] 55%|█████▍ | 6653/12188 [14:24:18<12:04:39, 7.86s/it] {'loss': 0.3374, 'grad_norm': 0.6801426136197581, 'learning_rate': 4.50255949822827e-06, 'epoch': 0.55} + 55%|█████▍ | 6653/12188 [14:24:18<12:04:39, 7.86s/it] 55%|█████▍ | 6654/12188 [14:24:25<11:46:40, 7.66s/it] {'loss': 0.3106, 'grad_norm': 0.7118957993259311, 'learning_rate': 4.501237401769324e-06, 'epoch': 0.55} + 55%|█████▍ | 6654/12188 [14:24:25<11:46:40, 7.66s/it] 55%|█████▍ | 6655/12188 [14:24:32<11:20:50, 7.38s/it] {'loss': 0.3644, 'grad_norm': 0.6855557638236704, 'learning_rate': 4.499915340532195e-06, 'epoch': 0.55} + 55%|█████▍ | 6655/12188 [14:24:32<11:20:50, 7.38s/it] 55%|█████▍ | 6656/12188 [14:24:39<11:06:54, 7.23s/it] {'loss': 0.3346, 'grad_norm': 0.7206692307428746, 'learning_rate': 4.4985933146102395e-06, 'epoch': 0.55} + 55%|█████▍ | 6656/12188 [14:24:39<11:06:54, 7.23s/it] 55%|█████▍ | 6657/12188 [14:24:46<10:59:56, 7.16s/it] {'loss': 0.3399, 'grad_norm': 0.6730257820120866, 'learning_rate': 4.497271324096817e-06, 'epoch': 0.55} + 55%|█████▍ | 6657/12188 [14:24:46<10:59:56, 7.16s/it] 55%|█████▍ | 6658/12188 [14:24:53<10:54:04, 7.10s/it] {'loss': 0.3473, 'grad_norm': 0.6609151538220458, 'learning_rate': 4.495949369085289e-06, 'epoch': 0.55} + 55%|█████▍ | 6658/12188 [14:24:53<10:54:04, 7.10s/it] 55%|█████▍ | 6659/12188 [14:25:00<10:49:02, 7.04s/it] {'loss': 0.3148, 'grad_norm': 0.654405449148218, 'learning_rate': 4.494627449669004e-06, 'epoch': 0.55} + 55%|█████▍ | 6659/12188 [14:25:00<10:49:02, 7.04s/it] 55%|█████▍ | 6660/12188 [14:25:07<11:00:44, 7.17s/it] {'loss': 0.2668, 'grad_norm': 1.0082717496658828, 'learning_rate': 4.4933055659413175e-06, 'epoch': 0.55} + 55%|█████▍ | 6660/12188 [14:25:07<11:00:44, 7.17s/it] 55%|█████▍ | 6661/12188 [14:25:15<11:03:07, 7.20s/it] {'loss': 0.3631, 'grad_norm': 0.744989151729109, 'learning_rate': 4.4919837179955784e-06, 'epoch': 0.55} + 55%|█████▍ | 6661/12188 [14:25:15<11:03:07, 7.20s/it] 55%|█████▍ | 6662/12188 [14:25:21<10:52:29, 7.08s/it] {'loss': 0.3362, 'grad_norm': 0.6493334677128881, 'learning_rate': 4.490661905925133e-06, 'epoch': 0.55} + 55%|█████▍ | 6662/12188 [14:25:21<10:52:29, 7.08s/it] 55%|█████▍ | 6663/12188 [14:25:28<10:47:10, 7.03s/it] {'loss': 0.3197, 'grad_norm': 0.6868609117274248, 'learning_rate': 4.489340129823325e-06, 'epoch': 0.55} + 55%|█████▍ | 6663/12188 [14:25:28<10:47:10, 7.03s/it] 55%|█████▍ | 6664/12188 [14:25:35<10:48:30, 7.04s/it] {'loss': 0.3479, 'grad_norm': 0.8072744691443682, 'learning_rate': 4.488018389783497e-06, 'epoch': 0.55} + 55%|█████▍ | 6664/12188 [14:25:35<10:48:30, 7.04s/it] 55%|█████▍ | 6665/12188 [14:25:42<10:43:19, 6.99s/it] {'loss': 0.3326, 'grad_norm': 0.6777937321916154, 'learning_rate': 4.486696685898986e-06, 'epoch': 0.55} + 55%|█████▍ | 6665/12188 [14:25:42<10:43:19, 6.99s/it] 55%|█████▍ | 6666/12188 [14:25:50<11:00:26, 7.18s/it] {'loss': 0.3503, 'grad_norm': 0.784364048118098, 'learning_rate': 4.485375018263133e-06, 'epoch': 0.55} + 55%|█████▍ | 6666/12188 [14:25:50<11:00:26, 7.18s/it] 55%|█████▍ | 6667/12188 [14:25:57<10:54:18, 7.11s/it] {'loss': 0.3167, 'grad_norm': 0.8761956018731701, 'learning_rate': 4.484053386969267e-06, 'epoch': 0.55} + 55%|█████▍ | 6667/12188 [14:25:57<10:54:18, 7.11s/it] 55%|█████▍ | 6668/12188 [14:26:04<10:54:51, 7.12s/it] {'loss': 0.3478, 'grad_norm': 0.724389682123803, 'learning_rate': 4.4827317921107246e-06, 'epoch': 0.55} + 55%|█████▍ | 6668/12188 [14:26:04<10:54:51, 7.12s/it] 55%|█████▍ | 6669/12188 [14:26:10<10:38:57, 6.95s/it] {'loss': 0.29, 'grad_norm': 0.6632040821972258, 'learning_rate': 4.48141023378083e-06, 'epoch': 0.55} + 55%|█████▍ | 6669/12188 [14:26:10<10:38:57, 6.95s/it] 55%|█████▍ | 6670/12188 [14:26:18<10:55:20, 7.13s/it] {'loss': 0.3183, 'grad_norm': 0.6922198650856087, 'learning_rate': 4.480088712072911e-06, 'epoch': 0.55} + 55%|█████▍ | 6670/12188 [14:26:18<10:55:20, 7.13s/it] 55%|█████▍ | 6671/12188 [14:26:25<10:56:41, 7.14s/it] {'loss': 0.3325, 'grad_norm': 0.7466822495656646, 'learning_rate': 4.4787672270802945e-06, 'epoch': 0.55} + 55%|█████▍ | 6671/12188 [14:26:25<10:56:41, 7.14s/it] 55%|█████▍ | 6672/12188 [14:26:32<10:49:45, 7.07s/it] {'loss': 0.2939, 'grad_norm': 0.6615232563839297, 'learning_rate': 4.477445778896296e-06, 'epoch': 0.55} + 55%|█████▍ | 6672/12188 [14:26:32<10:49:45, 7.07s/it] 55%|█████▍ | 6673/12188 [14:26:39<10:48:28, 7.06s/it] {'loss': 0.3059, 'grad_norm': 0.9557263149265696, 'learning_rate': 4.476124367614237e-06, 'epoch': 0.55} + 55%|█████▍ | 6673/12188 [14:26:39<10:48:28, 7.06s/it] 55%|█████▍ | 6674/12188 [14:26:46<10:39:07, 6.95s/it] {'loss': 0.2868, 'grad_norm': 0.7560678458323987, 'learning_rate': 4.474802993327437e-06, 'epoch': 0.55} + 55%|█████▍ | 6674/12188 [14:26:46<10:39:07, 6.95s/it] 55%|█████▍ | 6675/12188 [14:26:54<11:06:59, 7.26s/it] {'loss': 0.3744, 'grad_norm': 0.708755279442068, 'learning_rate': 4.473481656129202e-06, 'epoch': 0.55} + 55%|█████▍ | 6675/12188 [14:26:54<11:06:59, 7.26s/it] 55%|█████▍ | 6676/12188 [14:27:01<10:52:09, 7.10s/it] {'loss': 0.2864, 'grad_norm': 0.6570744530048801, 'learning_rate': 4.472160356112849e-06, 'epoch': 0.55} + 55%|█████▍ | 6676/12188 [14:27:01<10:52:09, 7.10s/it] 55%|█████▍ | 6677/12188 [14:27:09<11:37:31, 7.59s/it] {'loss': 0.3409, 'grad_norm': 0.731198301230586, 'learning_rate': 4.470839093371684e-06, 'epoch': 0.55} + 55%|█████▍ | 6677/12188 [14:27:09<11:37:31, 7.59s/it] 55%|█████▍ | 6678/12188 [14:27:16<11:14:31, 7.35s/it] {'loss': 0.3246, 'grad_norm': 0.6965629560438585, 'learning_rate': 4.469517867999009e-06, 'epoch': 0.55} + 55%|█████▍ | 6678/12188 [14:27:16<11:14:31, 7.35s/it] 55%|█████▍ | 6679/12188 [14:27:23<11:07:34, 7.27s/it] {'loss': 0.3393, 'grad_norm': 0.8183525515971455, 'learning_rate': 4.4681966800881334e-06, 'epoch': 0.55} + 55%|█████▍ | 6679/12188 [14:27:23<11:07:34, 7.27s/it] 55%|█████▍ | 6680/12188 [14:27:30<10:58:26, 7.17s/it] {'loss': 0.3185, 'grad_norm': 0.9071918057208048, 'learning_rate': 4.4668755297323515e-06, 'epoch': 0.55} + 55%|█████▍ | 6680/12188 [14:27:30<10:58:26, 7.17s/it] 55%|█████▍ | 6681/12188 [14:27:37<10:55:13, 7.14s/it] {'loss': 0.3855, 'grad_norm': 0.741037494976712, 'learning_rate': 4.465554417024965e-06, 'epoch': 0.55} + 55%|█████▍ | 6681/12188 [14:27:37<10:55:13, 7.14s/it] 55%|█████▍ | 6682/12188 [14:27:45<11:04:18, 7.24s/it] {'loss': 0.3299, 'grad_norm': 0.8050012637202707, 'learning_rate': 4.464233342059264e-06, 'epoch': 0.55} + 55%|█████▍ | 6682/12188 [14:27:45<11:04:18, 7.24s/it] 55%|█████▍ | 6683/12188 [14:27:52<10:58:18, 7.17s/it] {'loss': 0.2782, 'grad_norm': 0.6740747319463376, 'learning_rate': 4.462912304928546e-06, 'epoch': 0.55} + 55%|██��██▍ | 6683/12188 [14:27:52<10:58:18, 7.17s/it] 55%|█████▍ | 6684/12188 [14:27:59<11:06:04, 7.26s/it] {'loss': 0.3174, 'grad_norm': 0.6674335800787131, 'learning_rate': 4.4615913057260986e-06, 'epoch': 0.55} + 55%|█████▍ | 6684/12188 [14:27:59<11:06:04, 7.26s/it] 55%|█████▍ | 6685/12188 [14:28:07<11:12:44, 7.33s/it] {'loss': 0.3634, 'grad_norm': 0.8078423242041717, 'learning_rate': 4.460270344545206e-06, 'epoch': 0.55} + 55%|█████▍ | 6685/12188 [14:28:07<11:12:44, 7.33s/it] 55%|█████▍ | 6686/12188 [14:28:14<11:04:19, 7.24s/it] {'loss': 0.3336, 'grad_norm': 0.645242762389889, 'learning_rate': 4.458949421479157e-06, 'epoch': 0.55} + 55%|█████▍ | 6686/12188 [14:28:14<11:04:19, 7.24s/it] 55%|█████▍ | 6687/12188 [14:28:21<10:55:46, 7.15s/it] {'loss': 0.3468, 'grad_norm': 3.593996931493718, 'learning_rate': 4.457628536621228e-06, 'epoch': 0.55} + 55%|█████▍ | 6687/12188 [14:28:21<10:55:46, 7.15s/it] 55%|█████▍ | 6688/12188 [14:28:28<10:50:45, 7.10s/it] {'loss': 0.2979, 'grad_norm': 0.659171277611106, 'learning_rate': 4.4563076900647e-06, 'epoch': 0.55} + 55%|█████▍ | 6688/12188 [14:28:28<10:50:45, 7.10s/it] 55%|█████▍ | 6689/12188 [14:28:35<11:11:25, 7.33s/it] {'loss': 0.3139, 'grad_norm': 0.7168289246697148, 'learning_rate': 4.454986881902853e-06, 'epoch': 0.55} + 55%|█████▍ | 6689/12188 [14:28:35<11:11:25, 7.33s/it] 55%|█████▍ | 6690/12188 [14:28:43<11:08:17, 7.29s/it] {'loss': 0.3161, 'grad_norm': 0.7853451511678374, 'learning_rate': 4.453666112228955e-06, 'epoch': 0.55} + 55%|█████▍ | 6690/12188 [14:28:43<11:08:17, 7.29s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1348, in _get_item + raise ValueError( +ValueError: Number of image tokens ['63eeb12752a6426abeb129b8049d5bddstep20.png', '63eeb12752a6426abeb129b8049d5bddstep21.png'] does not match number of images None +[Try #0] Failed to fetch sample 1134855 in VC:s3://gui/aguvis/aguvis-stage2/amex/images. Exception: Number of image tokens ['63eeb12752a6426abeb129b8049d5bddstep20.png', '63eeb12752a6426abeb129b8049d5bddstep21.png'] does not match number of images None +Problematic sample: {'image': ['63eeb12752a6426abeb129b8049d5bddstep20.png', '63eeb12752a6426abeb129b8049d5bddstep21.png'], 'conversations': [{'from': 'human', 'value': "\nPlease generate the next move according to the UI screenshot, the task and previous operations.\n\nTask:\nI want to book a hotel in london, prize should be less than $600, guest rating is 8+, 4 star rating, breakfast included\n\nPrevious operations:\nStep 1: Tap on the Chrome app to start searching for hotels in London.\nStep 2: Type 'Chrome' in the search bar to open the Chrome browser.\nStep 3: Tap on the Hotels.com app icon to begin searching for a hotel.\nStep 4: Wait for the Hotels.com app to finish loading to proceed with the hotel search.\nStep 5: Tap on the 'Going to' field to enter London as the destination.\nStep 6: Tap on 'London' from Recent Searches to select it as the destination for the hotel search.\nStep 7: Tap on 'London, England, United Kingdom' to select it as the destination for the hotel search.\nStep 8: Tap on the 'Search' button to view available hotels in London.\nStep 9: Tap on the filter icon or option to apply filters to the hotel search results.\nStep 10: Swipe up on the screen to reveal more filter options, such as guest rating and hotel stars.\nStep 11: Swipe up to reveal more filter options, specifically guest ratings and hotel stars.\nStep 12: Tap on the 4-star rating box to select it.\nStep 13: Tap 'Done' to confirm the current filter selections and proceed.\nStep 14: Swipe up on the screen to locate and select the 'breakfast included' filter option.\nStep 15: Tap on 'Breakfast included' to select it.\nStep 16: Tap on the 'Done' button to apply the filters and view available hotel options.\nStep 17: Tap on the topmost hotel card to view its details.\nStep 18: Tap on the 'Select a room' button to check available room options and their prices.\nStep 19: Tap on 'Select and customize' to proceed with the room selection and customization options.\nStep 20: Tap on 'Breakfast buffet' to select it, then tap on 'Reserve' to book the room."}, {'from': 'gpt', 'value': "\nThe screen is displaying a loading indicator, suggesting that the reservation process might be finalizing or confirmation details are being loaded. Given the sequence of booking actions, it seems the process is almost complete. The loading sign indicates that the system is processing the final booking or confirmation step.\n\n\nWait for the loading to complete to confirm the booking status.\n\n\nterminate(status='success')\n"}]} + 55%|█████▍ | 6691/12188 [14:28:50<11:11:46, 7.33s/it] {'loss': 0.3105, 'grad_norm': 0.9900074292994447, 'learning_rate': 4.4523453811362765e-06, 'epoch': 0.55} + 55%|█████▍ | 6691/12188 [14:28:50<11:11:46, 7.33s/it] 55%|█████▍ | 6692/12188 [14:28:57<10:57:57, 7.18s/it] {'loss': 0.3039, 'grad_norm': 0.8189521398955933, 'learning_rate': 4.45102468871809e-06, 'epoch': 0.55} + 55%|█████▍ | 6692/12188 [14:28:57<10:57:57, 7.18s/it] 55%|█████▍ | 6693/12188 [14:29:03<10:42:25, 7.01s/it] {'loss': 0.3203, 'grad_norm': 0.7061003194962877, 'learning_rate': 4.449704035067656e-06, 'epoch': 0.55} + 55%|█████▍ | 6693/12188 [14:29:03<10:42:25, 7.01s/it] 55%|█████▍ | 6694/12188 [14:29:10<10:24:43, 6.82s/it] {'loss': 0.346, 'grad_norm': 0.9435476808302292, 'learning_rate': 4.448383420278241e-06, 'epoch': 0.55} + 55%|█████▍ | 6694/12188 [14:29:10<10:24:43, 6.82s/it] 55%|█████▍ | 6695/12188 [14:29:17<10:28:07, 6.86s/it] {'loss': 0.3018, 'grad_norm': 0.690581851337164, 'learning_rate': 4.4470628444431e-06, 'epoch': 0.55} + 55%|█████▍ | 6695/12188 [14:29:17<10:28:07, 6.86s/it] 55%|█████▍ | 6696/12188 [14:29:24<10:32:50, 6.91s/it] {'loss': 0.3643, 'grad_norm': 0.6472165663482872, 'learning_rate': 4.445742307655495e-06, 'epoch': 0.55} + 55%|█████▍ | 6696/12188 [14:29:24<10:32:50, 6.91s/it] 55%|█████▍ | 6697/12188 [14:29:31<10:34:59, 6.94s/it] {'loss': 0.3207, 'grad_norm': 0.7541689710722681, 'learning_rate': 4.4444218100086765e-06, 'epoch': 0.55} + 55%|█████▍ | 6697/12188 [14:29:31<10:34:59, 6.94s/it] 55%|█████▍ | 6698/12188 [14:29:39<10:55:29, 7.16s/it] {'loss': 0.3338, 'grad_norm': 0.7686037909618954, 'learning_rate': 4.443101351595897e-06, 'epoch': 0.55} + 55%|█████▍ | 6698/12188 [14:29:39<10:55:29, 7.16s/it] 55%|█████▍ | 6699/12188 [14:29:45<10:45:58, 7.06s/it] {'loss': 0.3798, 'grad_norm': 0.6885075155305106, 'learning_rate': 4.441780932510408e-06, 'epoch': 0.55} + 55%|█████▍ | 6699/12188 [14:29:45<10:45:58, 7.06s/it] 55%|█████▍ | 6700/12188 [14:29:53<10:48:16, 7.09s/it] {'loss': 0.3105, 'grad_norm': 0.7860218327135049, 'learning_rate': 4.440460552845449e-06, 'epoch': 0.55} + 55%|█████▍ | 6700/12188 [14:29:53<10:48:16, 7.09s/it] 55%|█████▍ | 6701/12188 [14:30:00<10:46:05, 7.06s/it] {'loss': 0.3337, 'grad_norm': 0.663656667139125, 'learning_rate': 4.439140212694267e-06, 'epoch': 0.55} + 55%|█████▍ | 6701/12188 [14:30:00<10:46:05, 7.06s/it] 55%|█████▍ | 6702/12188 [14:30:08<11:18:04, 7.42s/it] {'loss': 0.3377, 'grad_norm': 0.6483270298335215, 'learning_rate': 4.437819912150106e-06, 'epoch': 0.55} + 55%|█████▍ | 6702/12188 [14:30:08<11:18:04, 7.42s/it] 55%|█████▍ | 6703/12188 [14:30:16<11:46:10, 7.72s/it] {'loss': 0.3249, 'grad_norm': 0.7096493246330439, 'learning_rate': 4.436499651306195e-06, 'epoch': 0.55} + 55%|█████▍ | 6703/12188 [14:30:16<11:46:10, 7.72s/it] 55%|█████▌ | 6704/12188 [14:30:25<12:07:01, 7.95s/it] {'loss': 0.3385, 'grad_norm': 1.3572510588438054, 'learning_rate': 4.4351794302557765e-06, 'epoch': 0.55} + 55%|█████▌ | 6704/12188 [14:30:25<12:07:01, 7.95s/it] 55%|█████▌ | 6705/12188 [14:30:31<11:30:13, 7.55s/it] {'loss': 0.3372, 'grad_norm': 0.6547384999574851, 'learning_rate': 4.4338592490920775e-06, 'epoch': 0.55} + 55%|█████▌ | 6705/12188 [14:30:31<11:30:13, 7.55s/it] 55%|█████▌ | 6706/12188 [14:30:38<11:11:39, 7.35s/it] {'loss': 0.3305, 'grad_norm': 0.715939070206566, 'learning_rate': 4.432539107908329e-06, 'epoch': 0.55} + 55%|█████▌ | 6706/12188 [14:30:38<11:11:39, 7.35s/it] 55%|█████▌ | 6707/12188 [14:30:45<10:56:48, 7.19s/it] {'loss': 0.3398, 'grad_norm': 0.6517453038810681, 'learning_rate': 4.431219006797758e-06, 'epoch': 0.55} + 55%|█████▌ | 6707/12188 [14:30:45<10:56:48, 7.19s/it] 55%|█████▌ | 6708/12188 [14:30:52<11:02:52, 7.26s/it] {'loss': 0.3452, 'grad_norm': 0.7254858949900578, 'learning_rate': 4.429898945853585e-06, 'epoch': 0.55} + 55%|█████▌ | 6708/12188 [14:30:52<11:02:52, 7.26s/it] 55%|█████▌ | 6709/12188 [14:30:59<10:55:20, 7.18s/it] {'loss': 0.306, 'grad_norm': 0.6930645994314034, 'learning_rate': 4.428578925169033e-06, 'epoch': 0.55} + 55%|█████▌ | 6709/12188 [14:30:59<10:55:20, 7.18s/it] 55%|█████▌ | 6710/12188 [14:31:06<10:38:59, 7.00s/it] {'loss': 0.3174, 'grad_norm': 0.7031778482582536, 'learning_rate': 4.427258944837321e-06, 'epoch': 0.55} + 55%|█████▌ | 6710/12188 [14:31:06<10:38:59, 7.00s/it] 55%|█████▌ | 6711/12188 [14:31:13<10:43:25, 7.05s/it] {'loss': 0.3447, 'grad_norm': 1.2189381894609415, 'learning_rate': 4.425939004951661e-06, 'epoch': 0.55} + 55%|█████▌ | 6711/12188 [14:31:13<10:43:25, 7.05s/it] 55%|█████▌ | 6712/12188 [14:31:20<10:37:52, 6.99s/it] {'loss': 0.3316, 'grad_norm': 0.6703564210035865, 'learning_rate': 4.424619105605267e-06, 'epoch': 0.55} + 55%|█████▌ | 6712/12188 [14:31:20<10:37:52, 6.99s/it] 55%|█████▌ | 6713/12188 [14:31:27<10:43:26, 7.05s/it] {'loss': 0.3066, 'grad_norm': 0.6344969656559866, 'learning_rate': 4.423299246891346e-06, 'epoch': 0.55} + 55%|█████▌ | 6713/12188 [14:31:27<10:43:26, 7.05s/it] 55%|█████▌ | 6714/12188 [14:31:34<10:30:49, 6.91s/it] {'loss': 0.3092, 'grad_norm': 0.6972037378701752, 'learning_rate': 4.4219794289031045e-06, 'epoch': 0.55} + 55%|█████▌ | 6714/12188 [14:31:34<10:30:49, 6.91s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f26e295cf90> +[Try #0] Failed to fetch sample 4723203 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f26e295cf90> +Problematic sample: {'image': '20240823_045930_before_screenshot_sub3.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Privacy & Cookies'"}, {'from': 'gpt', 'value': '\nclick(x=0.5935, y=0.921)\n'}]} + 55%|█████▌ | 6715/12188 [14:31:41<10:48:37, 7.11s/it] {'loss': 0.2955, 'grad_norm': 0.8036992025463134, 'learning_rate': 4.420659651733751e-06, 'epoch': 0.55} + 55%|█████▌ | 6715/12188 [14:31:41<10:48:37, 7.11s/it] 55%|█████▌ | 6716/12188 [14:31:51<11:48:40, 7.77s/it] {'loss': 0.2755, 'grad_norm': 0.7039491486923808, 'learning_rate': 4.419339915476478e-06, 'epoch': 0.55} + 55%|█████▌ | 6716/12188 [14:31:51<11:48:40, 7.77s/it] 55%|█████▌ | 6717/12188 [14:31:57<11:22:39, 7.49s/it] {'loss': 0.3328, 'grad_norm': 0.6365439227696155, 'learning_rate': 4.418020220224491e-06, 'epoch': 0.55} + 55%|█████▌ | 6717/12188 [14:31:58<11:22:39, 7.49s/it] 55%|█████▌ | 6718/12188 [14:32:04<11:01:08, 7.25s/it] {'loss': 0.3123, 'grad_norm': 0.9468601774605638, 'learning_rate': 4.416700566070979e-06, 'epoch': 0.55} + 55%|█████▌ | 6718/12188 [14:32:04<11:01:08, 7.25s/it] 55%|█████▌ | 6719/12188 [14:32:11<10:52:57, 7.16s/it] {'loss': 0.2963, 'grad_norm': 0.6884106387113409, 'learning_rate': 4.415380953109135e-06, 'epoch': 0.55} + 55%|█████▌ | 6719/12188 [14:32:11<10:52:57, 7.16s/it] 55%|█████▌ | 6720/12188 [14:32:19<11:09:28, 7.35s/it] {'loss': 0.3256, 'grad_norm': 0.7056247085879479, 'learning_rate': 4.4140613814321505e-06, 'epoch': 0.55} + 55%|█████▌ | 6720/12188 [14:32:19<11:09:28, 7.35s/it] 55%|█████▌ | 6721/12188 [14:32:27<11:30:56, 7.58s/it] {'loss': 0.3119, 'grad_norm': 1.1215063428427814, 'learning_rate': 4.412741851133209e-06, 'epoch': 0.55} + 55%|█████▌ | 6721/12188 [14:32:27<11:30:56, 7.58s/it] 55%|█████▌ | 6722/12188 [14:32:34<11:05:26, 7.30s/it] {'loss': 0.3552, 'grad_norm': 0.7166992462544531, 'learning_rate': 4.411422362305495e-06, 'epoch': 0.55} + 55%|█████▌ | 6722/12188 [14:32:34<11:05:26, 7.30s/it] 55%|█████▌ | 6723/12188 [14:32:41<10:57:03, 7.21s/it] {'loss': 0.3421, 'grad_norm': 0.6369228781484115, 'learning_rate': 4.410102915042187e-06, 'epoch': 0.55} + 55%|█████▌ | 6723/12188 [14:32:41<10:57:03, 7.21s/it] 55%|█████▌ | 6724/12188 [14:32:48<10:49:07, 7.13s/it] {'loss': 0.3248, 'grad_norm': 0.9434176081791087, 'learning_rate': 4.4087835094364645e-06, 'epoch': 0.55} + 55%|█████▌ | 6724/12188 [14:32:48<10:49:07, 7.13s/it] 55%|█████▌ | 6725/12188 [14:32:54<10:39:06, 7.02s/it] {'loss': 0.3074, 'grad_norm': 0.6279072983283516, 'learning_rate': 4.407464145581501e-06, 'epoch': 0.55} + 55%|█████▌ | 6725/12188 [14:32:54<10:39:06, 7.02s/it] 55%|█████▌ | 6726/12188 [14:33:01<10:36:29, 6.99s/it] {'loss': 0.3321, 'grad_norm': 0.9138458016388096, 'learning_rate': 4.406144823570467e-06, 'epoch': 0.55} + 55%|█████▌ | 6726/12188 [14:33:01<10:36:29, 6.99s/it] 55%|█████▌ | 6727/12188 [14:33:08<10:36:25, 6.99s/it] {'loss': 0.3223, 'grad_norm': 0.6866479284255324, 'learning_rate': 4.4048255434965305e-06, 'epoch': 0.55} + 55%|█████▌ | 6727/12188 [14:33:08<10:36:25, 6.99s/it] 55%|█████▌ | 6728/12188 [14:33:17<11:21:16, 7.49s/it] {'loss': 0.3182, 'grad_norm': 0.83961677554453, 'learning_rate': 4.40350630545286e-06, 'epoch': 0.55} + 55%|█████▌ | 6728/12188 [14:33:17<11:21:16, 7.49s/it] 55%|█████▌ | 6729/12188 [14:33:24<11:13:09, 7.40s/it] {'loss': 0.3191, 'grad_norm': 0.6870461811563392, 'learning_rate': 4.402187109532613e-06, 'epoch': 0.55} + 55%|█████▌ | 6729/12188 [14:33:24<11:13:09, 7.40s/it] 55%|█████▌ | 6730/12188 [14:33:31<11:00:08, 7.26s/it] {'loss': 0.3499, 'grad_norm': 0.8083947243482414, 'learning_rate': 4.400867955828955e-06, 'epoch': 0.55} + 55%|█████▌ | 6730/12188 [14:33:31<11:00:08, 7.26s/it] 55%|█████▌ | 6731/12188 [14:33:38<11:02:03, 7.28s/it] {'loss': 0.3221, 'grad_norm': 1.092232046994438, 'learning_rate': 4.399548844435037e-06, 'epoch': 0.55} + 55%|█████▌ | 6731/12188 [14:33:38<11:02:03, 7.28s/it] 55%|█████▌ | 6732/12188 [14:33:46<11:17:50, 7.45s/it] {'loss': 0.3299, 'grad_norm': 0.7652380165480627, 'learning_rate': 4.398229775444016e-06, 'epoch': 0.55} + 55%|█████▌ | 6732/12188 [14:33:46<11:17:50, 7.45s/it] 55%|█████▌ | 6733/12188 [14:33:53<10:57:50, 7.24s/it] {'loss': 0.3469, 'grad_norm': 0.7414088503027958, 'learning_rate': 4.396910748949043e-06, 'epoch': 0.55} + 55%|█████▌ | 6733/12188 [14:33:53<10:57:50, 7.24s/it] 55%|█████▌ | 6734/12188 [14:34:01<11:14:58, 7.43s/it] {'loss': 0.3324, 'grad_norm': 0.713425595024947, 'learning_rate': 4.395591765043261e-06, 'epoch': 0.55} + 55%|█████▌ | 6734/12188 [14:34:01<11:14:58, 7.43s/it] 55%|█████▌ | 6735/12188 [14:34:08<11:00:37, 7.27s/it] {'loss': 0.2983, 'grad_norm': 0.6979651575013641, 'learning_rate': 4.39427282381982e-06, 'epoch': 0.55} + 55%|█████▌ | 6735/12188 [14:34:08<11:00:37, 7.27s/it] 55%|█████▌ | 6736/12188 [14:34:16<11:20:11, 7.49s/it] {'loss': 0.3251, 'grad_norm': 0.6900268606288174, 'learning_rate': 4.3929539253718565e-06, 'epoch': 0.55} + 55%|█████▌ | 6736/12188 [14:34:16<11:20:11, 7.49s/it] 55%|█████▌ | 6737/12188 [14:34:23<11:02:06, 7.29s/it] {'loss': 0.3161, 'grad_norm': 0.7035175844752444, 'learning_rate': 4.391635069792512e-06, 'epoch': 0.55} + 55%|█████▌ | 6737/12188 [14:34:23<11:02:06, 7.29s/it] 55%|█████▌ | 6738/12188 [14:34:30<11:03:01, 7.30s/it] {'loss': 0.3362, 'grad_norm': 0.6874203651610159, 'learning_rate': 4.3903162571749234e-06, 'epoch': 0.55} + 55%|█████▌ | 6738/12188 [14:34:30<11:03:01, 7.30s/it] 55%|█████▌ | 6739/12188 [14:34:37<11:04:06, 7.31s/it] {'loss': 0.3305, 'grad_norm': 0.650741466047761, 'learning_rate': 4.388997487612221e-06, 'epoch': 0.55} + 55%|█████▌ | 6739/12188 [14:34:37<11:04:06, 7.31s/it] 55%|█████▌ | 6740/12188 [14:34:44<10:47:04, 7.13s/it] {'loss': 0.3141, 'grad_norm': 0.8554196030194147, 'learning_rate': 4.387678761197534e-06, 'epoch': 0.55} + 55%|█████▌ | 6740/12188 [14:34:44<10:47:04, 7.13s/it] 55%|█████▌ | 6741/12188 [14:34:52<11:07:34, 7.35s/it] {'loss': 0.3335, 'grad_norm': 0.7414369907438002, 'learning_rate': 4.386360078023987e-06, 'epoch': 0.55} + 55%|█████▌ | 6741/12188 [14:34:52<11:07:34, 7.35s/it] 55%|█████▌ | 6742/12188 [14:34:59<10:55:43, 7.22s/it] {'loss': 0.3137, 'grad_norm': 0.7447946106280197, 'learning_rate': 4.385041438184706e-06, 'epoch': 0.55} + 55%|█████▌ | 6742/12188 [14:34:59<10:55:43, 7.22s/it] 55%|█████▌ | 6743/12188 [14:35:06<11:00:31, 7.28s/it] {'loss': 0.3175, 'grad_norm': 0.6206666292183329, 'learning_rate': 4.383722841772813e-06, 'epoch': 0.55} + 55%|█████▌ | 6743/12188 [14:35:06<11:00:31, 7.28s/it] 55%|█████▌ | 6744/12188 [14:35:13<10:50:10, 7.17s/it] {'loss': 0.3135, 'grad_norm': 0.6664525873469053, 'learning_rate': 4.38240428888142e-06, 'epoch': 0.55} + 55%|█████▌ | 6744/12188 [14:35:13<10:50:10, 7.17s/it] 55%|█████▌ | 6745/12188 [14:35:20<10:40:38, 7.06s/it] {'loss': 0.335, 'grad_norm': 0.8143191440183999, 'learning_rate': 4.381085779603645e-06, 'epoch': 0.55} + 55%|█████▌ | 6745/12188 [14:35:20<10:40:38, 7.06s/it] 55%|█████▌ | 6746/12188 [14:35:27<10:42:23, 7.08s/it] {'loss': 0.3784, 'grad_norm': 0.7150881547476697, 'learning_rate': 4.3797673140325995e-06, 'epoch': 0.55} + 55%|█████▌ | 6746/12188 [14:35:27<10:42:23, 7.08s/it] 55%|█████▌ | 6747/12188 [14:35:34<10:47:31, 7.14s/it] {'loss': 0.3179, 'grad_norm': 0.746344164664304, 'learning_rate': 4.378448892261388e-06, 'epoch': 0.55} + 55%|█████▌ | 6747/12188 [14:35:34<10:47:31, 7.14s/it] 55%|█████▌ | 6748/12188 [14:35:42<11:00:02, 7.28s/it] {'loss': 0.3047, 'grad_norm': 0.7474174221355758, 'learning_rate': 4.3771305143831184e-06, 'epoch': 0.55} + 55%|█████▌ | 6748/12188 [14:35:42<11:00:02, 7.28s/it] 55%|█████▌ | 6749/12188 [14:35:49<10:51:58, 7.19s/it] {'loss': 0.3299, 'grad_norm': 0.6753173043945567, 'learning_rate': 4.37581218049089e-06, 'epoch': 0.55} + 55%|█████▌ | 6749/12188 [14:35:49<10:51:58, 7.19s/it] 55%|█████▌ | 6750/12188 [14:35:59<12:10:13, 8.06s/it] {'loss': 0.316, 'grad_norm': 0.6552664402630239, 'learning_rate': 4.3744938906778016e-06, 'epoch': 0.55} + 55%|█████▌ | 6750/12188 [14:35:59<12:10:13, 8.06s/it] 55%|█████▌ | 6751/12188 [14:36:06<11:40:32, 7.73s/it] {'loss': 0.3181, 'grad_norm': 0.7701367013280838, 'learning_rate': 4.373175645036951e-06, 'epoch': 0.55} + 55%|█████▌ | 6751/12188 [14:36:06<11:40:32, 7.73s/it] 55%|█████▌ | 6752/12188 [14:36:15<12:02:59, 7.98s/it] {'loss': 0.315, 'grad_norm': 0.767636563079807, 'learning_rate': 4.3718574436614306e-06, 'epoch': 0.55} + 55%|█████▌ | 6752/12188 [14:36:15<12:02:59, 7.98s/it] 55%|█████▌ | 6753/12188 [14:36:21<11:28:31, 7.60s/it] {'loss': 0.2963, 'grad_norm': 0.6390116525099653, 'learning_rate': 4.3705392866443264e-06, 'epoch': 0.55} + 55%|█████▌ | 6753/12188 [14:36:21<11:28:31, 7.60s/it] 55%|█████▌ | 6754/12188 [14:36:29<11:24:47, 7.56s/it] {'loss': 0.3182, 'grad_norm': 0.783074806003217, 'learning_rate': 4.369221174078727e-06, 'epoch': 0.55} + 55%|█████▌ | 6754/12188 [14:36:29<11:24:47, 7.56s/it] 55%|█████▌ | 6755/12188 [14:36:35<11:01:48, 7.31s/it] {'loss': 0.3236, 'grad_norm': 0.7439565534637474, 'learning_rate': 4.3679031060577145e-06, 'epoch': 0.55} + 55%|█████▌ | 6755/12188 [14:36:35<11:01:48, 7.31s/it] 55%|█████▌ | 6756/12188 [14:36:42<10:48:05, 7.16s/it] {'loss': 0.3414, 'grad_norm': 4.544919592270904, 'learning_rate': 4.366585082674371e-06, 'epoch': 0.55} + 55%|█████▌ | 6756/12188 [14:36:42<10:48:05, 7.16s/it] 55%|█████▌ | 6757/12188 [14:36:49<10:44:33, 7.12s/it] {'loss': 0.296, 'grad_norm': 0.6669524268176537, 'learning_rate': 4.365267104021768e-06, 'epoch': 0.55} + 55%|█████▌ | 6757/12188 [14:36:49<10:44:33, 7.12s/it] 55%|█████▌ | 6758/12188 [14:36:56<10:35:39, 7.02s/it] {'loss': 0.3042, 'grad_norm': 0.6783630995061904, 'learning_rate': 4.3639491701929856e-06, 'epoch': 0.55} + 55%|█████▌ | 6758/12188 [14:36:56<10:35:39, 7.02s/it] 55%|█████▌ | 6759/12188 [14:37:04<10:48:58, 7.17s/it] {'loss': 0.3391, 'grad_norm': 0.7120198766094002, 'learning_rate': 4.3626312812810885e-06, 'epoch': 0.55} + 55%|█████▌ | 6759/12188 [14:37:04<10:48:58, 7.17s/it] 55%|█████▌ | 6760/12188 [14:37:13<11:40:02, 7.74s/it] {'loss': 0.3553, 'grad_norm': 0.6151703650681069, 'learning_rate': 4.361313437379147e-06, 'epoch': 0.55} + 55%|█████▌ | 6760/12188 [14:37:13<11:40:02, 7.74s/it] 55%|█████▌ | 6761/12188 [14:37:19<11:08:12, 7.39s/it] {'loss': 0.2872, 'grad_norm': 0.6554463651048029, 'learning_rate': 4.359995638580226e-06, 'epoch': 0.55} + 55%|█████▌ | 6761/12188 [14:37:19<11:08:12, 7.39s/it] 55%|█████▌ | 6762/12188 [14:37:26<10:55:49, 7.25s/it] {'loss': 0.3308, 'grad_norm': 0.6758930887896354, 'learning_rate': 4.358677884977382e-06, 'epoch': 0.55} + 55%|█████▌ | 6762/12188 [14:37:26<10:55:49, 7.25s/it] 55%|█████▌ | 6763/12188 [14:37:35<11:33:05, 7.67s/it] {'loss': 0.3286, 'grad_norm': 0.7306655508260328, 'learning_rate': 4.357360176663676e-06, 'epoch': 0.55} + 55%|█████▌ | 6763/12188 [14:37:35<11:33:05, 7.67s/it] 55%|█████▌ | 6764/12188 [14:37:42<11:16:14, 7.48s/it] {'loss': 0.3444, 'grad_norm': 0.7184733613961963, 'learning_rate': 4.356042513732164e-06, 'epoch': 0.55} + 55%|█████▌ | 6764/12188 [14:37:42<11:16:14, 7.48s/it] 56%|█████▌ | 6765/12188 [14:37:49<11:15:33, 7.47s/it] {'loss': 0.3307, 'grad_norm': 0.7504463210300375, 'learning_rate': 4.354724896275893e-06, 'epoch': 0.56} + 56%|█████▌ | 6765/12188 [14:37:49<11:15:33, 7.47s/it] 56%|█████▌ | 6766/12188 [14:37:56<10:57:20, 7.27s/it] {'loss': 0.3178, 'grad_norm': 0.6768283398430935, 'learning_rate': 4.353407324387915e-06, 'epoch': 0.56} + 56%|█████▌ | 6766/12188 [14:37:56<10:57:20, 7.27s/it] 56%|█████▌ | 6767/12188 [14:38:03<10:52:11, 7.22s/it] {'loss': 0.3232, 'grad_norm': 0.9057608164981166, 'learning_rate': 4.352089798161272e-06, 'epoch': 0.56} + 56%|█████▌ | 6767/12188 [14:38:03<10:52:11, 7.22s/it] 56%|█████▌ | 6768/12188 [14:38:10<10:40:18, 7.09s/it] {'loss': 0.3121, 'grad_norm': 0.6368800374256393, 'learning_rate': 4.350772317689006e-06, 'epoch': 0.56} + 56%|█████▌ | 6768/12188 [14:38:10<10:40:18, 7.09s/it] 56%|█████▌ | 6769/12188 [14:38:17<10:45:46, 7.15s/it] {'loss': 0.2913, 'grad_norm': 0.8171091814062101, 'learning_rate': 4.349454883064157e-06, 'epoch': 0.56} + 56%|█████▌ | 6769/12188 [14:38:17<10:45:46, 7.15s/it] 56%|█████▌ | 6770/12188 [14:38:24<10:33:11, 7.01s/it] {'loss': 0.358, 'grad_norm': 0.7450638831837604, 'learning_rate': 4.348137494379757e-06, 'epoch': 0.56} + 56%|█████▌ | 6770/12188 [14:38:24<10:33:11, 7.01s/it] 56%|█████▌ | 6771/12188 [14:38:31<10:24:03, 6.91s/it] {'loss': 0.3299, 'grad_norm': 1.012944999711811, 'learning_rate': 4.346820151728843e-06, 'epoch': 0.56} + 56%|█████▌ | 6771/12188 [14:38:31<10:24:03, 6.91s/it] 56%|█████▌ | 6772/12188 [14:38:37<10:14:07, 6.80s/it] {'loss': 0.341, 'grad_norm': 0.6605896848650747, 'learning_rate': 4.3455028552044365e-06, 'epoch': 0.56} + 56%|█████▌ | 6772/12188 [14:38:37<10:14:07, 6.80s/it] 56%|█████▌ | 6773/12188 [14:38:44<10:02:24, 6.67s/it] {'loss': 0.384, 'grad_norm': 0.8711210511661212, 'learning_rate': 4.344185604899569e-06, 'epoch': 0.56} + 56%|█████▌ | 6773/12188 [14:38:44<10:02:24, 6.67s/it] 56%|█████▌ | 6774/12188 [14:38:51<10:28:22, 6.96s/it] {'loss': 0.3487, 'grad_norm': 0.8862944423937736, 'learning_rate': 4.342868400907261e-06, 'epoch': 0.56} + 56%|█████▌ | 6774/12188 [14:38:51<10:28:22, 6.96s/it] 56%|█████▌ | 6775/12188 [14:38:59<11:00:13, 7.32s/it] {'loss': 0.3279, 'grad_norm': 0.7183026345314314, 'learning_rate': 4.3415512433205295e-06, 'epoch': 0.56} + 56%|█████▌ | 6775/12188 [14:38:59<11:00:13, 7.32s/it] 56%|█████▌ | 6776/12188 [14:39:07<11:05:11, 7.37s/it] {'loss': 0.3365, 'grad_norm': 0.663103725787822, 'learning_rate': 4.340234132232393e-06, 'epoch': 0.56} + 56%|█████▌ | 6776/12188 [14:39:07<11:05:11, 7.37s/it] 56%|█████▌ | 6777/12188 [14:39:13<10:42:41, 7.13s/it] {'loss': 0.3376, 'grad_norm': 0.7660237666187014, 'learning_rate': 4.33891706773586e-06, 'epoch': 0.56} + 56%|█████▌ | 6777/12188 [14:39:13<10:42:41, 7.13s/it] 56%|█████▌ | 6778/12188 [14:39:21<10:51:25, 7.22s/it] {'loss': 0.319, 'grad_norm': 0.7957621717766968, 'learning_rate': 4.337600049923941e-06, 'epoch': 0.56} + 56%|█████▌ | 6778/12188 [14:39:21<10:51:25, 7.22s/it] 56%|█████▌ | 6779/12188 [14:39:28<10:48:58, 7.20s/it] {'loss': 0.341, 'grad_norm': 0.7014654055321247, 'learning_rate': 4.336283078889646e-06, 'epoch': 0.56} + 56%|█████▌ | 6779/12188 [14:39:28<10:48:58, 7.20s/it] 56%|█████▌ | 6780/12188 [14:39:35<10:37:51, 7.08s/it] {'loss': 0.3983, 'grad_norm': 0.796448260916141, 'learning_rate': 4.334966154725971e-06, 'epoch': 0.56} + 56%|█████▌ | 6780/12188 [14:39:35<10:37:51, 7.08s/it] 56%|█████▌ | 6781/12188 [14:39:42<10:40:27, 7.11s/it] {'loss': 0.332, 'grad_norm': 1.6259367917739354, 'learning_rate': 4.333649277525917e-06, 'epoch': 0.56} + 56%|█████▌ | 6781/12188 [14:39:42<10:40:27, 7.11s/it] 56%|█████▌ | 6782/12188 [14:39:49<10:42:59, 7.14s/it] {'loss': 0.3212, 'grad_norm': 0.7014969440594874, 'learning_rate': 4.3323324473824825e-06, 'epoch': 0.56} + 56%|█████▌ | 6782/12188 [14:39:49<10:42:59, 7.14s/it] 56%|█████▌ | 6783/12188 [14:39:56<10:42:45, 7.14s/it] {'loss': 0.336, 'grad_norm': 0.8028131508440614, 'learning_rate': 4.331015664388656e-06, 'epoch': 0.56} + 56%|█████▌ | 6783/12188 [14:39:56<10:42:45, 7.14s/it] 56%|█████▌ | 6784/12188 [14:40:04<11:00:11, 7.33s/it] {'loss': 0.3464, 'grad_norm': 0.8017512184672216, 'learning_rate': 4.32969892863743e-06, 'epoch': 0.56} + 56%|█████▌ | 6784/12188 [14:40:04<11:00:11, 7.33s/it] 56%|█████▌ | 6785/12188 [14:40:11<10:39:42, 7.10s/it] {'loss': 0.3036, 'grad_norm': 0.6308143361162645, 'learning_rate': 4.328382240221788e-06, 'epoch': 0.56} + 56%|█████▌ | 6785/12188 [14:40:11<10:39:42, 7.10s/it] 56%|█████▌ | 6786/12188 [14:40:18<10:39:14, 7.10s/it] {'loss': 0.3725, 'grad_norm': 0.6973326866573275, 'learning_rate': 4.327065599234713e-06, 'epoch': 0.56} + 56%|█████▌ | 6786/12188 [14:40:18<10:39:14, 7.10s/it] 56%|█████▌ | 6787/12188 [14:40:25<10:47:39, 7.19s/it] {'loss': 0.3482, 'grad_norm': 6.689959803520651, 'learning_rate': 4.325749005769184e-06, 'epoch': 0.56} + 56%|█████▌ | 6787/12188 [14:40:25<10:47:39, 7.19s/it] 56%|█████▌ | 6788/12188 [14:40:32<10:30:26, 7.00s/it] {'loss': 0.3129, 'grad_norm': 0.7276283989345558, 'learning_rate': 4.324432459918178e-06, 'epoch': 0.56} + 56%|█████▌ | 6788/12188 [14:40:32<10:30:26, 7.00s/it] 56%|█████▌ | 6789/12188 [14:40:40<10:54:52, 7.28s/it] {'loss': 0.3262, 'grad_norm': 0.6653614519250868, 'learning_rate': 4.3231159617746675e-06, 'epoch': 0.56} + 56%|█████▌ | 6789/12188 [14:40:40<10:54:52, 7.28s/it] 56%|█████▌ | 6790/12188 [14:40:47<10:57:24, 7.31s/it] {'loss': 0.2929, 'grad_norm': 0.7263986862842099, 'learning_rate': 4.321799511431617e-06, 'epoch': 0.56} + 56%|█████▌ | 6790/12188 [14:40:47<10:57:24, 7.31s/it] 56%|█████▌ | 6791/12188 [14:40:54<10:51:54, 7.25s/it] {'loss': 0.279, 'grad_norm': 0.6412680460596641, 'learning_rate': 4.320483108981996e-06, 'epoch': 0.56} + 56%|█████▌ | 6791/12188 [14:40:54<10:51:54, 7.25s/it] 56%|█████▌ | 6792/12188 [14:41:01<10:41:14, 7.13s/it] {'loss': 0.3388, 'grad_norm': 0.8053925148021125, 'learning_rate': 4.319166754518768e-06, 'epoch': 0.56} + 56%|█████▌ | 6792/12188 [14:41:01<10:41:14, 7.13s/it] 56%|█████▌ | 6793/12188 [14:41:08<10:32:32, 7.03s/it] {'loss': 0.3327, 'grad_norm': 0.7116719583350045, 'learning_rate': 4.317850448134888e-06, 'epoch': 0.56} + 56%|█████▌ | 6793/12188 [14:41:08<10:32:32, 7.03s/it] 56%|█████▌ | 6794/12188 [14:41:15<10:32:40, 7.04s/it] {'loss': 0.3127, 'grad_norm': 0.6647307520934768, 'learning_rate': 4.316534189923315e-06, 'epoch': 0.56} + 56%|█████▌ | 6794/12188 [14:41:15<10:32:40, 7.04s/it] 56%|█████▌ | 6795/12188 [14:41:23<10:51:02, 7.24s/it] {'loss': 0.3092, 'grad_norm': 0.746945178475027, 'learning_rate': 4.315217979976999e-06, 'epoch': 0.56} + 56%|█████▌ | 6795/12188 [14:41:23<10:51:02, 7.24s/it] 56%|█████▌ | 6796/12188 [14:41:30<10:44:23, 7.17s/it] {'loss': 0.312, 'grad_norm': 0.661592463072802, 'learning_rate': 4.313901818388888e-06, 'epoch': 0.56} + 56%|█████▌ | 6796/12188 [14:41:30<10:44:23, 7.17s/it] 56%|█████▌ | 6797/12188 [14:41:38<11:20:27, 7.57s/it] {'loss': 0.2852, 'grad_norm': 0.7398650548993734, 'learning_rate': 4.312585705251931e-06, 'epoch': 0.56} + 56%|█████▌ | 6797/12188 [14:41:38<11:20:27, 7.57s/it] 56%|█████▌ | 6798/12188 [14:41:46<11:29:31, 7.68s/it] {'loss': 0.3138, 'grad_norm': 0.6654821850530835, 'learning_rate': 4.311269640659063e-06, 'epoch': 0.56} + 56%|█████▌ | 6798/12188 [14:41:46<11:29:31, 7.68s/it] 56%|█████▌ | 6799/12188 [14:41:53<11:04:23, 7.40s/it] {'loss': 0.3464, 'grad_norm': 0.6355507872322262, 'learning_rate': 4.3099536247032285e-06, 'epoch': 0.56} + 56%|█████▌ | 6799/12188 [14:41:53<11:04:23, 7.40s/it] 56%|█████▌ | 6800/12188 [14:42:00<10:52:31, 7.27s/it] {'loss': 0.3324, 'grad_norm': 0.6732619581316067, 'learning_rate': 4.308637657477362e-06, 'epoch': 0.56} + 56%|█████▌ | 6800/12188 [14:42:00<10:52:31, 7.27s/it] 56%|█████▌ | 6801/12188 [14:42:06<10:34:34, 7.07s/it] {'loss': 0.2885, 'grad_norm': 0.907297030361115, 'learning_rate': 4.307321739074392e-06, 'epoch': 0.56} + 56%|█████▌ | 6801/12188 [14:42:06<10:34:34, 7.07s/it] 56%|█████▌ | 6802/12188 [14:42:13<10:23:11, 6.94s/it] {'loss': 0.3095, 'grad_norm': 0.7759586826718257, 'learning_rate': 4.306005869587249e-06, 'epoch': 0.56} + 56%|█████▌ | 6802/12188 [14:42:13<10:23:11, 6.94s/it] 56%|█████▌ | 6803/12188 [14:42:21<10:46:13, 7.20s/it] {'loss': 0.306, 'grad_norm': 0.7250309118361513, 'learning_rate': 4.304690049108854e-06, 'epoch': 0.56} + 56%|█████▌ | 6803/12188 [14:42:21<10:46:13, 7.20s/it] 56%|█████▌ | 6804/12188 [14:42:28<10:41:35, 7.15s/it] {'loss': 0.3857, 'grad_norm': 0.7047117870624263, 'learning_rate': 4.303374277732131e-06, 'epoch': 0.56} + 56%|█████▌ | 6804/12188 [14:42:28<10:41:35, 7.15s/it] 56%|█████▌ | 6805/12188 [14:42:34<10:26:59, 6.99s/it] {'loss': 0.3229, 'grad_norm': 0.6799472203320427, 'learning_rate': 4.302058555549999e-06, 'epoch': 0.56} + 56%|█████▌ | 6805/12188 [14:42:34<10:26:59, 6.99s/it] 56%|█████▌ | 6806/12188 [14:42:41<10:25:37, 6.97s/it] {'loss': 0.2889, 'grad_norm': 0.6501043474394151, 'learning_rate': 4.300742882655369e-06, 'epoch': 0.56} + 56%|█████▌ | 6806/12188 [14:42:41<10:25:37, 6.97s/it] 56%|█████▌ | 6807/12188 [14:42:48<10:22:46, 6.94s/it] {'loss': 0.3136, 'grad_norm': 0.7627032870964416, 'learning_rate': 4.299427259141155e-06, 'epoch': 0.56} + 56%|█████▌ | 6807/12188 [14:42:48<10:22:46, 6.94s/it] 56%|█████▌ | 6808/12188 [14:42:55<10:14:02, 6.85s/it] {'loss': 0.3396, 'grad_norm': 0.7238211399602147, 'learning_rate': 4.298111685100262e-06, 'epoch': 0.56} + 56%|█████▌ | 6808/12188 [14:42:55<10:14:02, 6.85s/it] 56%|█████▌ | 6809/12188 [14:43:03<10:56:49, 7.33s/it] {'loss': 0.2945, 'grad_norm': 0.6236377983464992, 'learning_rate': 4.296796160625593e-06, 'epoch': 0.56} + 56%|█████▌ | 6809/12188 [14:43:03<10:56:49, 7.33s/it] 56%|█████▌ | 6810/12188 [14:43:12<11:23:48, 7.63s/it] {'loss': 0.3386, 'grad_norm': 0.6442571263173138, 'learning_rate': 4.2954806858100506e-06, 'epoch': 0.56} + 56%|█████▌ | 6810/12188 [14:43:12<11:23:48, 7.63s/it] 56%|█████▌ | 6811/12188 [14:43:18<11:01:06, 7.38s/it] {'loss': 0.3083, 'grad_norm': 0.6680561874282331, 'learning_rate': 4.294165260746528e-06, 'epoch': 0.56} + 56%|█████▌ | 6811/12188 [14:43:18<11:01:06, 7.38s/it] 56%|█████▌ | 6812/12188 [14:43:26<11:01:34, 7.38s/it] {'loss': 0.3257, 'grad_norm': 0.6603990690462882, 'learning_rate': 4.292849885527924e-06, 'epoch': 0.56} + 56%|█████▌ | 6812/12188 [14:43:26<11:01:34, 7.38s/it] 56%|█████▌ | 6813/12188 [14:43:33<10:50:25, 7.26s/it] {'loss': 0.3294, 'grad_norm': 0.7796874091313046, 'learning_rate': 4.291534560247121e-06, 'epoch': 0.56} + 56%|█████▌ | 6813/12188 [14:43:33<10:50:25, 7.26s/it] 56%|█████▌ | 6814/12188 [14:43:40<10:47:05, 7.22s/it] {'loss': 0.3124, 'grad_norm': 0.7084962300668426, 'learning_rate': 4.290219284997012e-06, 'epoch': 0.56} + 56%|█████▌ | 6814/12188 [14:43:40<10:47:05, 7.22s/it] 56%|█████▌ | 6815/12188 [14:43:48<11:20:56, 7.60s/it] {'loss': 0.355, 'grad_norm': 0.6404764298377381, 'learning_rate': 4.288904059870476e-06, 'epoch': 0.56} + 56%|█████▌ | 6815/12188 [14:43:48<11:20:56, 7.60s/it] 56%|█████▌ | 6816/12188 [14:43:55<11:06:28, 7.44s/it] {'loss': 0.3508, 'grad_norm': 0.7699553145228769, 'learning_rate': 4.287588884960393e-06, 'epoch': 0.56} + 56%|█████▌ | 6816/12188 [14:43:55<11:06:28, 7.44s/it] 56%|█████▌ | 6817/12188 [14:44:02<10:48:37, 7.25s/it] {'loss': 0.3005, 'grad_norm': 0.6833341987595396, 'learning_rate': 4.286273760359637e-06, 'epoch': 0.56} + 56%|█████▌ | 6817/12188 [14:44:02<10:48:37, 7.25s/it] 56%|█████▌ | 6818/12188 [14:44:11<11:22:17, 7.62s/it] {'loss': 0.2987, 'grad_norm': 0.7518361291469017, 'learning_rate': 4.2849586861610835e-06, 'epoch': 0.56} + 56%|█████▌ | 6818/12188 [14:44:11<11:22:17, 7.62s/it] 56%|█████▌ | 6819/12188 [14:44:18<11:09:25, 7.48s/it] {'loss': 0.3073, 'grad_norm': 0.8195673866346911, 'learning_rate': 4.283643662457597e-06, 'epoch': 0.56} + 56%|█████▌ | 6819/12188 [14:44:18<11:09:25, 7.48s/it] 56%|█████▌ | 6820/12188 [14:44:25<11:04:32, 7.43s/it] {'loss': 0.3117, 'grad_norm': 0.6874986056079481, 'learning_rate': 4.282328689342046e-06, 'epoch': 0.56} + 56%|█████▌ | 6820/12188 [14:44:25<11:04:32, 7.43s/it]Invalidate trace cache @ step 2: expected module 1, but got module 364 + 56%|█████▌ | 6821/12188 [14:44:31<10:32:54, 7.08s/it] {'loss': 0.6142, 'grad_norm': 0.5572900015973375, 'learning_rate': 4.281013766907288e-06, 'epoch': 0.56} + 56%|█████▌ | 6821/12188 [14:44:31<10:32:54, 7.08s/it] 56%|█████▌ | 6822/12188 [14:44:39<10:49:56, 7.27s/it] {'loss': 0.3297, 'grad_norm': 0.6699929839338844, 'learning_rate': 4.279698895246184e-06, 'epoch': 0.56} + 56%|█████▌ | 6822/12188 [14:44:39<10:49:56, 7.27s/it] 56%|█████▌ | 6823/12188 [14:44:46<10:43:56, 7.20s/it] {'loss': 0.297, 'grad_norm': 0.6959382836610806, 'learning_rate': 4.278384074451587e-06, 'epoch': 0.56} + 56%|█████▌ | 6823/12188 [14:44:46<10:43:56, 7.20s/it] 56%|█████▌ | 6824/12188 [14:44:54<10:47:33, 7.24s/it] {'loss': 0.3282, 'grad_norm': 0.796813582460204, 'learning_rate': 4.277069304616346e-06, 'epoch': 0.56} + 56%|█████▌ | 6824/12188 [14:44:54<10:47:33, 7.24s/it] 56%|█████▌ | 6825/12188 [14:45:00<10:37:39, 7.13s/it] {'loss': 0.3316, 'grad_norm': 1.145189394838764, 'learning_rate': 4.275754585833311e-06, 'epoch': 0.56} + 56%|█████▌ | 6825/12188 [14:45:00<10:37:39, 7.13s/it] 56%|█████▌ | 6826/12188 [14:45:08<10:36:59, 7.13s/it] {'loss': 0.314, 'grad_norm': 0.6550184053677593, 'learning_rate': 4.274439918195322e-06, 'epoch': 0.56} + 56%|█████▌ | 6826/12188 [14:45:08<10:36:59, 7.13s/it] 56%|█████▌ | 6827/12188 [14:45:17<11:26:03, 7.68s/it] {'loss': 0.3135, 'grad_norm': 0.6903948870748502, 'learning_rate': 4.273125301795219e-06, 'epoch': 0.56} + 56%|█████▌ | 6827/12188 [14:45:17<11:26:03, 7.68s/it] 56%|█████▌ | 6828/12188 [14:45:23<11:02:11, 7.41s/it] {'loss': 0.3444, 'grad_norm': 0.6704743738984165, 'learning_rate': 4.271810736725842e-06, 'epoch': 0.56} + 56%|█████▌ | 6828/12188 [14:45:23<11:02:11, 7.41s/it] 56%|█████▌ | 6829/12188 [14:45:31<11:08:50, 7.49s/it] {'loss': 0.3236, 'grad_norm': 0.6603022642647622, 'learning_rate': 4.2704962230800205e-06, 'epoch': 0.56} + 56%|█████▌ | 6829/12188 [14:45:31<11:08:50, 7.49s/it] 56%|█████▌ | 6830/12188 [14:45:38<10:48:11, 7.26s/it] {'loss': 0.3255, 'grad_norm': 0.8605633271129273, 'learning_rate': 4.269181760950584e-06, 'epoch': 0.56} + 56%|█████▌ | 6830/12188 [14:45:38<10:48:11, 7.26s/it] 56%|█████▌ | 6831/12188 [14:45:47<11:40:28, 7.85s/it] {'loss': 0.3363, 'grad_norm': 0.8923613014761076, 'learning_rate': 4.267867350430356e-06, 'epoch': 0.56} + 56%|█████▌ | 6831/12188 [14:45:47<11:40:28, 7.85s/it] 56%|█████▌ | 6832/12188 [14:45:54<11:14:09, 7.55s/it] {'loss': 0.2877, 'grad_norm': 0.775499787407987, 'learning_rate': 4.2665529916121585e-06, 'epoch': 0.56} + 56%|█████▌ | 6832/12188 [14:45:54<11:14:09, 7.55s/it] 56%|█████▌ | 6833/12188 [14:46:02<11:27:44, 7.71s/it] {'loss': 0.3081, 'grad_norm': 0.6656153135256929, 'learning_rate': 4.265238684588813e-06, 'epoch': 0.56} + 56%|█████▌ | 6833/12188 [14:46:02<11:27:44, 7.71s/it] 56%|█████▌ | 6834/12188 [14:46:09<11:12:51, 7.54s/it] {'loss': 0.3363, 'grad_norm': 0.6543311238046932, 'learning_rate': 4.263924429453129e-06, 'epoch': 0.56} + 56%|█████▌ | 6834/12188 [14:46:09<11:12:51, 7.54s/it] 56%|█████▌ | 6835/12188 [14:46:16<10:48:01, 7.26s/it] {'loss': 0.3476, 'grad_norm': 0.800162234181209, 'learning_rate': 4.26261022629792e-06, 'epoch': 0.56} + 56%|█████▌ | 6835/12188 [14:46:16<10:48:01, 7.26s/it] 56%|█████▌ | 6836/12188 [14:46:23<10:49:09, 7.28s/it] {'loss': 0.3768, 'grad_norm': 0.8446547909133654, 'learning_rate': 4.261296075215994e-06, 'epoch': 0.56} + 56%|█████▌ | 6836/12188 [14:46:23<10:49:09, 7.28s/it] 56%|█████▌ | 6837/12188 [14:46:31<11:05:18, 7.46s/it] {'loss': 0.3521, 'grad_norm': 0.7075802852997959, 'learning_rate': 4.259981976300149e-06, 'epoch': 0.56} + 56%|█████▌ | 6837/12188 [14:46:31<11:05:18, 7.46s/it] 56%|█████▌ | 6838/12188 [14:46:40<11:53:57, 8.01s/it] {'loss': 0.2995, 'grad_norm': 0.6712423595523062, 'learning_rate': 4.258667929643191e-06, 'epoch': 0.56} + 56%|█████▌ | 6838/12188 [14:46:40<11:53:57, 8.01s/it] 56%|█████▌ | 6839/12188 [14:46:48<11:43:23, 7.89s/it] {'loss': 0.3248, 'grad_norm': 0.755454797816162, 'learning_rate': 4.25735393533791e-06, 'epoch': 0.56} + 56%|█████▌ | 6839/12188 [14:46:48<11:43:23, 7.89s/it] 56%|█████▌ | 6840/12188 [14:46:55<11:32:40, 7.77s/it] {'loss': 0.3102, 'grad_norm': 0.8129621324314413, 'learning_rate': 4.256039993477102e-06, 'epoch': 0.56} + 56%|█████▌ | 6840/12188 [14:46:55<11:32:40, 7.77s/it] 56%|█████▌ | 6841/12188 [14:47:02<10:55:23, 7.35s/it] {'loss': 0.372, 'grad_norm': 0.6959783034399186, 'learning_rate': 4.254726104153555e-06, 'epoch': 0.56} + 56%|█████▌ | 6841/12188 [14:47:02<10:55:23, 7.35s/it] 56%|█████▌ | 6842/12188 [14:47:09<10:52:40, 7.33s/it] {'loss': 0.3085, 'grad_norm': 0.7261323471314025, 'learning_rate': 4.253412267460053e-06, 'epoch': 0.56} + 56%|█████▌ | 6842/12188 [14:47:09<10:52:40, 7.33s/it] 56%|█████▌ | 6843/12188 [14:47:16<10:41:03, 7.20s/it] {'loss': 0.3174, 'grad_norm': 0.7336270795569216, 'learning_rate': 4.252098483489377e-06, 'epoch': 0.56} + 56%|█████▌ | 6843/12188 [14:47:16<10:41:03, 7.20s/it] 56%|█████▌ | 6844/12188 [14:47:23<10:49:44, 7.30s/it] {'loss': 0.2941, 'grad_norm': 0.6248281053073925, 'learning_rate': 4.250784752334303e-06, 'epoch': 0.56} + 56%|█████▌ | 6844/12188 [14:47:23<10:49:44, 7.30s/it] 56%|█████▌ | 6845/12188 [14:47:31<10:58:20, 7.39s/it] {'loss': 0.3168, 'grad_norm': 0.6276555505295975, 'learning_rate': 4.249471074087607e-06, 'epoch': 0.56} + 56%|█████▌ | 6845/12188 [14:47:31<10:58:20, 7.39s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f71a4e61d00> +[Try #0] Failed to fetch sample 4401396 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f71a4e61d00> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Expert Portfolios - Sleeping'"}, {'from': 'gpt', 'value': '\nclick(x=0.4395, y=0.012)\n'}]} + 56%|█████▌ | 6846/12188 [14:47:38<10:38:00, 7.17s/it] {'loss': 0.3379, 'grad_norm': 0.7456016462780767, 'learning_rate': 4.24815744884206e-06, 'epoch': 0.56} + 56%|█████▌ | 6846/12188 [14:47:38<10:38:00, 7.17s/it] 56%|█████▌ | 6847/12188 [14:47:45<10:58:07, 7.39s/it] {'loss': 0.3355, 'grad_norm': 0.8509309905542873, 'learning_rate': 4.246843876690423e-06, 'epoch': 0.56} + 56%|█████▌ | 6847/12188 [14:47:45<10:58:07, 7.39s/it] 56%|█████▌ | 6848/12188 [14:47:54<11:18:28, 7.62s/it] {'loss': 0.3186, 'grad_norm': 0.6835580400363951, 'learning_rate': 4.245530357725464e-06, 'epoch': 0.56} + 56%|█████▌ | 6848/12188 [14:47:54<11:18:28, 7.62s/it] 56%|█████▌ | 6849/12188 [14:48:00<10:54:43, 7.36s/it] {'loss': 0.2995, 'grad_norm': 0.6613420701434356, 'learning_rate': 4.244216892039937e-06, 'epoch': 0.56} + 56%|█████▌ | 6849/12188 [14:48:00<10:54:43, 7.36s/it] 56%|█████▌ | 6850/12188 [14:48:07<10:38:48, 7.18s/it] {'loss': 0.3255, 'grad_norm': 0.723793586389631, 'learning_rate': 4.2429034797266e-06, 'epoch': 0.56} + 56%|█████▌ | 6850/12188 [14:48:07<10:38:48, 7.18s/it] 56%|█████▌ | 6851/12188 [14:48:15<10:46:17, 7.27s/it] {'loss': 0.3016, 'grad_norm': 0.7019117594071697, 'learning_rate': 4.241590120878204e-06, 'epoch': 0.56} + 56%|█████▌ | 6851/12188 [14:48:15<10:46:17, 7.27s/it] 56%|█████▌ | 6852/12188 [14:48:23<11:05:14, 7.48s/it] {'loss': 0.363, 'grad_norm': 0.6741312451406342, 'learning_rate': 4.240276815587492e-06, 'epoch': 0.56} + 56%|█████▌ | 6852/12188 [14:48:23<11:05:14, 7.48s/it] 56%|█████▌ | 6853/12188 [14:48:30<10:53:39, 7.35s/it] {'loss': 0.3056, 'grad_norm': 0.646643911130416, 'learning_rate': 4.238963563947212e-06, 'epoch': 0.56} + 56%|█████▌ | 6853/12188 [14:48:30<10:53:39, 7.35s/it] 56%|█████▌ | 6854/12188 [14:48:37<10:47:02, 7.28s/it] {'loss': 0.3576, 'grad_norm': 0.6816958224016055, 'learning_rate': 4.2376503660501035e-06, 'epoch': 0.56} + 56%|█████▌ | 6854/12188 [14:48:37<10:47:02, 7.28s/it] 56%|█████▌ | 6855/12188 [14:48:44<10:41:33, 7.22s/it] {'loss': 0.2989, 'grad_norm': 0.7768009292638507, 'learning_rate': 4.2363372219889e-06, 'epoch': 0.56} + 56%|█████▌ | 6855/12188 [14:48:44<10:41:33, 7.22s/it] 56%|█████▋ | 6856/12188 [14:48:50<10:27:41, 7.06s/it] {'loss': 0.3517, 'grad_norm': 0.6870493202698912, 'learning_rate': 4.235024131856336e-06, 'epoch': 0.56} + 56%|█████▋ | 6856/12188 [14:48:50<10:27:41, 7.06s/it] 56%|█████▋ | 6857/12188 [14:48:57<10:19:30, 6.97s/it] {'loss': 0.3102, 'grad_norm': 0.6580774915842689, 'learning_rate': 4.233711095745139e-06, 'epoch': 0.56} + 56%|█████▋ | 6857/12188 [14:48:57<10:19:30, 6.97s/it] 56%|█████▋ | 6858/12188 [14:49:05<10:49:01, 7.31s/it] {'loss': 0.3206, 'grad_norm': 0.723627538302717, 'learning_rate': 4.2323981137480315e-06, 'epoch': 0.56} + 56%|█████▋ | 6858/12188 [14:49:05<10:49:01, 7.31s/it] 56%|█████▋ | 6859/12188 [14:49:12<10:43:42, 7.25s/it] {'loss': 0.3133, 'grad_norm': 0.7320721801722306, 'learning_rate': 4.231085185957739e-06, 'epoch': 0.56} + 56%|█████▋ | 6859/12188 [14:49:12<10:43:42, 7.25s/it] 56%|█████▋ | 6860/12188 [14:49:20<10:41:28, 7.22s/it] {'loss': 0.3313, 'grad_norm': 0.677016491155214, 'learning_rate': 4.229772312466973e-06, 'epoch': 0.56} + 56%|█████▋ | 6860/12188 [14:49:20<10:41:28, 7.22s/it] 56%|█████▋ | 6861/12188 [14:49:27<10:36:13, 7.17s/it] {'loss': 0.3027, 'grad_norm': 0.6540499773395221, 'learning_rate': 4.22845949336845e-06, 'epoch': 0.56} + 56%|█████▋ | 6861/12188 [14:49:27<10:36:13, 7.17s/it] 56%|█████▋ | 6862/12188 [14:49:34<10:47:12, 7.29s/it] {'loss': 0.3297, 'grad_norm': 0.9353417541471516, 'learning_rate': 4.2271467287548764e-06, 'epoch': 0.56} + 56%|█████▋ | 6862/12188 [14:49:34<10:47:12, 7.29s/it] 56%|█████▋ | 6863/12188 [14:49:42<10:49:57, 7.32s/it] {'loss': 0.3339, 'grad_norm': 0.6869921656538439, 'learning_rate': 4.22583401871896e-06, 'epoch': 0.56} + 56%|█████▋ | 6863/12188 [14:49:42<10:49:57, 7.32s/it] 56%|█████▋ | 6864/12188 [14:49:49<10:38:16, 7.19s/it] {'loss': 0.3293, 'grad_norm': 0.7050705132277877, 'learning_rate': 4.224521363353403e-06, 'epoch': 0.56} + 56%|█████▋ | 6864/12188 [14:49:49<10:38:16, 7.19s/it] 56%|█████▋ | 6865/12188 [14:49:56<10:44:36, 7.27s/it] {'loss': 0.3051, 'grad_norm': 0.6537196622100582, 'learning_rate': 4.223208762750899e-06, 'epoch': 0.56} + 56%|█████▋ | 6865/12188 [14:49:56<10:44:36, 7.27s/it] 56%|█████▋ | 6866/12188 [14:50:02<10:24:44, 7.04s/it] {'loss': 0.3001, 'grad_norm': 0.7178002306083943, 'learning_rate': 4.221896217004143e-06, 'epoch': 0.56} + 56%|█████▋ | 6866/12188 [14:50:02<10:24:44, 7.04s/it] 56%|█████▋ | 6867/12188 [14:50:09<10:22:03, 7.01s/it] {'loss': 0.3096, 'grad_norm': 0.6697008677137798, 'learning_rate': 4.220583726205829e-06, 'epoch': 0.56} + 56%|█████▋ | 6867/12188 [14:50:09<10:22:03, 7.01s/it] 56%|█████▋ | 6868/12188 [14:50:17<10:27:47, 7.08s/it] {'loss': 0.3101, 'grad_norm': 0.708251380170302, 'learning_rate': 4.219271290448639e-06, 'epoch': 0.56} + 56%|█████▋ | 6868/12188 [14:50:17<10:27:47, 7.08s/it] 56%|█████▋ | 6869/12188 [14:50:25<10:59:21, 7.44s/it] {'loss': 0.2861, 'grad_norm': 0.7153291831326698, 'learning_rate': 4.2179589098252574e-06, 'epoch': 0.56} + 56%|█████▋ | 6869/12188 [14:50:25<10:59:21, 7.44s/it] 56%|█████▋ | 6870/12188 [14:50:33<11:24:34, 7.72s/it] {'loss': 0.3323, 'grad_norm': 0.6764377801170567, 'learning_rate': 4.21664658442836e-06, 'epoch': 0.56} + 56%|█████▋ | 6870/12188 [14:50:33<11:24:34, 7.72s/it] 56%|█████▋ | 6871/12188 [14:50:40<11:08:59, 7.55s/it] {'loss': 0.3132, 'grad_norm': 0.6311258819375247, 'learning_rate': 4.2153343143506215e-06, 'epoch': 0.56} + 56%|█████▋ | 6871/12188 [14:50:40<11:08:59, 7.55s/it] 56%|█████▋ | 6872/12188 [14:50:47<10:53:52, 7.38s/it] {'loss': 0.298, 'grad_norm': 0.7133626537922685, 'learning_rate': 4.214022099684715e-06, 'epoch': 0.56} + 56%|█████▋ | 6872/12188 [14:50:47<10:53:52, 7.38s/it] 56%|█████▋ | 6873/12188 [14:50:54<10:44:49, 7.28s/it] {'loss': 0.3317, 'grad_norm': 0.7559332432795586, 'learning_rate': 4.2127099405233024e-06, 'epoch': 0.56} + 56%|█████▋ | 6873/12188 [14:50:54<10:44:49, 7.28s/it] 56%|█████▋ | 6874/12188 [14:51:01<10:36:04, 7.18s/it] {'loss': 0.3096, 'grad_norm': 0.7314242472005377, 'learning_rate': 4.211397836959052e-06, 'epoch': 0.56} + 56%|█████▋ | 6874/12188 [14:51:01<10:36:04, 7.18s/it] 56%|█████▋ | 6875/12188 [14:51:08<10:27:46, 7.09s/it] {'loss': 0.3152, 'grad_norm': 0.888065920744925, 'learning_rate': 4.210085789084617e-06, 'epoch': 0.56} + 56%|█████▋ | 6875/12188 [14:51:08<10:27:46, 7.09s/it] 56%|█████▋ | 6876/12188 [14:51:15<10:26:16, 7.07s/it] {'loss': 0.3327, 'grad_norm': 0.7019143323088833, 'learning_rate': 4.2087737969926545e-06, 'epoch': 0.56} + 56%|█████▋ | 6876/12188 [14:51:15<10:26:16, 7.07s/it] 56%|█████▋ | 6877/12188 [14:51:23<10:28:10, 7.10s/it] {'loss': 0.3145, 'grad_norm': 0.7038400384767666, 'learning_rate': 4.207461860775816e-06, 'epoch': 0.56} + 56%|█████▋ | 6877/12188 [14:51:23<10:28:10, 7.10s/it] 56%|█████▋ | 6878/12188 [14:51:30<10:32:20, 7.15s/it] {'loss': 0.3502, 'grad_norm': 0.6438697621708375, 'learning_rate': 4.206149980526748e-06, 'epoch': 0.56} + 56%|█████▋ | 6878/12188 [14:51:30<10:32:20, 7.15s/it] 56%|█████▋ | 6879/12188 [14:51:37<10:37:01, 7.20s/it] {'loss': 0.352, 'grad_norm': 0.8791847500223677, 'learning_rate': 4.204838156338094e-06, 'epoch': 0.56} + 56%|█████▋ | 6879/12188 [14:51:37<10:37:01, 7.20s/it] 56%|█████▋ | 6880/12188 [14:51:44<10:22:24, 7.04s/it] {'loss': 0.2976, 'grad_norm': 0.7755602228064423, 'learning_rate': 4.203526388302489e-06, 'epoch': 0.56} + 56%|█████▋ | 6880/12188 [14:51:44<10:22:24, 7.04s/it] 56%|█████▋ | 6881/12188 [14:51:51<10:26:24, 7.08s/it] {'loss': 0.3323, 'grad_norm': 0.662208172669629, 'learning_rate': 4.202214676512571e-06, 'epoch': 0.56} + 56%|█████▋ | 6881/12188 [14:51:51<10:26:24, 7.08s/it] 56%|█████▋ | 6882/12188 [14:51:58<10:24:20, 7.06s/it] {'loss': 0.2878, 'grad_norm': 0.8372389688983864, 'learning_rate': 4.200903021060973e-06, 'epoch': 0.56} + 56%|█████▋ | 6882/12188 [14:51:58<10:24:20, 7.06s/it] 56%|█████▋ | 6883/12188 [14:52:05<10:17:10, 6.98s/it] {'loss': 0.2904, 'grad_norm': 0.7091300357324504, 'learning_rate': 4.199591422040318e-06, 'epoch': 0.56} + 56%|█████▋ | 6883/12188 [14:52:05<10:17:10, 6.98s/it] 56%|█████▋ | 6884/12188 [14:52:12<10:33:43, 7.17s/it] {'loss': 0.3367, 'grad_norm': 0.7113278452611494, 'learning_rate': 4.198279879543232e-06, 'epoch': 0.56} + 56%|█████▋ | 6884/12188 [14:52:12<10:33:43, 7.17s/it] 56%|█████▋ | 6885/12188 [14:52:23<12:03:00, 8.18s/it] {'loss': 0.3408, 'grad_norm': 0.885381811246509, 'learning_rate': 4.196968393662334e-06, 'epoch': 0.56} + 56%|█████▋ | 6885/12188 [14:52:23<12:03:00, 8.18s/it] 56%|█████▋ | 6886/12188 [14:52:30<11:24:44, 7.75s/it] {'loss': 0.3268, 'grad_norm': 0.6701482847558223, 'learning_rate': 4.195656964490235e-06, 'epoch': 0.56} + 56%|█████▋ | 6886/12188 [14:52:30<11:24:44, 7.75s/it] 57%|█████▋ | 6887/12188 [14:52:37<11:09:36, 7.58s/it] {'loss': 0.3137, 'grad_norm': 0.6736994841787683, 'learning_rate': 4.194345592119551e-06, 'epoch': 0.57} + 57%|█████▋ | 6887/12188 [14:52:37<11:09:36, 7.58s/it] 57%|█████▋ | 6888/12188 [14:52:44<11:10:55, 7.60s/it] {'loss': 0.3034, 'grad_norm': 0.6911840987200915, 'learning_rate': 4.193034276642886e-06, 'epoch': 0.57} + 57%|█████▋ | 6888/12188 [14:52:44<11:10:55, 7.60s/it] 57%|█████▋ | 6889/12188 [14:52:53<11:27:16, 7.78s/it] {'loss': 0.3113, 'grad_norm': 0.7531252199774918, 'learning_rate': 4.191723018152842e-06, 'epoch': 0.57} + 57%|█████▋ | 6889/12188 [14:52:53<11:27:16, 7.78s/it] 57%|█████▋ | 6890/12188 [14:52:59<11:01:19, 7.49s/it] {'loss': 0.2737, 'grad_norm': 0.6781339419909869, 'learning_rate': 4.190411816742024e-06, 'epoch': 0.57} + 57%|█████▋ | 6890/12188 [14:52:59<11:01:19, 7.49s/it] 57%|█████▋ | 6891/12188 [14:53:07<10:56:40, 7.44s/it] {'loss': 0.3245, 'grad_norm': 0.6484822494139602, 'learning_rate': 4.18910067250302e-06, 'epoch': 0.57} + 57%|█████▋ | 6891/12188 [14:53:07<10:56:40, 7.44s/it] 57%|█████▋ | 6892/12188 [14:53:15<11:05:21, 7.54s/it] {'loss': 0.322, 'grad_norm': 0.7038148379510518, 'learning_rate': 4.1877895855284255e-06, 'epoch': 0.57} + 57%|█████▋ | 6892/12188 [14:53:15<11:05:21, 7.54s/it] 57%|█████▋ | 6893/12188 [14:53:21<10:48:26, 7.35s/it] {'loss': 0.3679, 'grad_norm': 0.7705895973782804, 'learning_rate': 4.186478555910824e-06, 'epoch': 0.57} + 57%|█████▋ | 6893/12188 [14:53:21<10:48:26, 7.35s/it] 57%|█████▋ | 6894/12188 [14:53:28<10:31:20, 7.16s/it] {'loss': 0.3173, 'grad_norm': 0.7240639894024602, 'learning_rate': 4.185167583742799e-06, 'epoch': 0.57} + 57%|█████▋ | 6894/12188 [14:53:28<10:31:20, 7.16s/it] 57%|█████▋ | 6895/12188 [14:53:35<10:28:55, 7.13s/it] {'loss': 0.3579, 'grad_norm': 0.7358884946933573, 'learning_rate': 4.183856669116933e-06, 'epoch': 0.57} + 57%|█████▋ | 6895/12188 [14:53:35<10:28:55, 7.13s/it] 57%|█████▋ | 6896/12188 [14:53:42<10:14:00, 6.96s/it] {'loss': 0.318, 'grad_norm': 0.7207568017553144, 'learning_rate': 4.182545812125795e-06, 'epoch': 0.57} + 57%|█████▋ | 6896/12188 [14:53:42<10:14:00, 6.96s/it] 57%|█████▋ | 6897/12188 [14:53:49<10:30:44, 7.15s/it] {'loss': 0.3582, 'grad_norm': 0.6984776736592815, 'learning_rate': 4.1812350128619605e-06, 'epoch': 0.57} + 57%|█████▋ | 6897/12188 [14:53:49<10:30:44, 7.15s/it] 57%|█████▋ | 6898/12188 [14:53:56<10:18:24, 7.01s/it] {'loss': 0.3442, 'grad_norm': 0.731863391933108, 'learning_rate': 4.179924271417993e-06, 'epoch': 0.57} + 57%|█████▋ | 6898/12188 [14:53:56<10:18:24, 7.01s/it] 57%|█████▋ | 6899/12188 [14:54:03<10:10:29, 6.93s/it] {'loss': 0.3533, 'grad_norm': 0.7750780146730002, 'learning_rate': 4.178613587886455e-06, 'epoch': 0.57} + 57%|█████▋ | 6899/12188 [14:54:03<10:10:29, 6.93s/it] 57%|█████▋ | 6900/12188 [14:54:10<10:09:57, 6.92s/it] {'loss': 0.2948, 'grad_norm': 0.5947962493815266, 'learning_rate': 4.177302962359908e-06, 'epoch': 0.57} + 57%|█████▋ | 6900/12188 [14:54:10<10:09:57, 6.92s/it] 57%|█████▋ | 6901/12188 [14:54:17<10:31:19, 7.16s/it] {'loss': 0.2942, 'grad_norm': 0.6723880569938533, 'learning_rate': 4.175992394930902e-06, 'epoch': 0.57} + 57%|█████▋ | 6901/12188 [14:54:17<10:31:19, 7.16s/it] 57%|█████▋ | 6902/12188 [14:54:25<10:33:06, 7.19s/it] {'loss': 0.3304, 'grad_norm': 0.7090711666992854, 'learning_rate': 4.174681885691989e-06, 'epoch': 0.57} + 57%|█████▋ | 6902/12188 [14:54:25<10:33:06, 7.19s/it] 57%|█████▋ | 6903/12188 [14:54:32<10:36:02, 7.22s/it] {'loss': 0.3085, 'grad_norm': 0.7069521877799458, 'learning_rate': 4.173371434735718e-06, 'epoch': 0.57} + 57%|█████▋ | 6903/12188 [14:54:32<10:36:02, 7.22s/it] 57%|█████▋ | 6904/12188 [14:54:39<10:38:03, 7.25s/it] {'loss': 0.3164, 'grad_norm': 0.576566070297212, 'learning_rate': 4.172061042154626e-06, 'epoch': 0.57} + 57%|█████▋ | 6904/12188 [14:54:39<10:38:03, 7.25s/it] 57%|█████▋ | 6905/12188 [14:54:46<10:33:40, 7.20s/it] {'loss': 0.3311, 'grad_norm': 0.711439052194926, 'learning_rate': 4.170750708041253e-06, 'epoch': 0.57} + 57%|█████▋ | 6905/12188 [14:54:46<10:33:40, 7.20s/it] 57%|█████▋ | 6906/12188 [14:54:53<10:30:23, 7.16s/it] {'loss': 0.3126, 'grad_norm': 0.6593520400133956, 'learning_rate': 4.169440432488134e-06, 'epoch': 0.57} + 57%|█████▋ | 6906/12188 [14:54:53<10:30:23, 7.16s/it] 57%|█████▋ | 6907/12188 [14:55:01<10:31:10, 7.17s/it] {'loss': 0.3554, 'grad_norm': 0.736135436896953, 'learning_rate': 4.168130215587795e-06, 'epoch': 0.57} + 57%|█████▋ | 6907/12188 [14:55:01<10:31:10, 7.17s/it] 57%|█████▋ | 6908/12188 [14:55:09<11:13:38, 7.66s/it] {'loss': 0.3354, 'grad_norm': 0.7145187738397223, 'learning_rate': 4.166820057432767e-06, 'epoch': 0.57} + 57%|█████▋ | 6908/12188 [14:55:09<11:13:38, 7.66s/it] 57%|█████▋ | 6909/12188 [14:55:16<10:49:15, 7.38s/it] {'loss': 0.316, 'grad_norm': 0.6576719242064437, 'learning_rate': 4.165509958115564e-06, 'epoch': 0.57} + 57%|█████▋ | 6909/12188 [14:55:16<10:49:15, 7.38s/it] 57%|█████▋ | 6910/12188 [14:55:24<10:48:41, 7.37s/it] {'loss': 0.3234, 'grad_norm': 0.7495493828894344, 'learning_rate': 4.16419991772871e-06, 'epoch': 0.57} + 57%|█████▋ | 6910/12188 [14:55:24<10:48:41, 7.37s/it] 57%|█████▋ | 6911/12188 [14:55:32<11:08:59, 7.61s/it] {'loss': 0.3458, 'grad_norm': 0.7936187555852469, 'learning_rate': 4.162889936364713e-06, 'epoch': 0.57} + 57%|█████▋ | 6911/12188 [14:55:32<11:08:59, 7.61s/it] 57%|█████▋ | 6912/12188 [14:55:39<10:57:46, 7.48s/it] {'loss': 0.3164, 'grad_norm': 0.6911874097994237, 'learning_rate': 4.161580014116086e-06, 'epoch': 0.57} + 57%|█████▋ | 6912/12188 [14:55:39<10:57:46, 7.48s/it] 57%|█████▋ | 6913/12188 [14:55:47<11:11:47, 7.64s/it] {'loss': 0.3163, 'grad_norm': 0.6841731024244019, 'learning_rate': 4.1602701510753305e-06, 'epoch': 0.57} + 57%|█████▋ | 6913/12188 [14:55:47<11:11:47, 7.64s/it] 57%|█████▋ | 6914/12188 [14:55:54<10:59:32, 7.50s/it] {'loss': 0.3153, 'grad_norm': 0.7566167260289158, 'learning_rate': 4.158960347334947e-06, 'epoch': 0.57} + 57%|█████▋ | 6914/12188 [14:55:54<10:59:32, 7.50s/it] 57%|█████▋ | 6915/12188 [14:56:02<10:58:57, 7.50s/it] {'loss': 0.3125, 'grad_norm': 0.6763104197820017, 'learning_rate': 4.157650602987434e-06, 'epoch': 0.57} + 57%|█████▋ | 6915/12188 [14:56:02<10:58:57, 7.50s/it] 57%|█████▋ | 6916/12188 [14:56:09<11:00:21, 7.52s/it] {'loss': 0.3126, 'grad_norm': 0.766683665438428, 'learning_rate': 4.15634091812528e-06, 'epoch': 0.57} + 57%|█████▋ | 6916/12188 [14:56:09<11:00:21, 7.52s/it] 57%|█████▋ | 6917/12188 [14:56:20<12:31:47, 8.56s/it] {'loss': 0.3062, 'grad_norm': 0.6628930395981967, 'learning_rate': 4.1550312928409754e-06, 'epoch': 0.57} + 57%|█████▋ | 6917/12188 [14:56:20<12:31:47, 8.56s/it] 57%|█████▋ | 6918/12188 [14:56:27<11:48:26, 8.07s/it] {'loss': 0.3246, 'grad_norm': 0.6865321714799644, 'learning_rate': 4.153721727227005e-06, 'epoch': 0.57} + 57%|█████▋ | 6918/12188 [14:56:27<11:48:26, 8.07s/it] 57%|█████▋ | 6919/12188 [14:56:33<11:06:17, 7.59s/it] {'loss': 0.2982, 'grad_norm': 0.7734844515937673, 'learning_rate': 4.152412221375846e-06, 'epoch': 0.57} + 57%|█████▋ | 6919/12188 [14:56:33<11:06:17, 7.59s/it] 57%|█████▋ | 6920/12188 [14:56:40<10:42:49, 7.32s/it] {'loss': 0.3015, 'grad_norm': 0.6653850646122772, 'learning_rate': 4.151102775379973e-06, 'epoch': 0.57} + 57%|█████▋ | 6920/12188 [14:56:40<10:42:49, 7.32s/it] 57%|█████▋ | 6921/12188 [14:56:48<10:46:55, 7.37s/it] {'loss': 0.2821, 'grad_norm': 0.6200181714559303, 'learning_rate': 4.14979338933186e-06, 'epoch': 0.57} + 57%|█████▋ | 6921/12188 [14:56:48<10:46:55, 7.37s/it] 57%|█████▋ | 6922/12188 [14:56:54<10:31:16, 7.19s/it] {'loss': 0.3039, 'grad_norm': 0.6666429496710033, 'learning_rate': 4.148484063323969e-06, 'epoch': 0.57} + 57%|█████▋ | 6922/12188 [14:56:54<10:31:16, 7.19s/it] 57%|█████▋ | 6923/12188 [14:57:02<10:29:43, 7.18s/it] {'loss': 0.3632, 'grad_norm': 0.7225994028029408, 'learning_rate': 4.147174797448769e-06, 'epoch': 0.57} + 57%|█████▋ | 6923/12188 [14:57:02<10:29:43, 7.18s/it] 57%|█████▋ | 6924/12188 [14:57:08<10:17:47, 7.04s/it] {'loss': 0.2901, 'grad_norm': 0.5811358968776329, 'learning_rate': 4.145865591798712e-06, 'epoch': 0.57} + 57%|█████▋ | 6924/12188 [14:57:08<10:17:47, 7.04s/it] 57%|█████▋ | 6925/12188 [14:57:15<10:17:11, 7.04s/it] {'loss': 0.2988, 'grad_norm': 0.7187757374627715, 'learning_rate': 4.144556446466255e-06, 'epoch': 0.57} + 57%|█████▋ | 6925/12188 [14:57:15<10:17:11, 7.04s/it] 57%|█████▋ | 6926/12188 [14:57:24<10:49:30, 7.41s/it] {'loss': 0.2925, 'grad_norm': 0.6075443490821545, 'learning_rate': 4.14324736154385e-06, 'epoch': 0.57} + 57%|█████▋ | 6926/12188 [14:57:24<10:49:30, 7.41s/it] 57%|█████▋ | 6927/12188 [14:57:31<10:36:57, 7.26s/it] {'loss': 0.2939, 'grad_norm': 0.6104094431953117, 'learning_rate': 4.141938337123936e-06, 'epoch': 0.57} + 57%|█████▋ | 6927/12188 [14:57:31<10:36:57, 7.26s/it] 57%|█████▋ | 6928/12188 [14:57:37<10:22:15, 7.10s/it] {'loss': 0.3577, 'grad_norm': 0.7326633965073347, 'learning_rate': 4.1406293732989615e-06, 'epoch': 0.57} + 57%|█████▋ | 6928/12188 [14:57:37<10:22:15, 7.10s/it] 57%|█████▋ | 6929/12188 [14:57:45<10:47:16, 7.38s/it] {'loss': 0.3879, 'grad_norm': 0.6428367028747346, 'learning_rate': 4.139320470161357e-06, 'epoch': 0.57} + 57%|█████▋ | 6929/12188 [14:57:45<10:47:16, 7.38s/it] 57%|█████▋ | 6930/12188 [14:57:52<10:39:13, 7.29s/it] {'loss': 0.3467, 'grad_norm': 0.9053085205224067, 'learning_rate': 4.138011627803557e-06, 'epoch': 0.57} + 57%|█████▋ | 6930/12188 [14:57:52<10:39:13, 7.29s/it] 57%|█████▋ | 6931/12188 [14:58:00<10:42:25, 7.33s/it] {'loss': 0.3158, 'grad_norm': 0.7136946742105611, 'learning_rate': 4.136702846317996e-06, 'epoch': 0.57} + 57%|█████▋ | 6931/12188 [14:58:00<10:42:25, 7.33s/it] 57%|█████▋ | 6932/12188 [14:58:07<10:34:00, 7.24s/it] {'loss': 0.3085, 'grad_norm': 0.6836885316216437, 'learning_rate': 4.135394125797089e-06, 'epoch': 0.57} + 57%|█████▋ | 6932/12188 [14:58:07<10:34:00, 7.24s/it] 57%|█████▋ | 6933/12188 [14:58:14<10:22:19, 7.11s/it] {'loss': 0.3274, 'grad_norm': 0.6801341001971227, 'learning_rate': 4.134085466333262e-06, 'epoch': 0.57} + 57%|█████▋ | 6933/12188 [14:58:14<10:22:19, 7.11s/it] 57%|█████▋ | 6934/12188 [14:58:20<10:14:22, 7.02s/it] {'loss': 0.3294, 'grad_norm': 0.7308131235519495, 'learning_rate': 4.132776868018927e-06, 'epoch': 0.57} + 57%|█████▋ | 6934/12188 [14:58:20<10:14:22, 7.02s/it] 57%|█████▋ | 6935/12188 [14:58:27<10:09:53, 6.97s/it] {'loss': 0.3052, 'grad_norm': 0.6641076769699115, 'learning_rate': 4.131468330946495e-06, 'epoch': 0.57} + 57%|█████▋ | 6935/12188 [14:58:27<10:09:53, 6.97s/it] 57%|█████▋ | 6936/12188 [14:58:34<10:05:25, 6.92s/it] {'loss': 0.2946, 'grad_norm': 0.6992090043868859, 'learning_rate': 4.130159855208377e-06, 'epoch': 0.57} + 57%|█████▋ | 6936/12188 [14:58:34<10:05:25, 6.92s/it] 57%|█████▋ | 6937/12188 [14:58:42<10:38:37, 7.30s/it] {'loss': 0.2886, 'grad_norm': 0.6337122386852062, 'learning_rate': 4.12885144089697e-06, 'epoch': 0.57} + 57%|█████▋ | 6937/12188 [14:58:42<10:38:37, 7.30s/it] 57%|█████▋ | 6938/12188 [14:58:50<10:43:57, 7.36s/it] {'loss': 0.3234, 'grad_norm': 0.7146726445488698, 'learning_rate': 4.127543088104676e-06, 'epoch': 0.57} + 57%|█████▋ | 6938/12188 [14:58:50<10:43:57, 7.36s/it] 57%|█████▋ | 6939/12188 [14:58:57<10:31:13, 7.22s/it] {'loss': 0.2682, 'grad_norm': 0.6709743314915877, 'learning_rate': 4.126234796923887e-06, 'epoch': 0.57} + 57%|█████▋ | 6939/12188 [14:58:57<10:31:13, 7.22s/it] 57%|█████▋ | 6940/12188 [14:59:04<10:27:23, 7.17s/it] {'loss': 0.3243, 'grad_norm': 0.6554346451388557, 'learning_rate': 4.124926567446993e-06, 'epoch': 0.57} + 57%|█████▋ | 6940/12188 [14:59:04<10:27:23, 7.17s/it] 57%|█████▋ | 6941/12188 [14:59:10<10:13:16, 7.01s/it] {'loss': 0.3005, 'grad_norm': 0.6816801438719239, 'learning_rate': 4.1236183997663795e-06, 'epoch': 0.57} + 57%|█████▋ | 6941/12188 [14:59:10<10:13:16, 7.01s/it] 57%|█████▋ | 6942/12188 [14:59:17<10:03:57, 6.91s/it] {'loss': 0.3179, 'grad_norm': 0.6616986328437117, 'learning_rate': 4.122310293974425e-06, 'epoch': 0.57} + 57%|█████▋ | 6942/12188 [14:59:17<10:03:57, 6.91s/it] 57%|█████▋ | 6943/12188 [14:59:24<10:11:41, 7.00s/it] {'loss': 0.3499, 'grad_norm': 0.7536716282860246, 'learning_rate': 4.121002250163507e-06, 'epoch': 0.57} + 57%|█████▋ | 6943/12188 [14:59:24<10:11:41, 7.00s/it] 57%|█████▋ | 6944/12188 [14:59:32<10:19:32, 7.09s/it] {'loss': 0.3245, 'grad_norm': 0.6910769457336239, 'learning_rate': 4.119694268426001e-06, 'epoch': 0.57} + 57%|█████▋ | 6944/12188 [14:59:32<10:19:32, 7.09s/it] 57%|█████▋ | 6945/12188 [14:59:39<10:18:56, 7.08s/it] {'loss': 0.3003, 'grad_norm': 0.7199650672932433, 'learning_rate': 4.1183863488542686e-06, 'epoch': 0.57} + 57%|█████▋ | 6945/12188 [14:59:39<10:18:56, 7.08s/it] 57%|█████▋ | 6946/12188 [14:59:45<10:04:47, 6.92s/it] {'loss': 0.2947, 'grad_norm': 0.6855904293951374, 'learning_rate': 4.117078491540678e-06, 'epoch': 0.57} + 57%|█████▋ | 6946/12188 [14:59:45<10:04:47, 6.92s/it] 57%|█████▋ | 6947/12188 [14:59:52<9:53:17, 6.79s/it] {'loss': 0.3219, 'grad_norm': 0.6601123027333226, 'learning_rate': 4.115770696577584e-06, 'epoch': 0.57} + 57%|█████▋ | 6947/12188 [14:59:52<9:53:17, 6.79s/it] 57%|█████▋ | 6948/12188 [14:59:59<10:20:57, 7.11s/it] {'loss': 0.3212, 'grad_norm': 0.6852906784233334, 'learning_rate': 4.114462964057343e-06, 'epoch': 0.57} + 57%|█████▋ | 6948/12188 [14:59:59<10:20:57, 7.11s/it] 57%|█████▋ | 6949/12188 [15:00:07<10:35:36, 7.28s/it] {'loss': 0.3261, 'grad_norm': 0.7233307258523697, 'learning_rate': 4.113155294072307e-06, 'epoch': 0.57} + 57%|█████▋ | 6949/12188 [15:00:07<10:35:36, 7.28s/it] 57%|█████▋ | 6950/12188 [15:00:15<10:38:15, 7.31s/it] {'loss': 0.3033, 'grad_norm': 0.5619957158477011, 'learning_rate': 4.111847686714816e-06, 'epoch': 0.57} + 57%|█████▋ | 6950/12188 [15:00:15<10:38:15, 7.31s/it] 57%|█████▋ | 6951/12188 [15:00:22<10:39:27, 7.33s/it] {'loss': 0.3665, 'grad_norm': 0.746567132301524, 'learning_rate': 4.110540142077217e-06, 'epoch': 0.57} + 57%|█████▋ | 6951/12188 [15:00:22<10:39:27, 7.33s/it] 57%|█████▋ | 6952/12188 [15:00:29<10:26:41, 7.18s/it] {'loss': 0.3481, 'grad_norm': 0.716510758884508, 'learning_rate': 4.109232660251842e-06, 'epoch': 0.57} + 57%|█████▋ | 6952/12188 [15:00:29<10:26:41, 7.18s/it] 57%|█████▋ | 6953/12188 [15:00:37<10:54:38, 7.50s/it] {'loss': 0.342, 'grad_norm': 0.6596294566299962, 'learning_rate': 4.107925241331027e-06, 'epoch': 0.57} + 57%|█████▋ | 6953/12188 [15:00:37<10:54:38, 7.50s/it] 57%|█████▋ | 6954/12188 [15:00:44<10:40:16, 7.34s/it] {'loss': 0.3229, 'grad_norm': 0.7476415645316282, 'learning_rate': 4.106617885407099e-06, 'epoch': 0.57} + 57%|█████▋ | 6954/12188 [15:00:44<10:40:16, 7.34s/it] 57%|█████▋ | 6955/12188 [15:00:51<10:34:01, 7.27s/it] {'loss': 0.329, 'grad_norm': 0.6956286973617859, 'learning_rate': 4.105310592572378e-06, 'epoch': 0.57} + 57%|█████▋ | 6955/12188 [15:00:51<10:34:01, 7.27s/it] 57%|█████▋ | 6956/12188 [15:00:58<10:33:46, 7.27s/it] {'loss': 0.3247, 'grad_norm': 0.7440644060725291, 'learning_rate': 4.104003362919186e-06, 'epoch': 0.57} + 57%|█████▋ | 6956/12188 [15:00:58<10:33:46, 7.27s/it] 57%|█████▋ | 6957/12188 [15:01:06<10:37:28, 7.31s/it] {'loss': 0.3096, 'grad_norm': 0.7435992787321252, 'learning_rate': 4.102696196539839e-06, 'epoch': 0.57} + 57%|█████▋ | 6957/12188 [15:01:06<10:37:28, 7.31s/it] 57%|█████▋ | 6958/12188 [15:01:13<10:28:11, 7.21s/it] {'loss': 0.3009, 'grad_norm': 0.6947084227824596, 'learning_rate': 4.101389093526643e-06, 'epoch': 0.57} + 57%|█████▋ | 6958/12188 [15:01:13<10:28:11, 7.21s/it] 57%|█████▋ | 6959/12188 [15:01:19<10:14:25, 7.05s/it] {'loss': 0.295, 'grad_norm': 0.6657235810689347, 'learning_rate': 4.1000820539719076e-06, 'epoch': 0.57} + 57%|█████▋ | 6959/12188 [15:01:19<10:14:25, 7.05s/it] 57%|█████▋ | 6960/12188 [15:01:27<10:21:35, 7.13s/it] {'loss': 0.298, 'grad_norm': 0.658010379340932, 'learning_rate': 4.0987750779679305e-06, 'epoch': 0.57} + 57%|█████▋ | 6960/12188 [15:01:27<10:21:35, 7.13s/it] 57%|█████▋ | 6961/12188 [15:01:35<10:38:17, 7.33s/it] {'loss': 0.2924, 'grad_norm': 0.6071285705719381, 'learning_rate': 4.097468165607008e-06, 'epoch': 0.57} + 57%|█████▋ | 6961/12188 [15:01:35<10:38:17, 7.33s/it] 57%|█████▋ | 6962/12188 [15:01:41<10:19:23, 7.11s/it] {'loss': 0.3278, 'grad_norm': 0.6667954722478523, 'learning_rate': 4.0961613169814365e-06, 'epoch': 0.57} + 57%|█████▋ | 6962/12188 [15:01:41<10:19:23, 7.11s/it] 57%|█████▋ | 6963/12188 [15:01:48<10:08:31, 6.99s/it] {'loss': 0.3577, 'grad_norm': 0.6744910234241758, 'learning_rate': 4.094854532183498e-06, 'epoch': 0.57} + 57%|█████▋ | 6963/12188 [15:01:48<10:08:31, 6.99s/it] 57%|█████▋ | 6964/12188 [15:01:55<10:11:31, 7.02s/it] {'loss': 0.2983, 'grad_norm': 0.7833084025449972, 'learning_rate': 4.0935478113054805e-06, 'epoch': 0.57} + 57%|█████▋ | 6964/12188 [15:01:55<10:11:31, 7.02s/it] 57%|█████▋ | 6965/12188 [15:02:02<10:00:26, 6.90s/it] {'loss': 0.3252, 'grad_norm': 0.6330834318486096, 'learning_rate': 4.092241154439658e-06, 'epoch': 0.57} + 57%|█████▋ | 6965/12188 [15:02:02<10:00:26, 6.90s/it] 57%|█████▋ | 6966/12188 [15:02:08<9:55:26, 6.84s/it] {'loss': 0.3394, 'grad_norm': 0.6654606160316849, 'learning_rate': 4.090934561678308e-06, 'epoch': 0.57} + 57%|█████▋ | 6966/12188 [15:02:08<9:55:26, 6.84s/it] 57%|█████▋ | 6967/12188 [15:02:15<10:05:22, 6.96s/it] {'loss': 0.2824, 'grad_norm': 0.7222240377196809, 'learning_rate': 4.089628033113699e-06, 'epoch': 0.57} + 57%|█████▋ | 6967/12188 [15:02:15<10:05:22, 6.96s/it] 57%|█████▋ | 6968/12188 [15:02:22<10:05:55, 6.96s/it] {'loss': 0.3171, 'grad_norm': 0.7605795957027262, 'learning_rate': 4.088321568838095e-06, 'epoch': 0.57} + 57%|█████▋ | 6968/12188 [15:02:22<10:05:55, 6.96s/it] 57%|█████▋ | 6969/12188 [15:02:31<10:46:54, 7.44s/it] {'loss': 0.3372, 'grad_norm': 0.7628074233366285, 'learning_rate': 4.087015168943759e-06, 'epoch': 0.57} + 57%|█████▋ | 6969/12188 [15:02:31<10:46:54, 7.44s/it] 57%|█████▋ | 6970/12188 [15:02:39<10:49:33, 7.47s/it] {'loss': 0.3267, 'grad_norm': 0.645841160089767, 'learning_rate': 4.085708833522942e-06, 'epoch': 0.57} + 57%|█████▋ | 6970/12188 [15:02:39<10:49:33, 7.47s/it] 57%|█████▋ | 6971/12188 [15:02:45<10:24:11, 7.18s/it] {'loss': 0.3543, 'grad_norm': 0.6849193617573982, 'learning_rate': 4.084402562667898e-06, 'epoch': 0.57} + 57%|█████▋ | 6971/12188 [15:02:45<10:24:11, 7.18s/it] 57%|█████▋ | 6972/12188 [15:02:52<10:18:55, 7.12s/it] {'loss': 0.3176, 'grad_norm': 0.6168335123791162, 'learning_rate': 4.083096356470877e-06, 'epoch': 0.57} + 57%|█████▋ | 6972/12188 [15:02:52<10:18:55, 7.12s/it] 57%|█████▋ | 6973/12188 [15:02:59<10:13:05, 7.05s/it] {'loss': 0.296, 'grad_norm': 0.6995823518767246, 'learning_rate': 4.081790215024116e-06, 'epoch': 0.57} + 57%|█████▋ | 6973/12188 [15:02:59<10:13:05, 7.05s/it] 57%|█████▋ | 6974/12188 [15:03:06<10:21:50, 7.16s/it] {'loss': 0.3123, 'grad_norm': 0.8358496185520664, 'learning_rate': 4.080484138419856e-06, 'epoch': 0.57} + 57%|█████▋ | 6974/12188 [15:03:06<10:21:50, 7.16s/it] 57%|█████▋ | 6975/12188 [15:03:13<10:18:07, 7.11s/it] {'loss': 0.31, 'grad_norm': 0.9605081009523275, 'learning_rate': 4.079178126750329e-06, 'epoch': 0.57} + 57%|█████▋ | 6975/12188 [15:03:13<10:18:07, 7.11s/it] 57%|█████▋ | 6976/12188 [15:03:21<10:37:02, 7.33s/it] {'loss': 0.3147, 'grad_norm': 0.6915835142002024, 'learning_rate': 4.077872180107762e-06, 'epoch': 0.57} + 57%|█████▋ | 6976/12188 [15:03:21<10:37:02, 7.33s/it] 57%|█████▋ | 6977/12188 [15:03:28<10:28:35, 7.24s/it] {'loss': 0.3087, 'grad_norm': 0.6954388810117653, 'learning_rate': 4.076566298584382e-06, 'epoch': 0.57} + 57%|█████▋ | 6977/12188 [15:03:28<10:28:35, 7.24s/it] 57%|█████▋ | 6978/12188 [15:03:36<10:34:04, 7.30s/it] {'loss': 0.3109, 'grad_norm': 0.6288023146294207, 'learning_rate': 4.075260482272404e-06, 'epoch': 0.57} + 57%|█████▋ | 6978/12188 [15:03:36<10:34:04, 7.30s/it] 57%|█████▋ | 6979/12188 [15:03:48<12:42:35, 8.78s/it] {'loss': 0.3522, 'grad_norm': 0.8400815968245532, 'learning_rate': 4.073954731264045e-06, 'epoch': 0.57} + 57%|█████▋ | 6979/12188 [15:03:48<12:42:35, 8.78s/it] 57%|█████▋ | 6980/12188 [15:03:56<12:22:00, 8.55s/it] {'loss': 0.341, 'grad_norm': 0.7037062298508322, 'learning_rate': 4.072649045651518e-06, 'epoch': 0.57} + 57%|█████▋ | 6980/12188 [15:03:56<12:22:00, 8.55s/it] 57%|█████▋ | 6981/12188 [15:04:04<12:20:42, 8.54s/it] {'loss': 0.3487, 'grad_norm': 0.6589907890843151, 'learning_rate': 4.071343425527024e-06, 'epoch': 0.57} + 57%|█████▋ | 6981/12188 [15:04:04<12:20:42, 8.54s/it] 57%|█████▋ | 6982/12188 [15:04:12<11:45:02, 8.13s/it] {'loss': 0.3173, 'grad_norm': 0.7206084481598822, 'learning_rate': 4.070037870982766e-06, 'epoch': 0.57} + 57%|█████▋ | 6982/12188 [15:04:12<11:45:02, 8.13s/it] 57%|█████▋ | 6983/12188 [15:04:18<11:04:54, 7.66s/it] {'loss': 0.2939, 'grad_norm': 0.6795358871422833, 'learning_rate': 4.068732382110937e-06, 'epoch': 0.57} + 57%|█████▋ | 6983/12188 [15:04:18<11:04:54, 7.66s/it] 57%|█████▋ | 6984/12188 [15:04:25<10:41:29, 7.40s/it] {'loss': 0.2776, 'grad_norm': 0.6345399339562025, 'learning_rate': 4.0674269590037315e-06, 'epoch': 0.57} + 57%|█████▋ | 6984/12188 [15:04:25<10:41:29, 7.40s/it] 57%|█████▋ | 6985/12188 [15:04:34<11:15:16, 7.79s/it] {'loss': 0.3241, 'grad_norm': 1.0180950559369122, 'learning_rate': 4.066121601753338e-06, 'epoch': 0.57} + 57%|█████▋ | 6985/12188 [15:04:34<11:15:16, 7.79s/it] 57%|█████▋ | 6986/12188 [15:04:40<10:46:25, 7.46s/it] {'loss': 0.293, 'grad_norm': 0.6647794320071855, 'learning_rate': 4.064816310451934e-06, 'epoch': 0.57} + 57%|█████▋ | 6986/12188 [15:04:40<10:46:25, 7.46s/it] 57%|█████▋ | 6987/12188 [15:04:47<10:24:11, 7.20s/it] {'loss': 0.2854, 'grad_norm': 0.6806961351540692, 'learning_rate': 4.0635110851917035e-06, 'epoch': 0.57} + 57%|█████▋ | 6987/12188 [15:04:47<10:24:11, 7.20s/it] 57%|█████▋ | 6988/12188 [15:04:55<10:37:11, 7.35s/it] {'loss': 0.3296, 'grad_norm': 0.7096645670227227, 'learning_rate': 4.062205926064813e-06, 'epoch': 0.57} + 57%|█████▋ | 6988/12188 [15:04:55<10:37:11, 7.35s/it] 57%|█████▋ | 6989/12188 [15:05:03<10:53:41, 7.54s/it] {'loss': 0.2866, 'grad_norm': 0.6824147303065012, 'learning_rate': 4.060900833163433e-06, 'epoch': 0.57} + 57%|█████▋ | 6989/12188 [15:05:03<10:53:41, 7.54s/it] 57%|█████▋ | 6990/12188 [15:05:10<10:42:43, 7.42s/it] {'loss': 0.3199, 'grad_norm': 0.6827812499750032, 'learning_rate': 4.059595806579728e-06, 'epoch': 0.57} + 57%|█████▋ | 6990/12188 [15:05:10<10:42:43, 7.42s/it] 57%|█████▋ | 6991/12188 [15:05:18<11:05:17, 7.68s/it] {'loss': 0.3038, 'grad_norm': 0.6479100534871851, 'learning_rate': 4.058290846405856e-06, 'epoch': 0.57} + 57%|█████▋ | 6991/12188 [15:05:18<11:05:17, 7.68s/it] 57%|█████▋ | 6992/12188 [15:05:27<11:27:28, 7.94s/it] {'loss': 0.3439, 'grad_norm': 0.7497434832635859, 'learning_rate': 4.05698595273397e-06, 'epoch': 0.57} + 57%|█████▋ | 6992/12188 [15:05:27<11:27:28, 7.94s/it] 57%|█████▋ | 6993/12188 [15:05:34<11:05:24, 7.69s/it] {'loss': 0.3234, 'grad_norm': 0.6370568116620402, 'learning_rate': 4.0556811256562235e-06, 'epoch': 0.57} + 57%|█████▋ | 6993/12188 [15:05:34<11:05:24, 7.69s/it] 57%|█████▋ | 6994/12188 [15:05:41<10:59:54, 7.62s/it] {'loss': 0.32, 'grad_norm': 0.7355222179703254, 'learning_rate': 4.054376365264758e-06, 'epoch': 0.57} + 57%|█████▋ | 6994/12188 [15:05:41<10:59:54, 7.62s/it] 57%|█████▋ | 6995/12188 [15:05:48<10:52:49, 7.54s/it] {'loss': 0.3236, 'grad_norm': 0.7589568853042064, 'learning_rate': 4.053071671651715e-06, 'epoch': 0.57} + 57%|█████▋ | 6995/12188 [15:05:48<10:52:49, 7.54s/it] 57%|█████▋ | 6996/12188 [15:05:56<10:48:30, 7.49s/it] {'loss': 0.3645, 'grad_norm': 0.860671963086332, 'learning_rate': 4.051767044909227e-06, 'epoch': 0.57} + 57%|█████▋ | 6996/12188 [15:05:56<10:48:30, 7.49s/it] 57%|█████▋ | 6997/12188 [15:06:03<10:26:47, 7.24s/it] {'loss': 0.316, 'grad_norm': 0.746475904544963, 'learning_rate': 4.050462485129428e-06, 'epoch': 0.57} + 57%|█████▋ | 6997/12188 [15:06:03<10:26:47, 7.24s/it] 57%|█████▋ | 6998/12188 [15:06:10<10:27:59, 7.26s/it] {'loss': 0.3011, 'grad_norm': 0.6466842210021542, 'learning_rate': 4.049157992404444e-06, 'epoch': 0.57} + 57%|█████▋ | 6998/12188 [15:06:10<10:27:59, 7.26s/it] 57%|█████▋ | 6999/12188 [15:06:17<10:12:57, 7.09s/it] {'loss': 0.2971, 'grad_norm': 0.6256971774156157, 'learning_rate': 4.047853566826393e-06, 'epoch': 0.57} + 57%|█████▋ | 6999/12188 [15:06:17<10:12:57, 7.09s/it] 57%|█████▋ | 7000/12188 [15:06:24<10:18:21, 7.15s/it] {'loss': 0.359, 'grad_norm': 0.6873831217710347, 'learning_rate': 4.046549208487395e-06, 'epoch': 0.57} + 57%|█████▋ | 7000/12188 [15:06:24<10:18:21, 7.15s/it]/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/utils/checkpoint.py:87: UserWarning: None of the inputs have requires_grad=True. Gradients will be None + warnings.warn( + 57%|█████▋ | 7001/12188 [15:06:50<18:21:43, 12.74s/it] {'loss': 0.3272, 'grad_norm': 0.6357290531075114, 'learning_rate': 4.045244917479559e-06, 'epoch': 0.57} + 57%|█████▋ | 7001/12188 [15:06:50<18:21:43, 12.74s/it] 57%|█████▋ | 7002/12188 [15:06:56<15:42:54, 10.91s/it] {'loss': 0.3027, 'grad_norm': 0.7207354426785008, 'learning_rate': 4.043940693894993e-06, 'epoch': 0.57} + 57%|█████▋ | 7002/12188 [15:06:56<15:42:54, 10.91s/it] 57%|█████▋ | 7003/12188 [15:07:05<14:47:07, 10.27s/it] {'loss': 0.317, 'grad_norm': 0.6742783413324004, 'learning_rate': 4.042636537825801e-06, 'epoch': 0.57} + 57%|█████▋ | 7003/12188 [15:07:05<14:47:07, 10.27s/it] 57%|█████▋ | 7004/12188 [15:07:12<13:21:37, 9.28s/it] {'loss': 0.2893, 'grad_norm': 0.6687598670234369, 'learning_rate': 4.041332449364076e-06, 'epoch': 0.57} + 57%|█████▋ | 7004/12188 [15:07:12<13:21:37, 9.28s/it] 57%|█████▋ | 7005/12188 [15:07:20<12:40:57, 8.81s/it] {'loss': 0.3662, 'grad_norm': 0.7331421099940713, 'learning_rate': 4.040028428601916e-06, 'epoch': 0.57} + 57%|█████▋ | 7005/12188 [15:07:20<12:40:57, 8.81s/it] 57%|█████▋ | 7006/12188 [15:07:27<11:57:56, 8.31s/it] {'loss': 0.3248, 'grad_norm': 0.9300866609790351, 'learning_rate': 4.038724475631403e-06, 'epoch': 0.57} + 57%|█████▋ | 7006/12188 [15:07:27<11:57:56, 8.31s/it] 57%|█████▋ | 7007/12188 [15:07:34<11:41:07, 8.12s/it] {'loss': 0.321, 'grad_norm': 0.6436819288037349, 'learning_rate': 4.037420590544623e-06, 'epoch': 0.57} + 57%|█████▋ | 7007/12188 [15:07:35<11:41:07, 8.12s/it] 57%|█████▋ | 7008/12188 [15:07:43<11:40:16, 8.11s/it] {'loss': 0.3304, 'grad_norm': 0.6619033706271414, 'learning_rate': 4.036116773433657e-06, 'epoch': 0.57} + 57%|█████▋ | 7008/12188 [15:07:43<11:40:16, 8.11s/it] 58%|█████▊ | 7009/12188 [15:07:49<10:59:57, 7.65s/it] {'loss': 0.332, 'grad_norm': 0.7017569035422633, 'learning_rate': 4.034813024390575e-06, 'epoch': 0.58} + 58%|█████▊ | 7009/12188 [15:07:49<10:59:57, 7.65s/it] 58%|█████▊ | 7010/12188 [15:07:56<10:40:18, 7.42s/it] {'loss': 0.3104, 'grad_norm': 0.749426171129015, 'learning_rate': 4.033509343507445e-06, 'epoch': 0.58} + 58%|█████▊ | 7010/12188 [15:07:56<10:40:18, 7.42s/it] 58%|█████▊ | 7011/12188 [15:08:03<10:27:01, 7.27s/it] {'loss': 0.3182, 'grad_norm': 0.8400309910631586, 'learning_rate': 4.032205730876335e-06, 'epoch': 0.58} + 58%|█████▊ | 7011/12188 [15:08:03<10:27:01, 7.27s/it] 58%|█████▊ | 7012/12188 [15:08:10<10:31:37, 7.32s/it] {'loss': 0.3758, 'grad_norm': 0.88225610856447, 'learning_rate': 4.0309021865892985e-06, 'epoch': 0.58} + 58%|█████▊ | 7012/12188 [15:08:10<10:31:37, 7.32s/it] 58%|█████▊ | 7013/12188 [15:08:17<10:14:15, 7.12s/it] {'loss': 0.2901, 'grad_norm': 0.6354609103955794, 'learning_rate': 4.029598710738395e-06, 'epoch': 0.58} + 58%|█████▊ | 7013/12188 [15:08:17<10:14:15, 7.12s/it] 58%|█████▊ | 7014/12188 [15:08:24<10:01:05, 6.97s/it] {'loss': 0.2934, 'grad_norm': 0.6277512216747303, 'learning_rate': 4.02829530341567e-06, 'epoch': 0.58} + 58%|█████▊ | 7014/12188 [15:08:24<10:01:05, 6.97s/it] 58%|█████▊ | 7015/12188 [15:08:30<9:51:02, 6.86s/it] {'loss': 0.3461, 'grad_norm': 0.7473433064487568, 'learning_rate': 4.02699196471317e-06, 'epoch': 0.58} + 58%|█████▊ | 7015/12188 [15:08:30<9:51:02, 6.86s/it] 58%|█████▊ | 7016/12188 [15:08:38<10:24:13, 7.24s/it] {'loss': 0.3272, 'grad_norm': 0.6917624863315063, 'learning_rate': 4.025688694722936e-06, 'epoch': 0.58} + 58%|█████▊ | 7016/12188 [15:08:38<10:24:13, 7.24s/it] 58%|█████▊ | 7017/12188 [15:08:47<10:50:10, 7.54s/it] {'loss': 0.2854, 'grad_norm': 0.7544928460092258, 'learning_rate': 4.0243854935369975e-06, 'epoch': 0.58} + 58%|█████▊ | 7017/12188 [15:08:47<10:50:10, 7.54s/it] 58%|█████▊ | 7018/12188 [15:08:53<10:29:53, 7.31s/it] {'loss': 0.3671, 'grad_norm': 0.6628049061848498, 'learning_rate': 4.0230823612473916e-06, 'epoch': 0.58} + 58%|█████▊ | 7018/12188 [15:08:53<10:29:53, 7.31s/it] 58%|█████▊ | 7019/12188 [15:09:00<10:14:04, 7.13s/it] {'loss': 0.353, 'grad_norm': 0.7068680598034355, 'learning_rate': 4.021779297946137e-06, 'epoch': 0.58} + 58%|█████▊ | 7019/12188 [15:09:00<10:14:04, 7.13s/it] 58%|█████▊ | 7020/12188 [15:09:07<9:59:58, 6.97s/it] {'loss': 0.3116, 'grad_norm': 0.6805002595505221, 'learning_rate': 4.020476303725257e-06, 'epoch': 0.58} + 58%|█████▊ | 7020/12188 [15:09:07<9:59:58, 6.97s/it] 58%|█████▊ | 7021/12188 [15:09:14<10:04:04, 7.01s/it] {'loss': 0.3437, 'grad_norm': 0.7295191922547828, 'learning_rate': 4.0191733786767685e-06, 'epoch': 0.58} + 58%|█████▊ | 7021/12188 [15:09:14<10:04:04, 7.01s/it] 58%|█████▊ | 7022/12188 [15:09:22<10:23:59, 7.25s/it] {'loss': 0.3048, 'grad_norm': 0.6288543326865177, 'learning_rate': 4.017870522892679e-06, 'epoch': 0.58} + 58%|█████▊ | 7022/12188 [15:09:22<10:23:59, 7.25s/it] 58%|█████▊ | 7023/12188 [15:09:29<10:18:17, 7.18s/it] {'loss': 0.3329, 'grad_norm': 0.6479572051075171, 'learning_rate': 4.016567736464995e-06, 'epoch': 0.58} + 58%|█████▊ | 7023/12188 [15:09:29<10:18:17, 7.18s/it] 58%|█████▊ | 7024/12188 [15:09:36<10:12:10, 7.11s/it] {'loss': 0.3069, 'grad_norm': 0.6885586219155, 'learning_rate': 4.015265019485718e-06, 'epoch': 0.58} + 58%|█████▊ | 7024/12188 [15:09:36<10:12:10, 7.11s/it] 58%|█████▊ | 7025/12188 [15:09:42<10:01:22, 6.99s/it] {'loss': 0.3157, 'grad_norm': 0.714750423369962, 'learning_rate': 4.013962372046842e-06, 'epoch': 0.58} + 58%|█████▊ | 7025/12188 [15:09:42<10:01:22, 6.99s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f8971319030> +[Try #0] Failed to fetch sample 4386045 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f8971319030> +Problematic sample: {'image': '20240823_021250_before_screenshot_sub0.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Picture 4 of 18'"}, {'from': 'gpt', 'value': '\nclick(x=0.245, y=0.863)\n'}]} + 58%|█████▊ | 7026/12188 [15:09:50<10:21:57, 7.23s/it] {'loss': 0.2985, 'grad_norm': 0.7019730291284866, 'learning_rate': 4.012659794240362e-06, 'epoch': 0.58} + 58%|█████▊ | 7026/12188 [15:09:50<10:21:57, 7.23s/it] 58%|█████▊ | 7027/12188 [15:09:58<10:39:05, 7.43s/it] {'loss': 0.3145, 'grad_norm': 0.6602756867442732, 'learning_rate': 4.011357286158258e-06, 'epoch': 0.58} + 58%|█████▊ | 7027/12188 [15:09:58<10:39:05, 7.43s/it] 58%|█████▊ | 7028/12188 [15:10:05<10:26:53, 7.29s/it] {'loss': 0.3042, 'grad_norm': 0.6827533306835487, 'learning_rate': 4.010054847892515e-06, 'epoch': 0.58} + 58%|█████▊ | 7028/12188 [15:10:05<10:26:53, 7.29s/it] 58%|█████▊ | 7029/12188 [15:10:12<10:31:35, 7.35s/it] {'loss': 0.3248, 'grad_norm': 0.7716707439859777, 'learning_rate': 4.008752479535109e-06, 'epoch': 0.58} + 58%|█████▊ | 7029/12188 [15:10:12<10:31:35, 7.35s/it] 58%|█████▊ | 7030/12188 [15:10:20<10:35:44, 7.40s/it] {'loss': 0.3599, 'grad_norm': 0.6530558880017374, 'learning_rate': 4.00745018117801e-06, 'epoch': 0.58} + 58%|█████▊ | 7030/12188 [15:10:20<10:35:44, 7.40s/it] 58%|█████▊ | 7031/12188 [15:10:29<11:11:55, 7.82s/it] {'loss': 0.3188, 'grad_norm': 0.6113233852803779, 'learning_rate': 4.006147952913185e-06, 'epoch': 0.58} + 58%|█████▊ | 7031/12188 [15:10:29<11:11:55, 7.82s/it] 58%|█████▊ | 7032/12188 [15:10:36<11:01:13, 7.69s/it] {'loss': 0.3092, 'grad_norm': 0.683861227760472, 'learning_rate': 4.004845794832594e-06, 'epoch': 0.58} + 58%|█████▊ | 7032/12188 [15:10:36<11:01:13, 7.69s/it] 58%|█████▊ | 7033/12188 [15:10:43<10:45:44, 7.52s/it] {'loss': 0.3116, 'grad_norm': 0.7185269744110251, 'learning_rate': 4.003543707028193e-06, 'epoch': 0.58} + 58%|█████▊ | 7033/12188 [15:10:43<10:45:44, 7.52s/it] 58%|█████▊ | 7034/12188 [15:10:51<10:38:40, 7.44s/it] {'loss': 0.3306, 'grad_norm': 0.7496553611210701, 'learning_rate': 4.002241689591937e-06, 'epoch': 0.58} + 58%|█████▊ | 7034/12188 [15:10:51<10:38:40, 7.44s/it] 58%|█████▊ | 7035/12188 [15:10:57<10:21:31, 7.24s/it] {'loss': 0.3232, 'grad_norm': 0.635405816814588, 'learning_rate': 4.000939742615767e-06, 'epoch': 0.58} + 58%|█████▊ | 7035/12188 [15:10:57<10:21:31, 7.24s/it] 58%|█████▊ | 7036/12188 [15:11:04<10:11:35, 7.12s/it] {'loss': 0.2893, 'grad_norm': 0.6533167004693939, 'learning_rate': 3.999637866191631e-06, 'epoch': 0.58} + 58%|█████▊ | 7036/12188 [15:11:04<10:11:35, 7.12s/it] 58%|█████▊ | 7037/12188 [15:11:15<11:49:26, 8.26s/it] {'loss': 0.3186, 'grad_norm': 0.6371600003158373, 'learning_rate': 3.998336060411459e-06, 'epoch': 0.58} + 58%|█████▊ | 7037/12188 [15:11:15<11:49:26, 8.26s/it] 58%|█████▊ | 7038/12188 [15:11:23<11:30:52, 8.05s/it] {'loss': 0.321, 'grad_norm': 0.6784854497463945, 'learning_rate': 3.997034325367185e-06, 'epoch': 0.58} + 58%|█████▊ | 7038/12188 [15:11:23<11:30:52, 8.05s/it] 58%|█████▊ | 7039/12188 [15:11:30<11:10:46, 7.82s/it] {'loss': 0.2935, 'grad_norm': 0.6205135586072251, 'learning_rate': 3.9957326611507375e-06, 'epoch': 0.58} + 58%|█████▊ | 7039/12188 [15:11:30<11:10:46, 7.82s/it] 58%|█████▊ | 7040/12188 [15:11:37<11:02:07, 7.72s/it] {'loss': 0.3089, 'grad_norm': 0.6615267459066195, 'learning_rate': 3.994431067854034e-06, 'epoch': 0.58} + 58%|█████▊ | 7040/12188 [15:11:37<11:02:07, 7.72s/it] 58%|█████▊ | 7041/12188 [15:11:44<10:38:24, 7.44s/it] {'loss': 0.3345, 'grad_norm': 0.6345931054594947, 'learning_rate': 3.9931295455689925e-06, 'epoch': 0.58} + 58%|█████▊ | 7041/12188 [15:11:44<10:38:24, 7.44s/it] 58%|█████▊ | 7042/12188 [15:11:51<10:31:32, 7.36s/it] {'loss': 0.2974, 'grad_norm': 0.7363490729105718, 'learning_rate': 3.991828094387527e-06, 'epoch': 0.58} + 58%|█████▊ | 7042/12188 [15:11:51<10:31:32, 7.36s/it] 58%|█████▊ | 7043/12188 [15:11:59<10:42:07, 7.49s/it] {'loss': 0.3293, 'grad_norm': 0.6242427207801468, 'learning_rate': 3.9905267144015405e-06, 'epoch': 0.58} + 58%|█████▊ | 7043/12188 [15:11:59<10:42:07, 7.49s/it] 58%|█████▊ | 7044/12188 [15:12:06<10:32:19, 7.38s/it] {'loss': 0.3325, 'grad_norm': 0.6696329665190742, 'learning_rate': 3.989225405702937e-06, 'epoch': 0.58} + 58%|█████▊ | 7044/12188 [15:12:06<10:32:19, 7.38s/it] 58%|█████▊ | 7045/12188 [15:12:13<10:25:06, 7.29s/it] {'loss': 0.3091, 'grad_norm': 0.7508968003214228, 'learning_rate': 3.987924168383609e-06, 'epoch': 0.58} + 58%|█████▊ | 7045/12188 [15:12:13<10:25:06, 7.29s/it] 58%|█████▊ | 7046/12188 [15:12:20<10:11:48, 7.14s/it] {'loss': 0.328, 'grad_norm': 0.9584492564019172, 'learning_rate': 3.986623002535451e-06, 'epoch': 0.58} + 58%|█████▊ | 7046/12188 [15:12:20<10:11:48, 7.14s/it] 58%|█████▊ | 7047/12188 [15:12:28<10:26:21, 7.31s/it] {'loss': 0.3152, 'grad_norm': 0.6467305190817915, 'learning_rate': 3.98532190825035e-06, 'epoch': 0.58} + 58%|█████▊ | 7047/12188 [15:12:28<10:26:21, 7.31s/it] 58%|█████▊ | 7048/12188 [15:12:35<10:13:27, 7.16s/it] {'loss': 0.331, 'grad_norm': 0.6832835532038457, 'learning_rate': 3.984020885620184e-06, 'epoch': 0.58} + 58%|█████▊ | 7048/12188 [15:12:35<10:13:27, 7.16s/it] 58%|█████▊ | 7049/12188 [15:12:41<10:02:25, 7.03s/it] {'loss': 0.3017, 'grad_norm': 0.6133288643854968, 'learning_rate': 3.982719934736832e-06, 'epoch': 0.58} + 58%|█████▊ | 7049/12188 [15:12:41<10:02:25, 7.03s/it] 58%|█████▊ | 7050/12188 [15:12:48<10:03:51, 7.05s/it] {'loss': 0.3205, 'grad_norm': 0.6444804286684577, 'learning_rate': 3.981419055692163e-06, 'epoch': 0.58} + 58%|█████▊ | 7050/12188 [15:12:48<10:03:51, 7.05s/it] 58%|█████▊ | 7051/12188 [15:12:55<9:59:59, 7.01s/it] {'loss': 0.3191, 'grad_norm': 0.7362409470660659, 'learning_rate': 3.980118248578044e-06, 'epoch': 0.58} + 58%|█████▊ | 7051/12188 [15:12:55<9:59:59, 7.01s/it] 58%|█████▊ | 7052/12188 [15:13:03<10:16:25, 7.20s/it] {'loss': 0.3563, 'grad_norm': 0.7469665419351849, 'learning_rate': 3.978817513486336e-06, 'epoch': 0.58} + 58%|█████▊ | 7052/12188 [15:13:03<10:16:25, 7.20s/it] 58%|█████▊ | 7053/12188 [15:13:10<10:05:46, 7.08s/it] {'loss': 0.3278, 'grad_norm': 0.6283500330473225, 'learning_rate': 3.9775168505088935e-06, 'epoch': 0.58} + 58%|█████▊ | 7053/12188 [15:13:10<10:05:46, 7.08s/it] 58%|█████▊ | 7054/12188 [15:13:17<9:55:41, 6.96s/it] {'loss': 0.3186, 'grad_norm': 0.6822687519853526, 'learning_rate': 3.97621625973757e-06, 'epoch': 0.58} + 58%|█████▊ | 7054/12188 [15:13:17<9:55:41, 6.96s/it] 58%|█████▊ | 7055/12188 [15:13:23<9:49:12, 6.89s/it] {'loss': 0.3376, 'grad_norm': 0.8770899120585389, 'learning_rate': 3.974915741264207e-06, 'epoch': 0.58} + 58%|█████▊ | 7055/12188 [15:13:23<9:49:12, 6.89s/it] 58%|█████▊ | 7056/12188 [15:13:31<10:00:49, 7.02s/it] {'loss': 0.3627, 'grad_norm': 0.8526754569490341, 'learning_rate': 3.97361529518065e-06, 'epoch': 0.58} + 58%|█████▊ | 7056/12188 [15:13:31<10:00:49, 7.02s/it] 58%|█████▊ | 7057/12188 [15:13:37<9:56:56, 6.98s/it] {'loss': 0.3134, 'grad_norm': 0.6296284779829309, 'learning_rate': 3.97231492157873e-06, 'epoch': 0.58} + 58%|█████▊ | 7057/12188 [15:13:37<9:56:56, 6.98s/it] 58%|█████▊ | 7058/12188 [15:13:44<9:48:55, 6.89s/it] {'loss': 0.3327, 'grad_norm': 0.782384296001675, 'learning_rate': 3.97101462055028e-06, 'epoch': 0.58} + 58%|█████▊ | 7058/12188 [15:13:44<9:48:55, 6.89s/it] 58%|█████▊ | 7059/12188 [15:13:51<9:50:52, 6.91s/it] {'loss': 0.3049, 'grad_norm': 0.6436598779039309, 'learning_rate': 3.969714392187123e-06, 'epoch': 0.58} + 58%|█████▊ | 7059/12188 [15:13:51<9:50:52, 6.91s/it] 58%|█████▊ | 7060/12188 [15:13:58<9:53:05, 6.94s/it] {'loss': 0.3307, 'grad_norm': 0.711515625217179, 'learning_rate': 3.968414236581083e-06, 'epoch': 0.58} + 58%|█████▊ | 7060/12188 [15:13:58<9:53:05, 6.94s/it] 58%|█████▊ | 7061/12188 [15:14:05<10:00:24, 7.03s/it] {'loss': 0.3295, 'grad_norm': 0.6833762085966762, 'learning_rate': 3.967114153823969e-06, 'epoch': 0.58} + 58%|█████▊ | 7061/12188 [15:14:05<10:00:24, 7.03s/it] 58%|█████▊ | 7062/12188 [15:14:12<9:52:13, 6.93s/it] {'loss': 0.323, 'grad_norm': 0.6715673261747686, 'learning_rate': 3.965814144007597e-06, 'epoch': 0.58} + 58%|█████▊ | 7062/12188 [15:14:12<9:52:13, 6.93s/it] 58%|█████▊ | 7063/12188 [15:14:20<10:25:55, 7.33s/it] {'loss': 0.3178, 'grad_norm': 0.7265214758726792, 'learning_rate': 3.964514207223766e-06, 'epoch': 0.58} + 58%|█████▊ | 7063/12188 [15:14:20<10:25:55, 7.33s/it] 58%|█████▊ | 7064/12188 [15:14:27<10:07:40, 7.12s/it] {'loss': 0.3052, 'grad_norm': 0.7637256399583046, 'learning_rate': 3.9632143435642794e-06, 'epoch': 0.58} + 58%|█████▊ | 7064/12188 [15:14:27<10:07:40, 7.12s/it] 58%|█████▊ | 7065/12188 [15:14:36<10:57:50, 7.70s/it] {'loss': 0.31, 'grad_norm': 0.6637979998593568, 'learning_rate': 3.961914553120932e-06, 'epoch': 0.58} + 58%|█████▊ | 7065/12188 [15:14:36<10:57:50, 7.70s/it] 58%|█████▊ | 7066/12188 [15:14:43<10:47:49, 7.59s/it] {'loss': 0.3239, 'grad_norm': 0.6439715002950562, 'learning_rate': 3.960614835985507e-06, 'epoch': 0.58} + 58%|█████▊ | 7066/12188 [15:14:43<10:47:49, 7.59s/it] 58%|█████▊ | 7067/12188 [15:14:51<10:42:00, 7.52s/it] {'loss': 0.3155, 'grad_norm': 0.6749593373751774, 'learning_rate': 3.959315192249796e-06, 'epoch': 0.58} + 58%|█████▊ | 7067/12188 [15:14:51<10:42:00, 7.52s/it] 58%|█████▊ | 7068/12188 [15:14:58<10:28:23, 7.36s/it] {'loss': 0.3224, 'grad_norm': 0.6231114511518994, 'learning_rate': 3.958015622005572e-06, 'epoch': 0.58} + 58%|█████▊ | 7068/12188 [15:14:58<10:28:23, 7.36s/it] 58%|█���███▊ | 7069/12188 [15:15:05<10:19:09, 7.26s/it] {'loss': 0.3417, 'grad_norm': 0.6667287078216324, 'learning_rate': 3.95671612534461e-06, 'epoch': 0.58} + 58%|█████▊ | 7069/12188 [15:15:05<10:19:09, 7.26s/it] 58%|█████▊ | 7070/12188 [15:15:12<10:30:15, 7.39s/it] {'loss': 0.3114, 'grad_norm': 0.6213101741666504, 'learning_rate': 3.955416702358681e-06, 'epoch': 0.58} + 58%|█████▊ | 7070/12188 [15:15:12<10:30:15, 7.39s/it] 58%|█████▊ | 7071/12188 [15:15:20<10:33:47, 7.43s/it] {'loss': 0.2947, 'grad_norm': 0.6324006110994637, 'learning_rate': 3.954117353139546e-06, 'epoch': 0.58} + 58%|█████▊ | 7071/12188 [15:15:20<10:33:47, 7.43s/it] 58%|█████▊ | 7072/12188 [15:15:28<10:55:28, 7.69s/it] {'loss': 0.3101, 'grad_norm': 0.6232425363301958, 'learning_rate': 3.9528180777789644e-06, 'epoch': 0.58} + 58%|█████▊ | 7072/12188 [15:15:28<10:55:28, 7.69s/it] 58%|█████▊ | 7073/12188 [15:15:35<10:45:33, 7.57s/it] {'loss': 0.3208, 'grad_norm': 0.6341785803235916, 'learning_rate': 3.951518876368685e-06, 'epoch': 0.58} + 58%|█████▊ | 7073/12188 [15:15:35<10:45:33, 7.57s/it] 58%|█████▊ | 7074/12188 [15:15:43<10:49:57, 7.63s/it] {'loss': 0.3114, 'grad_norm': 0.6669559432445702, 'learning_rate': 3.950219749000458e-06, 'epoch': 0.58} + 58%|█████▊ | 7074/12188 [15:15:43<10:49:57, 7.63s/it] 58%|█████▊ | 7075/12188 [15:15:50<10:28:45, 7.38s/it] {'loss': 0.3159, 'grad_norm': 0.7841255045747927, 'learning_rate': 3.9489206957660284e-06, 'epoch': 0.58} + 58%|█████▊ | 7075/12188 [15:15:50<10:28:45, 7.38s/it] 58%|█████▊ | 7076/12188 [15:15:58<10:34:51, 7.45s/it] {'loss': 0.2985, 'grad_norm': 0.7047042714882914, 'learning_rate': 3.947621716757128e-06, 'epoch': 0.58} + 58%|█████▊ | 7076/12188 [15:15:58<10:34:51, 7.45s/it] 58%|█████▊ | 7077/12188 [15:16:05<10:22:12, 7.30s/it] {'loss': 0.3296, 'grad_norm': 0.6534534466867709, 'learning_rate': 3.9463228120654926e-06, 'epoch': 0.58} + 58%|█████▊ | 7077/12188 [15:16:05<10:22:12, 7.30s/it] 58%|█████▊ | 7078/12188 [15:16:12<10:12:48, 7.20s/it] {'loss': 0.3322, 'grad_norm': 0.6608091541839569, 'learning_rate': 3.945023981782848e-06, 'epoch': 0.58} + 58%|█████▊ | 7078/12188 [15:16:12<10:12:48, 7.20s/it] 58%|█████▊ | 7079/12188 [15:16:19<10:06:13, 7.12s/it] {'loss': 0.3473, 'grad_norm': 0.689433687168061, 'learning_rate': 3.943725226000913e-06, 'epoch': 0.58} + 58%|█████▊ | 7079/12188 [15:16:19<10:06:13, 7.12s/it] 58%|█████▊ | 7080/12188 [15:16:25<10:01:46, 7.07s/it] {'loss': 0.2979, 'grad_norm': 0.7239795294334047, 'learning_rate': 3.942426544811407e-06, 'epoch': 0.58} + 58%|█████▊ | 7080/12188 [15:16:25<10:01:46, 7.07s/it] 58%|█████▊ | 7081/12188 [15:16:32<9:49:04, 6.92s/it] {'loss': 0.3562, 'grad_norm': 0.7406484587023205, 'learning_rate': 3.941127938306038e-06, 'epoch': 0.58} + 58%|█████▊ | 7081/12188 [15:16:32<9:49:04, 6.92s/it] 58%|█████▊ | 7082/12188 [15:16:41<10:39:50, 7.52s/it] {'loss': 0.3113, 'grad_norm': 0.6190174065976299, 'learning_rate': 3.939829406576512e-06, 'epoch': 0.58} + 58%|█████▊ | 7082/12188 [15:16:41<10:39:50, 7.52s/it] 58%|█████▊ | 7083/12188 [15:16:48<10:25:48, 7.36s/it] {'loss': 0.3177, 'grad_norm': 0.6450454865140072, 'learning_rate': 3.938530949714533e-06, 'epoch': 0.58} + 58%|█████▊ | 7083/12188 [15:16:48<10:25:48, 7.36s/it] 58%|█████▊ | 7084/12188 [15:16:58<11:33:04, 8.15s/it] {'loss': 0.2971, 'grad_norm': 0.6309644916438866, 'learning_rate': 3.9372325678117916e-06, 'epoch': 0.58} + 58%|█████▊ | 7084/12188 [15:16:58<11:33:04, 8.15s/it] 58%|█████▊ | 7085/12188 [15:17:05<11:03:37, 7.80s/it] {'loss': 0.3042, 'grad_norm': 0.7088865937371482, 'learning_rate': 3.935934260959978e-06, 'epoch': 0.58} + 58%|█████▊ | 7085/12188 [15:17:05<11:03:37, 7.80s/it] 58%|█████▊ | 7086/12188 [15:17:12<10:35:33, 7.47s/it] {'loss': 0.3041, 'grad_norm': 0.6539693795584257, 'learning_rate': 3.934636029250778e-06, 'epoch': 0.58} + 58%|█████▊ | 7086/12188 [15:17:12<10:35:33, 7.47s/it] 58%|█████▊ | 7087/12188 [15:17:18<10:14:18, 7.23s/it] {'loss': 0.3267, 'grad_norm': 0.7274804773765053, 'learning_rate': 3.933337872775869e-06, 'epoch': 0.58} + 58%|█████▊ | 7087/12188 [15:17:18<10:14:18, 7.23s/it] 58%|█████▊ | 7088/12188 [15:17:27<11:00:54, 7.78s/it] {'loss': 0.3116, 'grad_norm': 0.6465491574664447, 'learning_rate': 3.932039791626928e-06, 'epoch': 0.58} + 58%|█████▊ | 7088/12188 [15:17:27<11:00:54, 7.78s/it] 58%|█████▊ | 7089/12188 [15:17:35<10:57:22, 7.74s/it] {'loss': 0.3119, 'grad_norm': 0.6351010343525784, 'learning_rate': 3.9307417858956185e-06, 'epoch': 0.58} + 58%|█████▊ | 7089/12188 [15:17:35<10:57:22, 7.74s/it] 58%|█████▊ | 7090/12188 [15:17:42<10:39:17, 7.52s/it] {'loss': 0.3027, 'grad_norm': 0.6892604857598336, 'learning_rate': 3.9294438556736084e-06, 'epoch': 0.58} + 58%|█████▊ | 7090/12188 [15:17:42<10:39:17, 7.52s/it] 58%|█████▊ | 7091/12188 [15:17:49<10:22:20, 7.33s/it] {'loss': 0.3267, 'grad_norm': 0.6797616343118371, 'learning_rate': 3.928146001052552e-06, 'epoch': 0.58} + 58%|█████▊ | 7091/12188 [15:17:49<10:22:20, 7.33s/it] 58%|█████▊ | 7092/12188 [15:17:56<10:14:42, 7.24s/it] {'loss': 0.2928, 'grad_norm': 0.6173215327651256, 'learning_rate': 3.926848222124103e-06, 'epoch': 0.58} + 58%|█████▊ | 7092/12188 [15:17:56<10:14:42, 7.24s/it] 58%|█████▊ | 7093/12188 [15:18:05<11:00:29, 7.78s/it] {'loss': 0.3225, 'grad_norm': 0.625427632986777, 'learning_rate': 3.92555051897991e-06, 'epoch': 0.58} + 58%|█████▊ | 7093/12188 [15:18:05<11:00:29, 7.78s/it] 58%|█████▊ | 7094/12188 [15:18:12<10:32:25, 7.45s/it] {'loss': 0.3225, 'grad_norm': 0.6490537748896813, 'learning_rate': 3.92425289171161e-06, 'epoch': 0.58} + 58%|█████▊ | 7094/12188 [15:18:12<10:32:25, 7.45s/it] 58%|█████▊ | 7095/12188 [15:18:19<10:22:03, 7.33s/it] {'loss': 0.3089, 'grad_norm': 0.6650790892971045, 'learning_rate': 3.922955340410844e-06, 'epoch': 0.58} + 58%|█████▊ | 7095/12188 [15:18:19<10:22:03, 7.33s/it] 58%|█████▊ | 7096/12188 [15:18:25<10:04:46, 7.13s/it] {'loss': 0.3746, 'grad_norm': 0.7575538917799592, 'learning_rate': 3.921657865169242e-06, 'epoch': 0.58} + 58%|█████▊ | 7096/12188 [15:18:25<10:04:46, 7.13s/it] 58%|█████▊ | 7097/12188 [15:18:32<9:53:00, 6.99s/it] {'loss': 0.3307, 'grad_norm': 0.7238834980676823, 'learning_rate': 3.920360466078428e-06, 'epoch': 0.58} + 58%|█████▊ | 7097/12188 [15:18:32<9:53:00, 6.99s/it] 58%|█████▊ | 7098/12188 [15:18:39<10:00:35, 7.08s/it] {'loss': 0.3156, 'grad_norm': 0.6192677796303321, 'learning_rate': 3.9190631432300255e-06, 'epoch': 0.58} + 58%|█████▊ | 7098/12188 [15:18:39<10:00:35, 7.08s/it] 58%|█████▊ | 7099/12188 [15:18:46<9:54:39, 7.01s/it] {'loss': 0.323, 'grad_norm': 0.8259690397297306, 'learning_rate': 3.917765896715645e-06, 'epoch': 0.58} + 58%|█████▊ | 7099/12188 [15:18:46<9:54:39, 7.01s/it] 58%|█████▊ | 7100/12188 [15:18:53<9:59:43, 7.07s/it] {'loss': 0.3143, 'grad_norm': 0.6816451998344789, 'learning_rate': 3.916468726626898e-06, 'epoch': 0.58} + 58%|█████▊ | 7100/12188 [15:18:53<9:59:43, 7.07s/it] 58%|█████▊ | 7101/12188 [15:19:00<9:53:21, 7.00s/it] {'loss': 0.3161, 'grad_norm': 0.7236855927731777, 'learning_rate': 3.915171633055391e-06, 'epoch': 0.58} + 58%|█████▊ | 7101/12188 [15:19:00<9:53:21, 7.00s/it] 58%|█████▊ | 7102/12188 [15:19:08<10:12:51, 7.23s/it] {'loss': 0.2982, 'grad_norm': 0.694377700001339, 'learning_rate': 3.913874616092718e-06, 'epoch': 0.58} + 58%|█████▊ | 7102/12188 [15:19:08<10:12:51, 7.23s/it] 58%|█████▊ | 7103/12188 [15:19:16<10:29:10, 7.42s/it] {'loss': 0.3415, 'grad_norm': 0.6870768707494438, 'learning_rate': 3.912577675830476e-06, 'epoch': 0.58} + 58%|█████▊ | 7103/12188 [15:19:16<10:29:10, 7.42s/it] 58%|█████▊ | 7104/12188 [15:19:23<10:18:21, 7.30s/it] {'loss': 0.3232, 'grad_norm': 0.6981205608860701, 'learning_rate': 3.91128081236025e-06, 'epoch': 0.58} + 58%|█████▊ | 7104/12188 [15:19:23<10:18:21, 7.30s/it] 58%|█████▊ | 7105/12188 [15:19:30<10:17:10, 7.29s/it] {'loss': 0.3612, 'grad_norm': 1.1734905690978235, 'learning_rate': 3.909984025773625e-06, 'epoch': 0.58} + 58%|█████▊ | 7105/12188 [15:19:30<10:17:10, 7.29s/it] 58%|█████▊ | 7106/12188 [15:19:37<9:56:27, 7.04s/it] {'loss': 0.3415, 'grad_norm': 0.7140004986468282, 'learning_rate': 3.908687316162178e-06, 'epoch': 0.58} + 58%|█████▊ | 7106/12188 [15:19:37<9:56:27, 7.04s/it] 58%|█████▊ | 7107/12188 [15:19:44<9:56:07, 7.04s/it] {'loss': 0.2806, 'grad_norm': 0.7059338097929733, 'learning_rate': 3.907390683617477e-06, 'epoch': 0.58} + 58%|█████▊ | 7107/12188 [15:19:44<9:56:07, 7.04s/it] 58%|█████▊ | 7108/12188 [15:19:52<10:42:13, 7.59s/it] {'loss': 0.3235, 'grad_norm': 0.6112723902393721, 'learning_rate': 3.906094128231093e-06, 'epoch': 0.58} + 58%|█████▊ | 7108/12188 [15:19:52<10:42:13, 7.59s/it] 58%|█████▊ | 7109/12188 [15:19:59<10:27:09, 7.41s/it] {'loss': 0.3138, 'grad_norm': 0.6883349668439013, 'learning_rate': 3.904797650094581e-06, 'epoch': 0.58} + 58%|█████▊ | 7109/12188 [15:19:59<10:27:09, 7.41s/it] 58%|█████▊ | 7110/12188 [15:20:06<10:15:58, 7.28s/it] {'loss': 0.3022, 'grad_norm': 0.6540419296387142, 'learning_rate': 3.9035012492995004e-06, 'epoch': 0.58} + 58%|█████▊ | 7110/12188 [15:20:06<10:15:58, 7.28s/it] 58%|█████▊ | 7111/12188 [15:20:15<10:38:28, 7.55s/it] {'loss': 0.3211, 'grad_norm': 0.6660529412527436, 'learning_rate': 3.902204925937402e-06, 'epoch': 0.58} + 58%|█████▊ | 7111/12188 [15:20:15<10:38:28, 7.55s/it] 58%|█████▊ | 7112/12188 [15:20:23<10:50:12, 7.69s/it] {'loss': 0.2987, 'grad_norm': 0.790437207113533, 'learning_rate': 3.9009086800998266e-06, 'epoch': 0.58} + 58%|█████▊ | 7112/12188 [15:20:23<10:50:12, 7.69s/it] 58%|█████▊ | 7113/12188 [15:20:29<10:25:14, 7.39s/it] {'loss': 0.3447, 'grad_norm': 0.7444876432058228, 'learning_rate': 3.899612511878313e-06, 'epoch': 0.58} + 58%|█████▊ | 7113/12188 [15:20:29<10:25:14, 7.39s/it] 58%|█████▊ | 7114/12188 [15:20:36<10:15:53, 7.28s/it] {'loss': 0.3272, 'grad_norm': 0.69691914385101, 'learning_rate': 3.898316421364398e-06, 'epoch': 0.58} + 58%|█████▊ | 7114/12188 [15:20:36<10:15:53, 7.28s/it] 58%|█████▊ | 7115/12188 [15:20:43<9:57:45, 7.07s/it] {'loss': 0.2771, 'grad_norm': 0.6349887809909757, 'learning_rate': 3.897020408649607e-06, 'epoch': 0.58} + 58%|█████▊ | 7115/12188 [15:20:43<9:57:45, 7.07s/it] 58%|█████▊ | 7116/12188 [15:20:50<9:51:43, 7.00s/it] {'loss': 0.32, 'grad_norm': 0.6498870848821703, 'learning_rate': 3.895724473825463e-06, 'epoch': 0.58} + 58%|█████▊ | 7116/12188 [15:20:50<9:51:43, 7.00s/it] 58%|█████▊ | 7117/12188 [15:21:00<11:02:19, 7.84s/it] {'loss': 0.3028, 'grad_norm': 0.6826724446987634, 'learning_rate': 3.8944286169834815e-06, 'epoch': 0.58} + 58%|█████▊ | 7117/12188 [15:21:00<11:02:19, 7.84s/it] 58%|█████▊ | 7118/12188 [15:21:06<10:38:28, 7.56s/it] {'loss': 0.3063, 'grad_norm': 0.6565625120493439, 'learning_rate': 3.893132838215174e-06, 'epoch': 0.58} + 58%|█████▊ | 7118/12188 [15:21:06<10:38:28, 7.56s/it] 58%|█████▊ | 7119/12188 [15:21:13<10:18:39, 7.32s/it] {'loss': 0.2833, 'grad_norm': 0.7564100313443195, 'learning_rate': 3.891837137612049e-06, 'epoch': 0.58} + 58%|█████▊ | 7119/12188 [15:21:13<10:18:39, 7.32s/it] 58%|█████▊ | 7120/12188 [15:21:20<10:10:40, 7.23s/it] {'loss': 0.3379, 'grad_norm': 0.6695459789247694, 'learning_rate': 3.890541515265604e-06, 'epoch': 0.58} + 58%|█████▊ | 7120/12188 [15:21:20<10:10:40, 7.23s/it] 58%|█████▊ | 7121/12188 [15:21:28<10:19:57, 7.34s/it] {'loss': 0.2943, 'grad_norm': 0.6947277115320083, 'learning_rate': 3.889245971267336e-06, 'epoch': 0.58} + 58%|█████▊ | 7121/12188 [15:21:28<10:19:57, 7.34s/it] 58%|█████▊ | 7122/12188 [15:21:35<10:21:59, 7.37s/it] {'loss': 0.3136, 'grad_norm': 0.616039547900988, 'learning_rate': 3.887950505708731e-06, 'epoch': 0.58} + 58%|█████▊ | 7122/12188 [15:21:35<10:21:59, 7.37s/it] 58%|█████▊ | 7123/12188 [15:21:43<10:22:03, 7.37s/it] {'loss': 0.308, 'grad_norm': 0.6726608817780806, 'learning_rate': 3.886655118681275e-06, 'epoch': 0.58} + 58%|█████▊ | 7123/12188 [15:21:43<10:22:03, 7.37s/it] 58%|█████▊ | 7124/12188 [15:21:50<10:13:22, 7.27s/it] {'loss': 0.2767, 'grad_norm': 0.6417471240453699, 'learning_rate': 3.885359810276448e-06, 'epoch': 0.58} + 58%|█████▊ | 7124/12188 [15:21:50<10:13:22, 7.27s/it] 58%|█████▊ | 7125/12188 [15:21:57<10:10:06, 7.23s/it] {'loss': 0.3206, 'grad_norm': 0.760363229724296, 'learning_rate': 3.884064580585717e-06, 'epoch': 0.58} + 58%|█████▊ | 7125/12188 [15:21:57<10:10:06, 7.23s/it] 58%|█████▊ | 7126/12188 [15:22:04<10:14:10, 7.28s/it] {'loss': 0.3161, 'grad_norm': 0.8210428058699445, 'learning_rate': 3.882769429700556e-06, 'epoch': 0.58} + 58%|█████▊ | 7126/12188 [15:22:04<10:14:10, 7.28s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1348, in _get_item + raise ValueError( +ValueError: Number of image tokens ['data/slider/other_screenshot/original/AudioControlPanel_1739898931.4442017.png'] does not match number of images None +[Try #0] Failed to fetch sample 1865783 in VC:s3://gui-agent/jedi/images/component_v1_130k/component_v1_130k_extracted/. Exception: Number of image tokens ['data/slider/other_screenshot/original/AudioControlPanel_1739898931.4442017.png'] does not match number of images None +Problematic sample: {'image': 'data/slider/other_screenshot/original/AudioControlPanel_1739898931.4442017.png', 'conversations': [], 'image_id': 'data/slider/other_screenshot/original/AudioControlPanel_1739898931.4442017.png'} + 58%|█████▊ | 7127/12188 [15:22:11<9:58:08, 7.09s/it] {'loss': 0.3051, 'grad_norm': 0.7185227175375883, 'learning_rate': 3.88147435771242e-06, 'epoch': 0.58} + 58%|█████▊ | 7127/12188 [15:22:11<9:58:08, 7.09s/it] 58%|█████▊ | 7128/12188 [15:22:18<9:48:48, 6.98s/it] {'loss': 0.3244, 'grad_norm': 0.7842419390327371, 'learning_rate': 3.880179364712769e-06, 'epoch': 0.58} + 58%|█████▊ | 7128/12188 [15:22:18<9:48:48, 6.98s/it] 58%|█████▊ | 7129/12188 [15:22:24<9:44:46, 6.94s/it] {'loss': 0.318, 'grad_norm': 0.6804311368808922, 'learning_rate': 3.878884450793053e-06, 'epoch': 0.58} + 58%|█████▊ | 7129/12188 [15:22:24<9:44:46, 6.94s/it] 59%|█████▊ | 7130/12188 [15:22:32<9:50:08, 7.00s/it] {'loss': 0.3307, 'grad_norm': 0.7789190918684386, 'learning_rate': 3.8775896160447136e-06, 'epoch': 0.58} + 59%|█████▊ | 7130/12188 [15:22:32<9:50:08, 7.00s/it] 59%|█████▊ | 7131/12188 [15:22:38<9:46:54, 6.96s/it] {'loss': 0.3063, 'grad_norm': 0.6863130311748864, 'learning_rate': 3.8762948605591926e-06, 'epoch': 0.59} + 59%|█████▊ | 7131/12188 [15:22:38<9:46:54, 6.96s/it] 59%|█████▊ | 7132/12188 [15:22:45<9:44:07, 6.93s/it] {'loss': 0.3153, 'grad_norm': 0.7247532335649363, 'learning_rate': 3.875000184427925e-06, 'epoch': 0.59} + 59%|█████▊ | 7132/12188 [15:22:45<9:44:07, 6.93s/it] 59%|█████▊ | 7133/12188 [15:22:52<9:42:38, 6.92s/it] {'loss': 0.2994, 'grad_norm': 0.7518642801682311, 'learning_rate': 3.873705587742336e-06, 'epoch': 0.59} + 59%|█████▊ | 7133/12188 [15:22:52<9:42:38, 6.92s/it] 59%|█████▊ | 7134/12188 [15:22:59<9:31:31, 6.78s/it] {'loss': 0.3177, 'grad_norm': 0.709024819946285, 'learning_rate': 3.87241107059385e-06, 'epoch': 0.59} + 59%|█████▊ | 7134/12188 [15:22:59<9:31:31, 6.78s/it] 59%|█████▊ | 7135/12188 [15:23:06<9:34:47, 6.83s/it] {'loss': 0.3515, 'grad_norm': 0.6694662924412195, 'learning_rate': 3.8711166330738805e-06, 'epoch': 0.59} + 59%|█████▊ | 7135/12188 [15:23:06<9:34:47, 6.83s/it] 59%|█████▊ | 7136/12188 [15:23:13<9:53:25, 7.05s/it] {'loss': 0.3029, 'grad_norm': 0.6176459839849961, 'learning_rate': 3.869822275273841e-06, 'epoch': 0.59} + 59%|█████▊ | 7136/12188 [15:23:13<9:53:25, 7.05s/it] 59%|█████▊ | 7137/12188 [15:23:21<10:15:59, 7.32s/it] {'loss': 0.3123, 'grad_norm': 0.6611098381144256, 'learning_rate': 3.8685279972851385e-06, 'epoch': 0.59} + 59%|█████▊ | 7137/12188 [15:23:21<10:15:59, 7.32s/it] 59%|█████▊ | 7138/12188 [15:23:28<10:06:57, 7.21s/it] {'loss': 0.3179, 'grad_norm': 0.6500870580510151, 'learning_rate': 3.867233799199169e-06, 'epoch': 0.59} + 59%|█████▊ | 7138/12188 [15:23:28<10:06:57, 7.21s/it] 59%|█████▊ | 7139/12188 [15:23:35<10:08:09, 7.23s/it] {'loss': 0.3217, 'grad_norm': 0.9137029007399061, 'learning_rate': 3.86593968110733e-06, 'epoch': 0.59} + 59%|█████▊ | 7139/12188 [15:23:35<10:08:09, 7.23s/it] 59%|█████▊ | 7140/12188 [15:23:42<9:57:32, 7.10s/it] {'loss': 0.3135, 'grad_norm': 0.5885311209301591, 'learning_rate': 3.8646456431010086e-06, 'epoch': 0.59} + 59%|█████▊ | 7140/12188 [15:23:42<9:57:32, 7.10s/it] 59%|█████▊ | 7141/12188 [15:23:49<9:51:45, 7.03s/it] {'loss': 0.3795, 'grad_norm': 0.6493091590779807, 'learning_rate': 3.863351685271586e-06, 'epoch': 0.59} + 59%|█████▊ | 7141/12188 [15:23:49<9:51:45, 7.03s/it] 59%|█████▊ | 7142/12188 [15:23:59<10:58:39, 7.83s/it] {'loss': 0.2989, 'grad_norm': 0.7428204571022734, 'learning_rate': 3.8620578077104444e-06, 'epoch': 0.59} + 59%|█████▊ | 7142/12188 [15:23:59<10:58:39, 7.83s/it] 59%|█████▊ | 7143/12188 [15:24:06<10:48:22, 7.71s/it] {'loss': 0.3308, 'grad_norm': 0.7102592299900693, 'learning_rate': 3.860764010508949e-06, 'epoch': 0.59} + 59%|█████▊ | 7143/12188 [15:24:06<10:48:22, 7.71s/it] 59%|█████▊ | 7144/12188 [15:24:14<10:54:04, 7.78s/it] {'loss': 0.2897, 'grad_norm': 0.6891525533758004, 'learning_rate': 3.859470293758471e-06, 'epoch': 0.59} + 59%|█████▊ | 7144/12188 [15:24:14<10:54:04, 7.78s/it] 59%|█████▊ | 7145/12188 [15:24:21<10:26:22, 7.45s/it] {'loss': 0.3378, 'grad_norm': 0.7048727913594738, 'learning_rate': 3.858176657550367e-06, 'epoch': 0.59} + 59%|█████▊ | 7145/12188 [15:24:21<10:26:22, 7.45s/it] 59%|█████▊ | 7146/12188 [15:24:28<10:12:17, 7.29s/it] {'loss': 0.2758, 'grad_norm': 0.6760663331657984, 'learning_rate': 3.856883101975994e-06, 'epoch': 0.59} + 59%|█████▊ | 7146/12188 [15:24:28<10:12:17, 7.29s/it] 59%|█████▊ | 7147/12188 [15:24:35<10:15:35, 7.33s/it] {'loss': 0.3089, 'grad_norm': 0.6783123061345858, 'learning_rate': 3.855589627126699e-06, 'epoch': 0.59} + 59%|█████▊ | 7147/12188 [15:24:35<10:15:35, 7.33s/it] 59%|█████▊ | 7148/12188 [15:24:44<10:45:03, 7.68s/it] {'loss': 0.2854, 'grad_norm': 0.6376470609416038, 'learning_rate': 3.8542962330938266e-06, 'epoch': 0.59} + 59%|█████▊ | 7148/12188 [15:24:44<10:45:03, 7.68s/it] 59%|█████▊ | 7149/12188 [15:24:51<10:27:30, 7.47s/it] {'loss': 0.304, 'grad_norm': 0.7530538550280989, 'learning_rate': 3.853002919968712e-06, 'epoch': 0.59} + 59%|█████▊ | 7149/12188 [15:24:51<10:27:30, 7.47s/it] 59%|█████▊ | 7150/12188 [15:24:58<10:17:46, 7.36s/it] {'loss': 0.31, 'grad_norm': 0.6427119968910059, 'learning_rate': 3.851709687842691e-06, 'epoch': 0.59} + 59%|█████▊ | 7150/12188 [15:24:58<10:17:46, 7.36s/it] 59%|█████▊ | 7151/12188 [15:25:06<10:37:57, 7.60s/it] {'loss': 0.3257, 'grad_norm': 0.7937034792899227, 'learning_rate': 3.850416536807085e-06, 'epoch': 0.59} + 59%|█████▊ | 7151/12188 [15:25:06<10:37:57, 7.60s/it] 59%|█████▊ | 7152/12188 [15:25:12<10:11:50, 7.29s/it] {'loss': 0.3101, 'grad_norm': 0.7397425394525285, 'learning_rate': 3.849123466953217e-06, 'epoch': 0.59} + 59%|█████▊ | 7152/12188 [15:25:12<10:11:50, 7.29s/it] 59%|█████▊ | 7153/12188 [15:25:21<10:43:39, 7.67s/it] {'loss': 0.3587, 'grad_norm': 0.8411283318933851, 'learning_rate': 3.8478304783724e-06, 'epoch': 0.59} + 59%|█████▊ | 7153/12188 [15:25:21<10:43:39, 7.67s/it] 59%|█████▊ | 7154/12188 [15:25:28<10:26:14, 7.46s/it] {'loss': 0.344, 'grad_norm': 0.6581912306524748, 'learning_rate': 3.846537571155944e-06, 'epoch': 0.59} + 59%|█████▊ | 7154/12188 [15:25:28<10:26:14, 7.46s/it] 59%|█████▊ | 7155/12188 [15:25:35<10:19:49, 7.39s/it] {'loss': 0.2758, 'grad_norm': 0.5586088349081809, 'learning_rate': 3.845244745395153e-06, 'epoch': 0.59} + 59%|█████▊ | 7155/12188 [15:25:35<10:19:49, 7.39s/it] 59%|█████▊ | 7156/12188 [15:25:46<11:35:57, 8.30s/it] {'loss': 0.3222, 'grad_norm': 0.6179859722235006, 'learning_rate': 3.84395200118132e-06, 'epoch': 0.59} + 59%|█████▊ | 7156/12188 [15:25:46<11:35:57, 8.30s/it] 59%|█████▊ | 7157/12188 [15:25:54<11:28:06, 8.21s/it] {'loss': 0.3251, 'grad_norm': 0.7441055697535667, 'learning_rate': 3.842659338605742e-06, 'epoch': 0.59} + 59%|█████▊ | 7157/12188 [15:25:54<11:28:06, 8.21s/it] 59%|█████▊ | 7158/12188 [15:26:00<10:51:38, 7.77s/it] {'loss': 0.3388, 'grad_norm': 0.8716257247250763, 'learning_rate': 3.8413667577597e-06, 'epoch': 0.59} + 59%|█████▊ | 7158/12188 [15:26:00<10:51:38, 7.77s/it] 59%|█████▊ | 7159/12188 [15:26:07<10:20:28, 7.40s/it] {'loss': 0.3172, 'grad_norm': 0.5978289656440595, 'learning_rate': 3.840074258734476e-06, 'epoch': 0.59} + 59%|█████▊ | 7159/12188 [15:26:07<10:20:28, 7.40s/it] 59%|█████▊ | 7160/12188 [15:26:14<10:25:22, 7.46s/it] {'loss': 0.3446, 'grad_norm': 0.709926111460126, 'learning_rate': 3.838781841621347e-06, 'epoch': 0.59} + 59%|█████▊ | 7160/12188 [15:26:14<10:25:22, 7.46s/it] 59%|█████▉ | 7161/12188 [15:26:23<10:40:30, 7.64s/it] {'loss': 0.2931, 'grad_norm': 0.721083949680846, 'learning_rate': 3.8374895065115765e-06, 'epoch': 0.59} + 59%|█████▉ | 7161/12188 [15:26:23<10:40:30, 7.64s/it] 59%|█████▉ | 7162/12188 [15:26:29<10:20:56, 7.41s/it] {'loss': 0.2835, 'grad_norm': 0.651152241032542, 'learning_rate': 3.836197253496431e-06, 'epoch': 0.59} + 59%|█████▉ | 7162/12188 [15:26:29<10:20:56, 7.41s/it] 59%|█████▉ | 7163/12188 [15:26:36<10:07:02, 7.25s/it] {'loss': 0.2712, 'grad_norm': 0.5880737518424691, 'learning_rate': 3.834905082667164e-06, 'epoch': 0.59} + 59%|█████▉ | 7163/12188 [15:26:36<10:07:02, 7.25s/it] 59%|█████▉ | 7164/12188 [15:26:44<10:09:35, 7.28s/it] {'loss': 0.3508, 'grad_norm': 0.6524215039231293, 'learning_rate': 3.8336129941150274e-06, 'epoch': 0.59} + 59%|█████▉ | 7164/12188 [15:26:44<10:09:35, 7.28s/it] 59%|█████▉ | 7165/12188 [15:26:52<10:43:34, 7.69s/it] {'loss': 0.3079, 'grad_norm': 0.6378763689157145, 'learning_rate': 3.832320987931269e-06, 'epoch': 0.59} + 59%|█████▉ | 7165/12188 [15:26:52<10:43:34, 7.69s/it] 59%|█████▉ | 7166/12188 [15:26:59<10:17:42, 7.38s/it] {'loss': 0.3329, 'grad_norm': 0.7584176422430177, 'learning_rate': 3.831029064207126e-06, 'epoch': 0.59} + 59%|█████▉ | 7166/12188 [15:26:59<10:17:42, 7.38s/it] 59%|█████▉ | 7167/12188 [15:27:06<10:12:17, 7.32s/it] {'loss': 0.3289, 'grad_norm': 0.6382100276205213, 'learning_rate': 3.829737223033832e-06, 'epoch': 0.59} + 59%|█████▉ | 7167/12188 [15:27:06<10:12:17, 7.32s/it] 59%|█████▉ | 7168/12188 [15:27:13<9:54:46, 7.11s/it] {'loss': 0.3038, 'grad_norm': 0.8783902679919763, 'learning_rate': 3.828445464502616e-06, 'epoch': 0.59} + 59%|█████▉ | 7168/12188 [15:27:13<9:54:46, 7.11s/it] 59%|█████▉ | 7169/12188 [15:27:20<10:00:54, 7.18s/it] {'loss': 0.3337, 'grad_norm': 0.6519960633993306, 'learning_rate': 3.8271537887046976e-06, 'epoch': 0.59} + 59%|█████▉ | 7169/12188 [15:27:20<10:00:54, 7.18s/it] 59%|█████▉ | 7170/12188 [15:27:28<10:24:24, 7.47s/it] {'loss': 0.3087, 'grad_norm': 0.6430223353774616, 'learning_rate': 3.825862195731297e-06, 'epoch': 0.59} + 59%|█████▉ | 7170/12188 [15:27:28<10:24:24, 7.47s/it] 59%|█████▉ | 7171/12188 [15:27:35<10:15:18, 7.36s/it] {'loss': 0.3297, 'grad_norm': 0.6713441669531096, 'learning_rate': 3.824570685673618e-06, 'epoch': 0.59} + 59%|█████▉ | 7171/12188 [15:27:35<10:15:18, 7.36s/it] 59%|█████▉ | 7172/12188 [15:27:42<9:57:25, 7.15s/it] {'loss': 0.3528, 'grad_norm': 0.7800091339648594, 'learning_rate': 3.82327925862287e-06, 'epoch': 0.59} + 59%|█████▉ | 7172/12188 [15:27:42<9:57:25, 7.15s/it] 59%|█████▉ | 7173/12188 [15:27:49<9:50:49, 7.07s/it] {'loss': 0.3244, 'grad_norm': 0.7228640424780296, 'learning_rate': 3.821987914670252e-06, 'epoch': 0.59} + 59%|█████▉ | 7173/12188 [15:27:49<9:50:49, 7.07s/it] 59%|█████▉ | 7174/12188 [15:27:56<9:58:08, 7.16s/it] {'loss': 0.2868, 'grad_norm': 0.6573493540278731, 'learning_rate': 3.820696653906954e-06, 'epoch': 0.59} + 59%|█████▉ | 7174/12188 [15:27:56<9:58:08, 7.16s/it] 59%|█████▉ | 7175/12188 [15:28:03<9:47:40, 7.03s/it] {'loss': 0.3645, 'grad_norm': 0.6841963190315057, 'learning_rate': 3.819405476424164e-06, 'epoch': 0.59} + 59%|█████▉ | 7175/12188 [15:28:03<9:47:40, 7.03s/it] 59%|█████▉ | 7176/12188 [15:28:11<10:04:29, 7.24s/it] {'loss': 0.3216, 'grad_norm': 0.7212799106043073, 'learning_rate': 3.8181143823130615e-06, 'epoch': 0.59} + 59%|█████▉ | 7176/12188 [15:28:11<10:04:29, 7.24s/it] 59%|█████▉ | 7177/12188 [15:28:20<10:46:47, 7.74s/it] {'loss': 0.2825, 'grad_norm': 0.675665847980701, 'learning_rate': 3.8168233716648215e-06, 'epoch': 0.59} + 59%|█████▉ | 7177/12188 [15:28:20<10:46:47, 7.74s/it] 59%|█████▉ | 7178/12188 [15:28:27<10:34:42, 7.60s/it] {'loss': 0.3227, 'grad_norm': 0.7248134786581449, 'learning_rate': 3.815532444570617e-06, 'epoch': 0.59} + 59%|█████▉ | 7178/12188 [15:28:27<10:34:42, 7.60s/it] 59%|█████▉ | 7179/12188 [15:28:34<10:17:56, 7.40s/it] {'loss': 0.3194, 'grad_norm': 0.7021444340943291, 'learning_rate': 3.814241601121605e-06, 'epoch': 0.59} + 59%|█████▉ | 7179/12188 [15:28:34<10:17:56, 7.40s/it] 59%|█████▉ | 7180/12188 [15:28:41<10:18:22, 7.41s/it] {'loss': 0.3261, 'grad_norm': 0.7230799719223179, 'learning_rate': 3.812950841408949e-06, 'epoch': 0.59} + 59%|█████▉ | 7180/12188 [15:28:41<10:18:22, 7.41s/it] 59%|█████▉ | 7181/12188 [15:28:48<10:13:55, 7.36s/it] {'loss': 0.2793, 'grad_norm': 0.7352006059283654, 'learning_rate': 3.811660165523795e-06, 'epoch': 0.59} + 59%|█████▉ | 7181/12188 [15:28:48<10:13:55, 7.36s/it] 59%|█████▉ | 7182/12188 [15:28:55<9:54:52, 7.13s/it] {'loss': 0.3106, 'grad_norm': 0.6542192293466013, 'learning_rate': 3.8103695735572922e-06, 'epoch': 0.59} + 59%|█████▉ | 7182/12188 [15:28:55<9:54:52, 7.13s/it] 59%|█████▉ | 7183/12188 [15:29:02<9:56:38, 7.15s/it] {'loss': 0.3226, 'grad_norm': 0.7345820411028887, 'learning_rate': 3.8090790656005792e-06, 'epoch': 0.59} + 59%|█████▉ | 7183/12188 [15:29:02<9:56:38, 7.15s/it] 59%|█████▉ | 7184/12188 [15:29:09<9:53:54, 7.12s/it] {'loss': 0.3176, 'grad_norm': 0.6408745021308299, 'learning_rate': 3.807788641744788e-06, 'epoch': 0.59} + 59%|█████▉ | 7184/12188 [15:29:09<9:53:54, 7.12s/it] 59%|█████▉ | 7185/12188 [15:29:16<9:47:06, 7.04s/it] {'loss': 0.3265, 'grad_norm': 0.6917106406397342, 'learning_rate': 3.806498302081047e-06, 'epoch': 0.59} + 59%|█████▉ | 7185/12188 [15:29:16<9:47:06, 7.04s/it] 59%|█████▉ | 7186/12188 [15:29:23<9:37:31, 6.93s/it] {'loss': 0.2873, 'grad_norm': 0.6997254398933781, 'learning_rate': 3.8052080467004808e-06, 'epoch': 0.59} + 59%|█████▉ | 7186/12188 [15:29:23<9:37:31, 6.93s/it] 59%|█████▉ | 7187/12188 [15:29:30<9:50:22, 7.08s/it] {'loss': 0.2908, 'grad_norm': 0.7957212310715522, 'learning_rate': 3.803917875694201e-06, 'epoch': 0.59} + 59%|█████▉ | 7187/12188 [15:29:30<9:50:22, 7.08s/it] 59%|█████▉ | 7188/12188 [15:29:37<9:44:13, 7.01s/it] {'loss': 0.3145, 'grad_norm': 0.6818145975230553, 'learning_rate': 3.8026277891533203e-06, 'epoch': 0.59} + 59%|█████▉ | 7188/12188 [15:29:37<9:44:13, 7.01s/it] 59%|█████▉ | 7189/12188 [15:29:45<10:14:02, 7.37s/it] {'loss': 0.3348, 'grad_norm': 0.7735560625190547, 'learning_rate': 3.8013377871689406e-06, 'epoch': 0.59} + 59%|█████▉ | 7189/12188 [15:29:45<10:14:02, 7.37s/it] 59%|█████▉ | 7190/12188 [15:29:52<10:02:16, 7.23s/it] {'loss': 0.2822, 'grad_norm': 0.7839031778841977, 'learning_rate': 3.80004786983216e-06, 'epoch': 0.59} + 59%|█████▉ | 7190/12188 [15:29:52<10:02:16, 7.23s/it] 59%|█████▉ | 7191/12188 [15:29:59<9:49:23, 7.08s/it] {'loss': 0.2991, 'grad_norm': 0.7351420586940812, 'learning_rate': 3.7987580372340726e-06, 'epoch': 0.59} + 59%|█████▉ | 7191/12188 [15:29:59<9:49:23, 7.08s/it] 59%|█████▉ | 7192/12188 [15:30:06<9:39:37, 6.96s/it] {'loss': 0.3236, 'grad_norm': 0.6654759582515442, 'learning_rate': 3.797468289465761e-06, 'epoch': 0.59} + 59%|█████▉ | 7192/12188 [15:30:06<9:39:37, 6.96s/it] 59%|█████▉ | 7193/12188 [15:30:12<9:34:52, 6.91s/it] {'loss': 0.3095, 'grad_norm': 0.8712808455352973, 'learning_rate': 3.7961786266183086e-06, 'epoch': 0.59} + 59%|█████▉ | 7193/12188 [15:30:12<9:34:52, 6.91s/it] 59%|█████▉ | 7194/12188 [15:30:20<9:40:28, 6.97s/it] {'loss': 0.3305, 'grad_norm': 0.648544529532138, 'learning_rate': 3.7948890487827854e-06, 'epoch': 0.59} + 59%|█████▉ | 7194/12188 [15:30:20<9:40:28, 6.97s/it] 59%|█████▉ | 7195/12188 [15:30:27<9:56:56, 7.17s/it] {'loss': 0.2968, 'grad_norm': 0.6892681753941027, 'learning_rate': 3.7935995560502626e-06, 'epoch': 0.59} + 59%|█████▉ | 7195/12188 [15:30:27<9:56:56, 7.17s/it] 59%|█████▉ | 7196/12188 [15:30:35<10:03:46, 7.26s/it] {'loss': 0.2912, 'grad_norm': 0.5878559502861292, 'learning_rate': 3.792310148511802e-06, 'epoch': 0.59} + 59%|█████▉ | 7196/12188 [15:30:35<10:03:46, 7.26s/it] 59%|█████▉ | 7197/12188 [15:30:43<10:25:09, 7.52s/it] {'loss': 0.3502, 'grad_norm': 0.9695804095482218, 'learning_rate': 3.791020826258456e-06, 'epoch': 0.59} + 59%|█████▉ | 7197/12188 [15:30:43<10:25:09, 7.52s/it] 59%|█████▉ | 7198/12188 [15:30:50<10:10:46, 7.34s/it] {'loss': 0.3114, 'grad_norm': 0.7348195773644431, 'learning_rate': 3.7897315893812796e-06, 'epoch': 0.59} + 59%|█████▉ | 7198/12188 [15:30:50<10:10:46, 7.34s/it] 59%|█████▉ | 7199/12188 [15:30:58<10:39:11, 7.69s/it] {'loss': 0.3188, 'grad_norm': 0.6952812838149849, 'learning_rate': 3.7884424379713114e-06, 'epoch': 0.59} + 59%|█████▉ | 7199/12188 [15:30:58<10:39:11, 7.69s/it] 59%|█████▉ | 7200/12188 [15:31:05<10:23:55, 7.51s/it] {'loss': 0.3111, 'grad_norm': 0.6467866273126414, 'learning_rate': 3.787153372119592e-06, 'epoch': 0.59} + 59%|█████▉ | 7200/12188 [15:31:05<10:23:55, 7.51s/it] 59%|█████▉ | 7201/12188 [15:31:12<10:15:00, 7.40s/it] {'loss': 0.3166, 'grad_norm': 0.6691183368201904, 'learning_rate': 3.7858643919171543e-06, 'epoch': 0.59} + 59%|█████▉ | 7201/12188 [15:31:12<10:15:00, 7.40s/it] 59%|█████▉ | 7202/12188 [15:31:19<9:57:10, 7.19s/it] {'loss': 0.3301, 'grad_norm': 0.6963407064591152, 'learning_rate': 3.784575497455022e-06, 'epoch': 0.59} + 59%|█████▉ | 7202/12188 [15:31:19<9:57:10, 7.19s/it] 59%|█████▉ | 7203/12188 [15:31:26<9:56:13, 7.18s/it] {'loss': 0.2939, 'grad_norm': 0.7391155352090247, 'learning_rate': 3.783286688824214e-06, 'epoch': 0.59} + 59%|█████▉ | 7203/12188 [15:31:26<9:56:13, 7.18s/it] 59%|█████▉ | 7204/12188 [15:31:34<10:18:15, 7.44s/it] {'loss': 0.3503, 'grad_norm': 0.6799494405852019, 'learning_rate': 3.781997966115748e-06, 'epoch': 0.59} + 59%|█████▉ | 7204/12188 [15:31:34<10:18:15, 7.44s/it] 59%|█████▉ | 7205/12188 [15:31:45<11:33:27, 8.35s/it] {'loss': 0.305, 'grad_norm': 0.6702364502460492, 'learning_rate': 3.780709329420626e-06, 'epoch': 0.59} + 59%|█████▉ | 7205/12188 [15:31:45<11:33:27, 8.35s/it] 59%|█████▉ | 7206/12188 [15:31:53<11:33:08, 8.35s/it] {'loss': 0.313, 'grad_norm': 0.7591670734751982, 'learning_rate': 3.7794207788298547e-06, 'epoch': 0.59} + 59%|█████▉ | 7206/12188 [15:31:53<11:33:08, 8.35s/it] 59%|█████▉ | 7207/12188 [15:32:01<11:16:23, 8.15s/it] {'loss': 0.2943, 'grad_norm': 0.6639823950118168, 'learning_rate': 3.7781323144344246e-06, 'epoch': 0.59} + 59%|█████▉ | 7207/12188 [15:32:01<11:16:23, 8.15s/it] 59%|█████▉ | 7208/12188 [15:32:08<10:48:18, 7.81s/it] {'loss': 0.3411, 'grad_norm': 0.7266195361518968, 'learning_rate': 3.7768439363253285e-06, 'epoch': 0.59} + 59%|█████▉ | 7208/12188 [15:32:08<10:48:18, 7.81s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 90, in pil_loader + return img.convert("RGB") + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 993, in convert + self.load() + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/ImageFile.py", line 319, in load + raise _get_oserror(err_code, encoder=False) +OSError: unrecognized data stream contents when reading image file +[Try #0] Failed to fetch sample 6015570 in VC:s3://gui-agent/data_20250623/windows_augment/images. Exception: unrecognized data stream contents when reading image file +Problematic sample: {'image': 'inventor/20250513_095212_1/images/before_screenshot_1_id_68_internvl_position_crop_0_grounding_instructions_point_o_paste.png', 'conversations': [{'from': 'human', 'value': "\nOutput only the bounding box in your response. Located in the top ribbon toolbar of Autodesk Inventor 2019, positioned between the 'Model' button on the left and the 'Inspect' button on the right. It's part of the main tab navigation in the upper portion of the application window."}, {'from': 'gpt', 'value': "Located in the top ribbon toolbar of Autodesk Inventor 2019, positioned between the 'Model' button on the left and the 'Inspect' button on the right. It's part of the main tab navigation in the upper portion of the application window.[[443, 435, 471, 448]]"}], 'width': 2560, 'height': 1440} + 59%|█████▉ | 7209/12188 [15:32:15<10:42:30, 7.74s/it] {'loss': 0.3175, 'grad_norm': 0.625461482222722, 'learning_rate': 3.7755556445935485e-06, 'epoch': 0.59} + 59%|█████▉ | 7209/12188 [15:32:15<10:42:30, 7.74s/it] 59%|█████▉ | 7210/12188 [15:32:22<10:12:41, 7.38s/it] {'loss': 0.295, 'grad_norm': 0.7111427365721458, 'learning_rate': 3.7742674393300614e-06, 'epoch': 0.59} + 59%|█████▉ | 7210/12188 [15:32:22<10:12:41, 7.38s/it] 59%|█████▉ | 7211/12188 [15:32:29<10:14:19, 7.41s/it] {'loss': 0.3206, 'grad_norm': 0.7113071724881922, 'learning_rate': 3.77297932062584e-06, 'epoch': 0.59} + 59%|█████▉ | 7211/12188 [15:32:29<10:14:19, 7.41s/it] 59%|█████▉ | 7212/12188 [15:32:36<10:00:44, 7.24s/it] {'loss': 0.3243, 'grad_norm': 0.9696151828553659, 'learning_rate': 3.7716912885718455e-06, 'epoch': 0.59} + 59%|█████▉ | 7212/12188 [15:32:36<10:00:44, 7.24s/it] 59%|█████▉ | 7213/12188 [15:32:43<9:50:32, 7.12s/it] {'loss': 0.3116, 'grad_norm': 0.7139980241878989, 'learning_rate': 3.7704033432590387e-06, 'epoch': 0.59} + 59%|█████▉ | 7213/12188 [15:32:43<9:50:32, 7.12s/it] 59%|█████▉ | 7214/12188 [15:32:50<9:50:25, 7.12s/it] {'loss': 0.3132, 'grad_norm': 0.6688096625615642, 'learning_rate': 3.769115484778374e-06, 'epoch': 0.59} + 59%|█████▉ | 7214/12188 [15:32:50<9:50:25, 7.12s/it] 59%|█████▉ | 7215/12188 [15:32:58<9:56:01, 7.19s/it] {'loss': 0.3165, 'grad_norm': 0.6656830971446304, 'learning_rate': 3.7678277132207946e-06, 'epoch': 0.59} + 59%|█████▉ | 7215/12188 [15:32:58<9:56:01, 7.19s/it] 59%|█████▉ | 7216/12188 [15:33:05<10:09:08, 7.35s/it] {'loss': 0.3179, 'grad_norm': 0.651971566506506, 'learning_rate': 3.766540028677244e-06, 'epoch': 0.59} + 59%|█████▉ | 7216/12188 [15:33:05<10:09:08, 7.35s/it] 59%|█████▉ | 7217/12188 [15:33:13<10:06:42, 7.32s/it] {'loss': 0.3221, 'grad_norm': 0.6897508569575664, 'learning_rate': 3.7652524312386546e-06, 'epoch': 0.59} + 59%|█████▉ | 7217/12188 [15:33:13<10:06:42, 7.32s/it] 59%|█████▉ | 7218/12188 [15:33:20<10:14:57, 7.42s/it] {'loss': 0.3172, 'grad_norm': 0.7918441778213916, 'learning_rate': 3.763964920995953e-06, 'epoch': 0.59} + 59%|█████▉ | 7218/12188 [15:33:20<10:14:57, 7.42s/it] 59%|█████▉ | 7219/12188 [15:33:27<10:10:58, 7.38s/it] {'loss': 0.319, 'grad_norm': 0.6512561117097561, 'learning_rate': 3.7626774980400654e-06, 'epoch': 0.59} + 59%|█████▉ | 7219/12188 [15:33:28<10:10:58, 7.38s/it] 59%|█████▉ | 7220/12188 [15:33:35<10:02:45, 7.28s/it] {'loss': 0.3072, 'grad_norm': 0.7032321815420306, 'learning_rate': 3.7613901624619025e-06, 'epoch': 0.59} + 59%|█████▉ | 7220/12188 [15:33:35<10:02:45, 7.28s/it] 59%|█████▉ | 7221/12188 [15:33:42<10:05:03, 7.31s/it] {'loss': 0.2682, 'grad_norm': 0.6759050204132572, 'learning_rate': 3.7601029143523767e-06, 'epoch': 0.59} + 59%|█████▉ | 7221/12188 [15:33:42<10:05:03, 7.31s/it] 59%|█████▉ | 7222/12188 [15:33:49<9:58:55, 7.24s/it] {'loss': 0.2848, 'grad_norm': 0.6700895978768787, 'learning_rate': 3.758815753802393e-06, 'epoch': 0.59} + 59%|█████▉ | 7222/12188 [15:33:49<9:58:55, 7.24s/it] 59%|█████▉ | 7223/12188 [15:33:56<9:53:28, 7.17s/it] {'loss': 0.3027, 'grad_norm': 0.6680421135136808, 'learning_rate': 3.7575286809028455e-06, 'epoch': 0.59} + 59%|█████▉ | 7223/12188 [15:33:56<9:53:28, 7.17s/it] 59%|█████▉ | 7224/12188 [15:34:04<10:02:07, 7.28s/it] {'loss': 0.3104, 'grad_norm': 0.6888500843828618, 'learning_rate': 3.7562416957446274e-06, 'epoch': 0.59} + 59%|█████▉ | 7224/12188 [15:34:04<10:02:07, 7.28s/it] 59%|█████▉ | 7225/12188 [15:34:11<9:58:55, 7.24s/it] {'loss': 0.328, 'grad_norm': 0.7389586104534029, 'learning_rate': 3.7549547984186204e-06, 'epoch': 0.59} + 59%|█████▉ | 7225/12188 [15:34:11<9:58:55, 7.24s/it] 59%|█████▉ | 7226/12188 [15:34:18<9:48:02, 7.11s/it] {'loss': 0.2966, 'grad_norm': 0.6806113925253936, 'learning_rate': 3.7536679890157052e-06, 'epoch': 0.59} + 59%|█████▉ | 7226/12188 [15:34:18<9:48:02, 7.11s/it] 59%|█████▉ | 7227/12188 [15:34:24<9:41:13, 7.03s/it] {'loss': 0.3078, 'grad_norm': 0.7077353145315458, 'learning_rate': 3.7523812676267563e-06, 'epoch': 0.59} + 59%|█████▉ | 7227/12188 [15:34:24<9:41:13, 7.03s/it] 59%|█████▉ | 7228/12188 [15:34:31<9:43:23, 7.06s/it] {'loss': 0.3537, 'grad_norm': 0.7184138541277988, 'learning_rate': 3.7510946343426356e-06, 'epoch': 0.59} + 59%|█████▉ | 7228/12188 [15:34:31<9:43:23, 7.06s/it] 59%|█████▉ | 7229/12188 [15:34:38<9:35:08, 6.96s/it] {'loss': 0.2996, 'grad_norm': 0.7184675785292883, 'learning_rate': 3.749808089254208e-06, 'epoch': 0.59} + 59%|█████▉ | 7229/12188 [15:34:38<9:35:08, 6.96s/it] 59%|█████▉ | 7230/12188 [15:34:46<9:43:49, 7.07s/it] {'loss': 0.2841, 'grad_norm': 0.6692012974499022, 'learning_rate': 3.748521632452323e-06, 'epoch': 0.59} + 59%|█████▉ | 7230/12188 [15:34:46<9:43:49, 7.07s/it] 59%|█████▉ | 7231/12188 [15:34:53<9:58:11, 7.24s/it] {'loss': 0.3123, 'grad_norm': 0.6773130516246277, 'learning_rate': 3.7472352640278287e-06, 'epoch': 0.59} + 59%|█████▉ | 7231/12188 [15:34:53<9:58:11, 7.24s/it] 59%|█████▉ | 7232/12188 [15:35:01<10:11:20, 7.40s/it] {'loss': 0.357, 'grad_norm': 0.7392915702466201, 'learning_rate': 3.7459489840715694e-06, 'epoch': 0.59} + 59%|█████▉ | 7232/12188 [15:35:01<10:11:20, 7.40s/it] 59%|█████▉ | 7233/12188 [15:35:08<10:07:13, 7.35s/it] {'loss': 0.2948, 'grad_norm': 0.6782726310269451, 'learning_rate': 3.7446627926743763e-06, 'epoch': 0.59} + 59%|█████▉ | 7233/12188 [15:35:08<10:07:13, 7.35s/it] 59%|█████▉ | 7234/12188 [15:35:15<9:55:37, 7.21s/it] {'loss': 0.3875, 'grad_norm': 0.6809573472011539, 'learning_rate': 3.7433766899270797e-06, 'epoch': 0.59} + 59%|█████▉ | 7234/12188 [15:35:15<9:55:37, 7.21s/it] 59%|█████▉ | 7235/12188 [15:35:22<9:47:16, 7.11s/it] {'loss': 0.3547, 'grad_norm': 0.6859074512711146, 'learning_rate': 3.7420906759205045e-06, 'epoch': 0.59} + 59%|█████▉ | 7235/12188 [15:35:22<9:47:16, 7.11s/it] 59%|█████▉ | 7236/12188 [15:35:29<9:38:09, 7.01s/it] {'loss': 0.3576, 'grad_norm': 0.666873904757583, 'learning_rate': 3.740804750745464e-06, 'epoch': 0.59} + 59%|█████▉ | 7236/12188 [15:35:29<9:38:09, 7.01s/it] 59%|█████▉ | 7237/12188 [15:35:36<9:33:40, 6.95s/it] {'loss': 0.2936, 'grad_norm': 0.6701863723744211, 'learning_rate': 3.739518914492768e-06, 'epoch': 0.59} + 59%|█████▉ | 7237/12188 [15:35:36<9:33:40, 6.95s/it] 59%|█████▉ | 7238/12188 [15:35:43<9:40:30, 7.04s/it] {'loss': 0.3125, 'grad_norm': 0.679773633658077, 'learning_rate': 3.738233167253222e-06, 'epoch': 0.59} + 59%|█████▉ | 7238/12188 [15:35:43<9:40:30, 7.04s/it] 59%|█████▉ | 7239/12188 [15:35:51<9:59:06, 7.26s/it] {'loss': 0.3495, 'grad_norm': 0.7524534966551839, 'learning_rate': 3.736947509117621e-06, 'epoch': 0.59} + 59%|█████▉ | 7239/12188 [15:35:51<9:59:06, 7.26s/it] 59%|█████▉ | 7240/12188 [15:35:57<9:41:45, 7.05s/it] {'loss': 0.323, 'grad_norm': 0.7770783319244988, 'learning_rate': 3.73566194017676e-06, 'epoch': 0.59} + 59%|█████▉ | 7240/12188 [15:35:57<9:41:45, 7.05s/it] 59%|█████▉ | 7241/12188 [15:36:04<9:35:00, 6.97s/it] {'loss': 0.3385, 'grad_norm': 0.7501839180103946, 'learning_rate': 3.7343764605214194e-06, 'epoch': 0.59} + 59%|█████▉ | 7241/12188 [15:36:04<9:35:00, 6.97s/it] 59%|█████▉ | 7242/12188 [15:36:11<9:42:08, 7.06s/it] {'loss': 0.3021, 'grad_norm': 0.6742068067919476, 'learning_rate': 3.7330910702423818e-06, 'epoch': 0.59} + 59%|█████▉ | 7242/12188 [15:36:11<9:42:08, 7.06s/it] 59%|█████▉ | 7243/12188 [15:36:18<9:37:55, 7.01s/it] {'loss': 0.3354, 'grad_norm': 0.8434091326360804, 'learning_rate': 3.7318057694304156e-06, 'epoch': 0.59} + 59%|█████▉ | 7243/12188 [15:36:18<9:37:55, 7.01s/it] 59%|█████▉ | 7244/12188 [15:36:26<9:58:37, 7.26s/it] {'loss': 0.2835, 'grad_norm': 0.7366218374897413, 'learning_rate': 3.7305205581762895e-06, 'epoch': 0.59} + 59%|█████▉ | 7244/12188 [15:36:26<9:58:37, 7.26s/it] 59%|█████▉ | 7245/12188 [15:36:34<10:07:41, 7.38s/it] {'loss': 0.3066, 'grad_norm': 1.0855631549824982, 'learning_rate': 3.729235436570762e-06, 'epoch': 0.59} + 59%|█████▉ | 7245/12188 [15:36:34<10:07:41, 7.38s/it] 59%|█████▉ | 7246/12188 [15:36:41<10:05:14, 7.35s/it] {'loss': 0.3609, 'grad_norm': 0.7120864891154057, 'learning_rate': 3.7279504047045855e-06, 'epoch': 0.59} + 59%|█████▉ | 7246/12188 [15:36:41<10:05:14, 7.35s/it] 59%|█████▉ | 7247/12188 [15:36:47<9:47:16, 7.13s/it] {'loss': 0.3057, 'grad_norm': 0.7061888123626341, 'learning_rate': 3.726665462668509e-06, 'epoch': 0.59} + 59%|█████▉ | 7247/12188 [15:36:47<9:47:16, 7.13s/it] 59%|█████▉ | 7248/12188 [15:36:55<9:53:15, 7.21s/it] {'loss': 0.3482, 'grad_norm': 0.6759962244523561, 'learning_rate': 3.7253806105532698e-06, 'epoch': 0.59} + 59%|█████▉ | 7248/12188 [15:36:55<9:53:15, 7.21s/it] 59%|█████▉ | 7249/12188 [15:37:03<10:22:08, 7.56s/it] {'loss': 0.3179, 'grad_norm': 0.7363265492753155, 'learning_rate': 3.724095848449604e-06, 'epoch': 0.59} + 59%|█████▉ | 7249/12188 [15:37:03<10:22:08, 7.56s/it] 59%|█████▉ | 7250/12188 [15:37:11<10:35:05, 7.72s/it] {'loss': 0.3806, 'grad_norm': 0.686769550183009, 'learning_rate': 3.7228111764482423e-06, 'epoch': 0.59} + 59%|█████▉ | 7250/12188 [15:37:11<10:35:05, 7.72s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f16c66e3240> +[Try #0] Failed to fetch sample 4526075 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f16c66e3240> +Problematic sample: {'image': '20240823_021250_before_screenshot_sub0.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Help & Contact'"}, {'from': 'gpt', 'value': '\nclick(x=0.425, y=0.1195)\n'}]} + 59%|█████▉ | 7251/12188 [15:37:19<10:34:41, 7.71s/it] {'loss': 0.3323, 'grad_norm': 0.6977129454091969, 'learning_rate': 3.7215265946399016e-06, 'epoch': 0.59} + 59%|█████▉ | 7251/12188 [15:37:19<10:34:41, 7.71s/it] 60%|█████▉ | 7252/12188 [15:37:26<10:16:19, 7.49s/it] {'loss': 0.323, 'grad_norm': 0.7801505859272524, 'learning_rate': 3.720242103115298e-06, 'epoch': 0.59} + 60%|█████▉ | 7252/12188 [15:37:26<10:16:19, 7.49s/it] 60%|█████▉ | 7253/12188 [15:37:33<9:59:43, 7.29s/it] {'loss': 0.2681, 'grad_norm': 0.7650211342392836, 'learning_rate': 3.7189577019651433e-06, 'epoch': 0.6} + 60%|█████▉ | 7253/12188 [15:37:33<9:59:43, 7.29s/it] 60%|█████▉ | 7254/12188 [15:37:41<10:27:44, 7.63s/it] {'loss': 0.3297, 'grad_norm': 0.772891127440572, 'learning_rate': 3.717673391280135e-06, 'epoch': 0.6} + 60%|█████▉ | 7254/12188 [15:37:41<10:27:44, 7.63s/it] 60%|█████▉ | 7255/12188 [15:37:48<10:13:02, 7.46s/it] {'loss': 0.3283, 'grad_norm': 0.6864706621340303, 'learning_rate': 3.7163891711509738e-06, 'epoch': 0.6} + 60%|█████▉ | 7255/12188 [15:37:48<10:13:02, 7.46s/it] 60%|█████▉ | 7256/12188 [15:37:55<9:59:40, 7.30s/it] {'loss': 0.3024, 'grad_norm': 0.665090747074034, 'learning_rate': 3.7151050416683456e-06, 'epoch': 0.6} + 60%|█████▉ | 7256/12188 [15:37:55<9:59:40, 7.30s/it] 60%|█████▉ | 7257/12188 [15:38:03<10:02:27, 7.33s/it] {'loss': 0.346, 'grad_norm': 0.7113259825586545, 'learning_rate': 3.713821002922935e-06, 'epoch': 0.6} + 60%|█████▉ | 7257/12188 [15:38:03<10:02:27, 7.33s/it] 60%|█████▉ | 7258/12188 [15:38:09<9:47:52, 7.15s/it] {'loss': 0.3088, 'grad_norm': 0.6749758276169923, 'learning_rate': 3.71253705500542e-06, 'epoch': 0.6} + 60%|█████▉ | 7258/12188 [15:38:09<9:47:52, 7.15s/it] 60%|█████▉ | 7259/12188 [15:38:16<9:40:33, 7.07s/it] {'loss': 0.3078, 'grad_norm': 0.7706345084268255, 'learning_rate': 3.711253198006468e-06, 'epoch': 0.6} + 60%|█████▉ | 7259/12188 [15:38:16<9:40:33, 7.07s/it] 60%|█████▉ | 7260/12188 [15:38:23<9:35:41, 7.01s/it] {'loss': 0.3454, 'grad_norm': 0.7358529936646714, 'learning_rate': 3.709969432016747e-06, 'epoch': 0.6} + 60%|█████▉ | 7260/12188 [15:38:23<9:35:41, 7.01s/it] 60%|█████▉ | 7261/12188 [15:38:30<9:33:49, 6.99s/it] {'loss': 0.3133, 'grad_norm': 0.6368017247116778, 'learning_rate': 3.7086857571269097e-06, 'epoch': 0.6} + 60%|█████▉ | 7261/12188 [15:38:30<9:33:49, 6.99s/it] 60%|█████▉ | 7262/12188 [15:38:36<9:20:40, 6.83s/it] {'loss': 0.3158, 'grad_norm': 0.6985597627254309, 'learning_rate': 3.707402173427609e-06, 'epoch': 0.6} + 60%|█████▉ | 7262/12188 [15:38:37<9:20:40, 6.83s/it] 60%|█████▉ | 7263/12188 [15:38:43<9:15:03, 6.76s/it] {'loss': 0.3168, 'grad_norm': 0.734438450284418, 'learning_rate': 3.706118681009493e-06, 'epoch': 0.6} + 60%|█████▉ | 7263/12188 [15:38:43<9:15:03, 6.76s/it] 60%|█████▉ | 7264/12188 [15:38:50<9:23:08, 6.86s/it] {'loss': 0.3236, 'grad_norm': 0.6853146413745365, 'learning_rate': 3.7048352799631957e-06, 'epoch': 0.6} + 60%|█████▉ | 7264/12188 [15:38:50<9:23:08, 6.86s/it] 60%|█████▉ | 7265/12188 [15:38:57<9:16:33, 6.78s/it] {'loss': 0.3315, 'grad_norm': 0.7608783916951116, 'learning_rate': 3.7035519703793497e-06, 'epoch': 0.6} + 60%|█████▉ | 7265/12188 [15:38:57<9:16:33, 6.78s/it] 60%|█████▉ | 7266/12188 [15:39:04<9:20:09, 6.83s/it] {'loss': 0.3328, 'grad_norm': 0.8123752372381187, 'learning_rate': 3.702268752348581e-06, 'epoch': 0.6} + 60%|█████▉ | 7266/12188 [15:39:04<9:20:09, 6.83s/it] 60%|█████▉ | 7267/12188 [15:39:11<9:23:24, 6.87s/it] {'loss': 0.303, 'grad_norm': 0.6622995668208891, 'learning_rate': 3.7009856259615074e-06, 'epoch': 0.6} + 60%|█████▉ | 7267/12188 [15:39:11<9:23:24, 6.87s/it] 60%|█████▉ | 7268/12188 [15:39:18<9:27:31, 6.92s/it] {'loss': 0.2835, 'grad_norm': 0.7810517645253268, 'learning_rate': 3.699702591308743e-06, 'epoch': 0.6} + 60%|█████▉ | 7268/12188 [15:39:18<9:27:31, 6.92s/it] 60%|█████▉ | 7269/12188 [15:39:25<9:32:15, 6.98s/it] {'loss': 0.3245, 'grad_norm': 0.6686856025464344, 'learning_rate': 3.698419648480891e-06, 'epoch': 0.6} + 60%|█████▉ | 7269/12188 [15:39:25<9:32:15, 6.98s/it] 60%|█████▉ | 7270/12188 [15:39:32<9:27:32, 6.92s/it] {'loss': 0.2829, 'grad_norm': 0.6346466841490684, 'learning_rate': 3.6971367975685536e-06, 'epoch': 0.6} + 60%|█████▉ | 7270/12188 [15:39:32<9:27:32, 6.92s/it] 60%|█████▉ | 7271/12188 [15:39:39<9:49:08, 7.19s/it] {'loss': 0.3305, 'grad_norm': 0.6981589267238595, 'learning_rate': 3.695854038662322e-06, 'epoch': 0.6} + 60%|█████▉ | 7271/12188 [15:39:39<9:49:08, 7.19s/it] 60%|█████▉ | 7272/12188 [15:39:46<9:42:42, 7.11s/it] {'loss': 0.3408, 'grad_norm': 0.6838059610423393, 'learning_rate': 3.6945713718527832e-06, 'epoch': 0.6} + 60%|█████▉ | 7272/12188 [15:39:46<9:42:42, 7.11s/it] 60%|█████▉ | 7273/12188 [15:39:54<9:55:05, 7.26s/it] {'loss': 0.3159, 'grad_norm': 0.7070381234745083, 'learning_rate': 3.6932887972305177e-06, 'epoch': 0.6} + 60%|█████▉ | 7273/12188 [15:39:54<9:55:05, 7.26s/it] 60%|█████▉ | 7274/12188 [15:40:03<10:49:01, 7.92s/it] {'loss': 0.308, 'grad_norm': 0.6690927082684344, 'learning_rate': 3.6920063148860954e-06, 'epoch': 0.6} + 60%|█████▉ | 7274/12188 [15:40:03<10:49:01, 7.92s/it] 60%|█████▉ | 7275/12188 [15:40:11<10:26:53, 7.66s/it] {'loss': 0.3302, 'grad_norm': 0.7190410605437944, 'learning_rate': 3.6907239249100857e-06, 'epoch': 0.6} + 60%|█████▉ | 7275/12188 [15:40:11<10:26:53, 7.66s/it] 60%|█████▉ | 7276/12188 [15:40:18<10:14:48, 7.51s/it] {'loss': 0.2848, 'grad_norm': 0.6518797660523141, 'learning_rate': 3.689441627393051e-06, 'epoch': 0.6} + 60%|█████▉ | 7276/12188 [15:40:18<10:14:48, 7.51s/it] 60%|█████▉ | 7277/12188 [15:40:25<10:03:17, 7.37s/it] {'loss': 0.3346, 'grad_norm': 0.7090811376041319, 'learning_rate': 3.6881594224255414e-06, 'epoch': 0.6} + 60%|█████▉ | 7277/12188 [15:40:25<10:03:17, 7.37s/it] 60%|█████▉ | 7278/12188 [15:40:33<10:30:56, 7.71s/it] {'loss': 0.3044, 'grad_norm': 0.6489565772951977, 'learning_rate': 3.6868773100981083e-06, 'epoch': 0.6} + 60%|█████▉ | 7278/12188 [15:40:33<10:30:56, 7.71s/it] 60%|█████▉ | 7279/12188 [15:40:41<10:26:05, 7.65s/it] {'loss': 0.3256, 'grad_norm': 0.6510453408622608, 'learning_rate': 3.685595290501288e-06, 'epoch': 0.6} + 60%|█████▉ | 7279/12188 [15:40:41<10:26:05, 7.65s/it] 60%|█████▉ | 7280/12188 [15:40:48<10:04:09, 7.39s/it] {'loss': 0.292, 'grad_norm': 1.148534745385316, 'learning_rate': 3.6843133637256158e-06, 'epoch': 0.6} + 60%|█████▉ | 7280/12188 [15:40:48<10:04:09, 7.39s/it] 60%|█████▉ | 7281/12188 [15:40:54<9:54:10, 7.27s/it] {'loss': 0.3216, 'grad_norm': 0.7341198833679929, 'learning_rate': 3.6830315298616215e-06, 'epoch': 0.6} + 60%|█████▉ | 7281/12188 [15:40:55<9:54:10, 7.27s/it] 60%|█████▉ | 7282/12188 [15:41:01<9:45:47, 7.16s/it] {'loss': 0.3459, 'grad_norm': 0.658285468798146, 'learning_rate': 3.6817497889998233e-06, 'epoch': 0.6} + 60%|█████▉ | 7282/12188 [15:41:01<9:45:47, 7.16s/it] 60%|█████▉ | 7283/12188 [15:41:10<10:16:29, 7.54s/it] {'loss': 0.2866, 'grad_norm': 0.6997172678820773, 'learning_rate': 3.680468141230739e-06, 'epoch': 0.6} + 60%|█████▉ | 7283/12188 [15:41:10<10:16:29, 7.54s/it] 60%|█████▉ | 7284/12188 [15:41:18<10:31:10, 7.72s/it] {'loss': 0.3101, 'grad_norm': 0.6481588571625304, 'learning_rate': 3.679186586644873e-06, 'epoch': 0.6} + 60%|█████▉ | 7284/12188 [15:41:18<10:31:10, 7.72s/it] 60%|█████▉ | 7285/12188 [15:41:25<10:16:07, 7.54s/it] {'loss': 0.3507, 'grad_norm': 0.7178630366960127, 'learning_rate': 3.677905125332728e-06, 'epoch': 0.6} + 60%|█████▉ | 7285/12188 [15:41:25<10:16:07, 7.54s/it] 60%|█████▉ | 7286/12188 [15:41:32<9:51:08, 7.24s/it] {'loss': 0.3162, 'grad_norm': 0.6576814291414941, 'learning_rate': 3.676623757384801e-06, 'epoch': 0.6} + 60%|█████▉ | 7286/12188 [15:41:32<9:51:08, 7.24s/it] 60%|█████▉ | 7287/12188 [15:41:39<9:48:25, 7.20s/it] {'loss': 0.3362, 'grad_norm': 0.6416342409749265, 'learning_rate': 3.675342482891576e-06, 'epoch': 0.6} + 60%|█████▉ | 7287/12188 [15:41:39<9:48:25, 7.20s/it] 60%|█████▉ | 7288/12188 [15:41:46<9:50:32, 7.23s/it] {'loss': 0.3184, 'grad_norm': 0.6588084403267093, 'learning_rate': 3.6740613019435356e-06, 'epoch': 0.6} + 60%|█████▉ | 7288/12188 [15:41:46<9:50:32, 7.23s/it] 60%|█████▉ | 7289/12188 [15:41:53<9:33:27, 7.02s/it] {'loss': 0.3224, 'grad_norm': 0.9191869639003687, 'learning_rate': 3.672780214631159e-06, 'epoch': 0.6} + 60%|█████▉ | 7289/12188 [15:41:53<9:33:27, 7.02s/it] 60%|█████▉ | 7290/12188 [15:41:59<9:29:49, 6.98s/it] {'loss': 0.3089, 'grad_norm': 0.768504225771457, 'learning_rate': 3.6714992210449084e-06, 'epoch': 0.6} + 60%|█████▉ | 7290/12188 [15:41:59<9:29:49, 6.98s/it] 60%|█████▉ | 7291/12188 [15:42:07<9:43:46, 7.15s/it] {'loss': 0.3014, 'grad_norm': 0.6488379547114949, 'learning_rate': 3.6702183212752513e-06, 'epoch': 0.6} + 60%|█████▉ | 7291/12188 [15:42:07<9:43:46, 7.15s/it] 60%|█████▉ | 7292/12188 [15:42:14<9:34:24, 7.04s/it] {'loss': 0.3621, 'grad_norm': 0.7222790851309039, 'learning_rate': 3.6689375154126384e-06, 'epoch': 0.6} + 60%|█████▉ | 7292/12188 [15:42:14<9:34:24, 7.04s/it] 60%|█████▉ | 7293/12188 [15:42:22<10:11:01, 7.49s/it] {'loss': 0.37, 'grad_norm': 0.6700465501766817, 'learning_rate': 3.667656803547519e-06, 'epoch': 0.6} + 60%|█████▉ | 7293/12188 [15:42:22<10:11:01, 7.49s/it] 60%|█████▉ | 7294/12188 [15:42:30<10:04:40, 7.41s/it] {'loss': 0.3051, 'grad_norm': 0.7438818332623135, 'learning_rate': 3.666376185770337e-06, 'epoch': 0.6} + 60%|█████▉ | 7294/12188 [15:42:30<10:04:40, 7.41s/it] 60%|█████▉ | 7295/12188 [15:42:36<9:46:41, 7.19s/it] {'loss': 0.3341, 'grad_norm': 0.7261410157720272, 'learning_rate': 3.6650956621715246e-06, 'epoch': 0.6} + 60%|█████▉ | 7295/12188 [15:42:36<9:46:41, 7.19s/it] 60%|█████▉ | 7296/12188 [15:42:43<9:31:32, 7.01s/it] {'loss': 0.307, 'grad_norm': 0.6803243049794409, 'learning_rate': 3.6638152328415134e-06, 'epoch': 0.6} + 60%|█████▉ | 7296/12188 [15:42:43<9:31:32, 7.01s/it] 60%|█████▉ | 7297/12188 [15:42:50<9:29:52, 6.99s/it] {'loss': 0.312, 'grad_norm': 0.7715378664035378, 'learning_rate': 3.662534897870722e-06, 'epoch': 0.6} + 60%|█████▉ | 7297/12188 [15:42:50<9:29:52, 6.99s/it] 60%|█████▉ | 7298/12188 [15:42:57<9:26:58, 6.96s/it] {'loss': 0.3708, 'grad_norm': 0.6351767010365732, 'learning_rate': 3.661254657349568e-06, 'epoch': 0.6} + 60%|█████▉ | 7298/12188 [15:42:57<9:26:58, 6.96s/it] 60%|█████▉ | 7299/12188 [15:43:04<9:29:31, 6.99s/it] {'loss': 0.3405, 'grad_norm': 0.7757449025974851, 'learning_rate': 3.65997451136846e-06, 'epoch': 0.6} + 60%|█████▉ | 7299/12188 [15:43:04<9:29:31, 6.99s/it] 60%|█████▉ | 7300/12188 [15:43:10<9:19:58, 6.87s/it] {'loss': 0.3275, 'grad_norm': 0.7040245667837078, 'learning_rate': 3.6586944600177987e-06, 'epoch': 0.6} + 60%|█████▉ | 7300/12188 [15:43:10<9:19:58, 6.87s/it] 60%|█████▉ | 7301/12188 [15:43:18<9:42:36, 7.15s/it] {'loss': 0.2972, 'grad_norm': 0.7331961300491002, 'learning_rate': 3.6574145033879806e-06, 'epoch': 0.6} + 60%|█████▉ | 7301/12188 [15:43:18<9:42:36, 7.15s/it] 60%|█████▉ | 7302/12188 [15:43:25<9:38:58, 7.11s/it] {'loss': 0.3326, 'grad_norm': 0.709826309170206, 'learning_rate': 3.656134641569391e-06, 'epoch': 0.6} + 60%|█████▉ | 7302/12188 [15:43:25<9:38:58, 7.11s/it] 60%|█████▉ | 7303/12188 [15:43:34<10:09:25, 7.49s/it] {'loss': 0.3218, 'grad_norm': 0.6540036991545352, 'learning_rate': 3.654854874652414e-06, 'epoch': 0.6} + 60%|█████▉ | 7303/12188 [15:43:34<10:09:25, 7.49s/it] 60%|█████▉ | 7304/12188 [15:43:42<10:41:35, 7.88s/it] {'loss': 0.302, 'grad_norm': 0.614396156043077, 'learning_rate': 3.653575202727427e-06, 'epoch': 0.6} + 60%|█████▉ | 7304/12188 [15:43:42<10:41:35, 7.88s/it] 60%|█████▉ | 7305/12188 [15:43:50<10:42:21, 7.89s/it] {'loss': 0.2887, 'grad_norm': 0.7327629423396594, 'learning_rate': 3.652295625884793e-06, 'epoch': 0.6} + 60%|█████▉ | 7305/12188 [15:43:50<10:42:21, 7.89s/it] 60%|█████▉ | 7306/12188 [15:43:57<10:16:31, 7.58s/it] {'loss': 0.3229, 'grad_norm': 0.6628188838512677, 'learning_rate': 3.6510161442148783e-06, 'epoch': 0.6} + 60%|█████▉ | 7306/12188 [15:43:57<10:16:31, 7.58s/it] 60%|█████▉ | 7307/12188 [15:44:05<10:28:58, 7.73s/it] {'loss': 0.3521, 'grad_norm': 0.7430644297537885, 'learning_rate': 3.649736757808038e-06, 'epoch': 0.6} + 60%|█████▉ | 7307/12188 [15:44:05<10:28:58, 7.73s/it] 60%|█████▉ | 7308/12188 [15:44:12<10:14:39, 7.56s/it] {'loss': 0.3319, 'grad_norm': 0.7156335800722952, 'learning_rate': 3.6484574667546157e-06, 'epoch': 0.6} + 60%|█████▉ | 7308/12188 [15:44:12<10:14:39, 7.56s/it] 60%|█████▉ | 7309/12188 [15:44:19<9:54:07, 7.31s/it] {'loss': 0.3207, 'grad_norm': 0.6669503415210385, 'learning_rate': 3.647178271144958e-06, 'epoch': 0.6} + 60%|█████▉ | 7309/12188 [15:44:19<9:54:07, 7.31s/it] 60%|█████▉ | 7310/12188 [15:44:26<9:41:55, 7.16s/it] {'loss': 0.3435, 'grad_norm': 0.644421249431716, 'learning_rate': 3.6458991710693946e-06, 'epoch': 0.6} + 60%|█████▉ | 7310/12188 [15:44:26<9:41:55, 7.16s/it] 60%|█████▉ | 7311/12188 [15:44:33<9:49:47, 7.26s/it] {'loss': 0.2999, 'grad_norm': 0.6302408643465133, 'learning_rate': 3.644620166618257e-06, 'epoch': 0.6} + 60%|█████▉ | 7311/12188 [15:44:33<9:49:47, 7.26s/it] 60%|█████▉ | 7312/12188 [15:44:41<10:01:35, 7.40s/it] {'loss': 0.2902, 'grad_norm': 0.6843770928777246, 'learning_rate': 3.643341257881867e-06, 'epoch': 0.6} + 60%|█████▉ | 7312/12188 [15:44:41<10:01:35, 7.40s/it] 60%|██████ | 7313/12188 [15:44:48<9:54:18, 7.31s/it] {'loss': 0.3288, 'grad_norm': 0.6340774882562198, 'learning_rate': 3.642062444950537e-06, 'epoch': 0.6} + 60%|██████ | 7313/12188 [15:44:48<9:54:18, 7.31s/it] 60%|██████ | 7314/12188 [15:44:55<9:47:47, 7.24s/it] {'loss': 0.3466, 'grad_norm': 0.6893527728179772, 'learning_rate': 3.6407837279145765e-06, 'epoch': 0.6} + 60%|██████ | 7314/12188 [15:44:55<9:47:47, 7.24s/it] 60%|██████ | 7315/12188 [15:45:04<10:31:06, 7.77s/it] {'loss': 0.318, 'grad_norm': 0.6645323397570695, 'learning_rate': 3.6395051068642833e-06, 'epoch': 0.6} + 60%|██████ | 7315/12188 [15:45:04<10:31:06, 7.77s/it] 60%|██████ | 7316/12188 [15:45:12<10:36:05, 7.83s/it] {'loss': 0.296, 'grad_norm': 0.8062779474890003, 'learning_rate': 3.6382265818899537e-06, 'epoch': 0.6} + 60%|██████ | 7316/12188 [15:45:12<10:36:05, 7.83s/it] 60%|██████ | 7317/12188 [15:45:20<10:38:54, 7.87s/it] {'loss': 0.324, 'grad_norm': 0.7757773214514152, 'learning_rate': 3.6369481530818775e-06, 'epoch': 0.6} + 60%|██��███ | 7317/12188 [15:45:20<10:38:54, 7.87s/it] 60%|██████ | 7318/12188 [15:45:28<10:30:01, 7.76s/it] {'loss': 0.318, 'grad_norm': 0.6655574221349635, 'learning_rate': 3.63566982053033e-06, 'epoch': 0.6} + 60%|██████ | 7318/12188 [15:45:28<10:30:01, 7.76s/it] 60%|██████ | 7319/12188 [15:45:34<10:04:26, 7.45s/it] {'loss': 0.3651, 'grad_norm': 0.7160850135958331, 'learning_rate': 3.6343915843255904e-06, 'epoch': 0.6} + 60%|██████ | 7319/12188 [15:45:34<10:04:26, 7.45s/it] 60%|██████ | 7320/12188 [15:45:41<9:41:34, 7.17s/it] {'loss': 0.3211, 'grad_norm': 0.6853297709204458, 'learning_rate': 3.633113444557922e-06, 'epoch': 0.6} + 60%|██████ | 7320/12188 [15:45:41<9:41:34, 7.17s/it] 60%|██████ | 7321/12188 [15:45:48<9:40:18, 7.15s/it] {'loss': 0.3283, 'grad_norm': 0.7659765860091136, 'learning_rate': 3.631835401317585e-06, 'epoch': 0.6} + 60%|██████ | 7321/12188 [15:45:48<9:40:18, 7.15s/it] 60%|██████ | 7322/12188 [15:45:55<9:30:20, 7.03s/it] {'loss': 0.3084, 'grad_norm': 0.6896435234190944, 'learning_rate': 3.630557454694835e-06, 'epoch': 0.6} + 60%|██████ | 7322/12188 [15:45:55<9:30:20, 7.03s/it] 60%|██████ | 7323/12188 [15:46:02<9:26:32, 6.99s/it] {'loss': 0.3106, 'grad_norm': 0.7480456168807056, 'learning_rate': 3.629279604779917e-06, 'epoch': 0.6} + 60%|██████ | 7323/12188 [15:46:02<9:26:32, 6.99s/it] 60%|██████ | 7324/12188 [15:46:08<9:21:15, 6.92s/it] {'loss': 0.3128, 'grad_norm': 0.8452089257104003, 'learning_rate': 3.62800185166307e-06, 'epoch': 0.6} + 60%|██████ | 7324/12188 [15:46:08<9:21:15, 6.92s/it] 60%|██████ | 7325/12188 [15:46:16<9:33:58, 7.08s/it] {'loss': 0.3479, 'grad_norm': 0.6763056392831096, 'learning_rate': 3.62672419543453e-06, 'epoch': 0.6} + 60%|██████ | 7325/12188 [15:46:16<9:33:58, 7.08s/it] 60%|██████ | 7326/12188 [15:46:23<9:43:34, 7.20s/it] {'loss': 0.3362, 'grad_norm': 0.7025491851576205, 'learning_rate': 3.6254466361845204e-06, 'epoch': 0.6} + 60%|██████ | 7326/12188 [15:46:23<9:43:34, 7.20s/it] 60%|██████ | 7327/12188 [15:46:30<9:37:52, 7.13s/it] {'loss': 0.3244, 'grad_norm': 0.7572833314009506, 'learning_rate': 3.624169174003261e-06, 'epoch': 0.6} + 60%|██████ | 7327/12188 [15:46:30<9:37:52, 7.13s/it] 60%|██████ | 7328/12188 [15:46:38<9:40:11, 7.16s/it] {'loss': 0.3187, 'grad_norm': 0.7037637387428657, 'learning_rate': 3.622891808980964e-06, 'epoch': 0.6} + 60%|██████ | 7328/12188 [15:46:38<9:40:11, 7.16s/it] 60%|██████ | 7329/12188 [15:46:47<10:27:33, 7.75s/it] {'loss': 0.327, 'grad_norm': 0.7168570824766943, 'learning_rate': 3.621614541207833e-06, 'epoch': 0.6} + 60%|██████ | 7329/12188 [15:46:47<10:27:33, 7.75s/it] 60%|██████ | 7330/12188 [15:46:54<10:11:10, 7.55s/it] {'loss': 0.3085, 'grad_norm': 0.8246001669015819, 'learning_rate': 3.6203373707740718e-06, 'epoch': 0.6} + 60%|██████ | 7330/12188 [15:46:54<10:11:10, 7.55s/it] 60%|██████ | 7331/12188 [15:47:02<10:22:44, 7.69s/it] {'loss': 0.3188, 'grad_norm': 0.6674883812313396, 'learning_rate': 3.619060297769866e-06, 'epoch': 0.6} + 60%|██████ | 7331/12188 [15:47:02<10:22:44, 7.69s/it] 60%|██████ | 7332/12188 [15:47:09<10:13:19, 7.58s/it] {'loss': 0.3376, 'grad_norm': 0.6254276309516091, 'learning_rate': 3.6177833222854062e-06, 'epoch': 0.6} + 60%|██████ | 7332/12188 [15:47:09<10:13:19, 7.58s/it] 60%|██████ | 7333/12188 [15:47:17<10:15:39, 7.61s/it] {'loss': 0.3022, 'grad_norm': 0.7095592486625008, 'learning_rate': 3.616506444410865e-06, 'epoch': 0.6} + 60%|██████ | 7333/12188 [15:47:17<10:15:39, 7.61s/it] 60%|██████ | 7334/12188 [15:47:24<9:59:37, 7.41s/it] {'loss': 0.2981, 'grad_norm': 0.6069013594220055, 'learning_rate': 3.6152296642364172e-06, 'epoch': 0.6} + 60%|██████ | 7334/12188 [15:47:24<9:59:37, 7.41s/it] 60%|██████ | 7335/12188 [15:47:31<9:51:35, 7.31s/it] {'loss': 0.308, 'grad_norm': 0.6275576457744964, 'learning_rate': 3.613952981852226e-06, 'epoch': 0.6} + 60%|██████ | 7335/12188 [15:47:31<9:51:35, 7.31s/it] 60%|██████ | 7336/12188 [15:47:38<9:48:10, 7.27s/it] {'loss': 0.3095, 'grad_norm': 0.7505411042577407, 'learning_rate': 3.612676397348447e-06, 'epoch': 0.6} + 60%|██████ | 7336/12188 [15:47:38<9:48:10, 7.27s/it] 60%|██████ | 7337/12188 [15:47:45<9:39:48, 7.17s/it] {'loss': 0.3242, 'grad_norm': 0.6736870628585179, 'learning_rate': 3.6113999108152342e-06, 'epoch': 0.6} + 60%|██████ | 7337/12188 [15:47:45<9:39:48, 7.17s/it] 60%|██████ | 7338/12188 [15:47:52<9:38:12, 7.15s/it] {'loss': 0.3014, 'grad_norm': 0.683029094823687, 'learning_rate': 3.6101235223427256e-06, 'epoch': 0.6} + 60%|██████ | 7338/12188 [15:47:52<9:38:12, 7.15s/it] 60%|██████ | 7339/12188 [15:47:59<9:31:02, 7.07s/it] {'loss': 0.3014, 'grad_norm': 0.7495402782139019, 'learning_rate': 3.608847232021062e-06, 'epoch': 0.6} + 60%|██████ | 7339/12188 [15:47:59<9:31:02, 7.07s/it] 60%|██████ | 7340/12188 [15:48:06<9:33:21, 7.10s/it] {'loss': 0.3013, 'grad_norm': 0.6903116043766065, 'learning_rate': 3.607571039940373e-06, 'epoch': 0.6} + 60%|██████ | 7340/12188 [15:48:06<9:33:21, 7.10s/it] 60%|██████ | 7341/12188 [15:48:13<9:28:57, 7.04s/it] {'loss': 0.2938, 'grad_norm': 0.7961319355121266, 'learning_rate': 3.606294946190779e-06, 'epoch': 0.6} + 60%|██████ | 7341/12188 [15:48:13<9:28:57, 7.04s/it] 60%|██████ | 7342/12188 [15:48:20<9:15:46, 6.88s/it] {'loss': 0.2821, 'grad_norm': 0.6546309733361485, 'learning_rate': 3.6050189508623966e-06, 'epoch': 0.6} + 60%|██████ | 7342/12188 [15:48:20<9:15:46, 6.88s/it] 60%|██████ | 7343/12188 [15:48:27<9:20:14, 6.94s/it] {'loss': 0.2985, 'grad_norm': 0.7017238792098635, 'learning_rate': 3.6037430540453366e-06, 'epoch': 0.6} + 60%|██████ | 7343/12188 [15:48:27<9:20:14, 6.94s/it] 60%|██████ | 7344/12188 [15:48:34<9:30:03, 7.06s/it] {'loss': 0.3305, 'grad_norm': 0.6168873353332147, 'learning_rate': 3.6024672558296964e-06, 'epoch': 0.6} + 60%|██████ | 7344/12188 [15:48:34<9:30:03, 7.06s/it] 60%|██████ | 7345/12188 [15:48:41<9:29:09, 7.05s/it] {'loss': 0.3209, 'grad_norm': 0.7085224191700044, 'learning_rate': 3.6011915563055754e-06, 'epoch': 0.6} + 60%|██████ | 7345/12188 [15:48:41<9:29:09, 7.05s/it] 60%|██████ | 7346/12188 [15:48:48<9:19:20, 6.93s/it] {'loss': 0.3095, 'grad_norm': 0.6956024028922172, 'learning_rate': 3.599915955563057e-06, 'epoch': 0.6} + 60%|██████ | 7346/12188 [15:48:48<9:19:20, 6.93s/it] 60%|██████ | 7347/12188 [15:48:54<9:09:39, 6.81s/it] {'loss': 0.2898, 'grad_norm': 0.6364544491451555, 'learning_rate': 3.598640453692226e-06, 'epoch': 0.6} + 60%|██████ | 7347/12188 [15:48:54<9:09:39, 6.81s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f06368f8e00> +[Try #0] Failed to fetch sample 4604224 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f06368f8e00> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Statistics'"}, {'from': 'gpt', 'value': '\nclick(x=0.9085, y=0.583)\n'}]} + 60%|██████ | 7348/12188 [15:49:01<9:18:39, 6.93s/it] {'loss': 0.2909, 'grad_norm': 0.6877773279864221, 'learning_rate': 3.5973650507831558e-06, 'epoch': 0.6} + 60%|██████ | 7348/12188 [15:49:01<9:18:39, 6.93s/it] 60%|██████ | 7349/12188 [15:49:08<9:15:09, 6.88s/it] {'loss': 0.3627, 'grad_norm': 0.7181730749608688, 'learning_rate': 3.596089746925909e-06, 'epoch': 0.6} + 60%|██████ | 7349/12188 [15:49:08<9:15:09, 6.88s/it] 60%|██████ | 7350/12188 [15:49:16<9:28:52, 7.06s/it] {'loss': 0.3182, 'grad_norm': 0.7186457767734491, 'learning_rate': 3.594814542210552e-06, 'epoch': 0.6} + 60%|██████ | 7350/12188 [15:49:16<9:28:52, 7.06s/it] 60%|██████ | 7351/12188 [15:49:22<9:24:52, 7.01s/it] {'loss': 0.3444, 'grad_norm': 0.8326075545963394, 'learning_rate': 3.593539436727132e-06, 'epoch': 0.6} + 60%|██████ | 7351/12188 [15:49:22<9:24:52, 7.01s/it] 60%|██████ | 7352/12188 [15:49:29<9:19:32, 6.94s/it] {'loss': 0.3156, 'grad_norm': 1.0161111494374915, 'learning_rate': 3.592264430565697e-06, 'epoch': 0.6} + 60%|██████ | 7352/12188 [15:49:29<9:19:32, 6.94s/it] 60%|██████ | 7353/12188 [15:49:37<9:40:04, 7.20s/it] {'loss': 0.3349, 'grad_norm': 0.7077551312696579, 'learning_rate': 3.590989523816289e-06, 'epoch': 0.6} + 60%|██████ | 7353/12188 [15:49:37<9:40:04, 7.20s/it] 60%|██████ | 7354/12188 [15:49:45<9:52:48, 7.36s/it] {'loss': 0.3, 'grad_norm': 0.6711663347979142, 'learning_rate': 3.5897147165689357e-06, 'epoch': 0.6} + 60%|██████ | 7354/12188 [15:49:45<9:52:48, 7.36s/it] 60%|██████ | 7355/12188 [15:49:53<10:06:14, 7.53s/it] {'loss': 0.2805, 'grad_norm': 0.7659970535314726, 'learning_rate': 3.5884400089136628e-06, 'epoch': 0.6} + 60%|██████ | 7355/12188 [15:49:53<10:06:14, 7.53s/it] 60%|██████ | 7356/12188 [15:50:00<9:52:30, 7.36s/it] {'loss': 0.3552, 'grad_norm': 0.7126632227606037, 'learning_rate': 3.5871654009404895e-06, 'epoch': 0.6} + 60%|██████ | 7356/12188 [15:50:00<9:52:30, 7.36s/it] 60%|██████ | 7357/12188 [15:50:07<9:59:14, 7.44s/it] {'loss': 0.3275, 'grad_norm': 0.687222799555183, 'learning_rate': 3.5858908927394244e-06, 'epoch': 0.6} + 60%|██████ | 7357/12188 [15:50:07<9:59:14, 7.44s/it] 60%|██████ | 7358/12188 [15:50:14<9:48:44, 7.31s/it] {'loss': 0.2757, 'grad_norm': 0.6752854574759234, 'learning_rate': 3.584616484400474e-06, 'epoch': 0.6} + 60%|██████ | 7358/12188 [15:50:14<9:48:44, 7.31s/it] 60%|██████ | 7359/12188 [15:50:22<9:54:26, 7.39s/it] {'loss': 0.2999, 'grad_norm': 0.7002739935750442, 'learning_rate': 3.5833421760136323e-06, 'epoch': 0.6} + 60%|██████ | 7359/12188 [15:50:22<9:54:26, 7.39s/it] 60%|██████ | 7360/12188 [15:50:29<9:58:17, 7.44s/it] {'loss': 0.3214, 'grad_norm': 0.6637448876898706, 'learning_rate': 3.5820679676688904e-06, 'epoch': 0.6} + 60%|██████ | 7360/12188 [15:50:29<9:58:17, 7.44s/it] 60%|██████ | 7361/12188 [15:50:37<9:50:57, 7.35s/it] {'loss': 0.3226, 'grad_norm': 0.8143801171647592, 'learning_rate': 3.580793859456231e-06, 'epoch': 0.6} + 60%|██████ | 7361/12188 [15:50:37<9:50:57, 7.35s/it] 60%|██████ | 7362/12188 [15:50:43<9:35:40, 7.16s/it] {'loss': 0.3285, 'grad_norm': 0.6652568050805074, 'learning_rate': 3.5795198514656287e-06, 'epoch': 0.6} + 60%|██████ | 7362/12188 [15:50:43<9:35:40, 7.16s/it] 60%|██████ | 7363/12188 [15:50:50<9:32:34, 7.12s/it] {'loss': 0.3471, 'grad_norm': 0.9996737845530881, 'learning_rate': 3.578245943787053e-06, 'epoch': 0.6} + 60%|██████ | 7363/12188 [15:50:50<9:32:34, 7.12s/it] 60%|██████ | 7364/12188 [15:50:58<9:42:52, 7.25s/it] {'loss': 0.307, 'grad_norm': 0.6835515666886643, 'learning_rate': 3.5769721365104635e-06, 'epoch': 0.6} + 60%|██████ | 7364/12188 [15:50:58<9:42:52, 7.25s/it] 60%|██████ | 7365/12188 [15:51:05<9:48:00, 7.32s/it] {'loss': 0.3251, 'grad_norm': 0.6319480167506479, 'learning_rate': 3.5756984297258145e-06, 'epoch': 0.6} + 60%|██████ | 7365/12188 [15:51:05<9:48:00, 7.32s/it] 60%|██████ | 7366/12188 [15:51:12<9:31:16, 7.11s/it] {'loss': 0.322, 'grad_norm': 0.7633612609637127, 'learning_rate': 3.5744248235230572e-06, 'epoch': 0.6} + 60%|██████ | 7366/12188 [15:51:12<9:31:16, 7.11s/it] 60%|██████ | 7367/12188 [15:51:19<9:37:28, 7.19s/it] {'loss': 0.3401, 'grad_norm': 0.6983044596098176, 'learning_rate': 3.5731513179921257e-06, 'epoch': 0.6} + 60%|██████ | 7367/12188 [15:51:19<9:37:28, 7.19s/it] 60%|██████ | 7368/12188 [15:51:27<9:44:07, 7.27s/it] {'loss': 0.2962, 'grad_norm': 0.7699145972715643, 'learning_rate': 3.571877913222958e-06, 'epoch': 0.6} + 60%|██████ | 7368/12188 [15:51:27<9:44:07, 7.27s/it] 60%|██████ | 7369/12188 [15:51:34<9:34:01, 7.15s/it] {'loss': 0.3094, 'grad_norm': 0.9534719818931351, 'learning_rate': 3.5706046093054768e-06, 'epoch': 0.6} + 60%|██████ | 7369/12188 [15:51:34<9:34:01, 7.15s/it] 60%|██████ | 7370/12188 [15:51:42<10:12:56, 7.63s/it] {'loss': 0.2778, 'grad_norm': 0.7654749159009464, 'learning_rate': 3.5693314063296005e-06, 'epoch': 0.6} + 60%|██████ | 7370/12188 [15:51:42<10:12:56, 7.63s/it] 60%|██████ | 7371/12188 [15:51:49<9:47:30, 7.32s/it] {'loss': 0.3069, 'grad_norm': 0.778779999629643, 'learning_rate': 3.5680583043852434e-06, 'epoch': 0.6} + 60%|██████ | 7371/12188 [15:51:49<9:47:30, 7.32s/it] 60%|██████ | 7372/12188 [15:51:56<9:40:21, 7.23s/it] {'loss': 0.345, 'grad_norm': 0.7072589181535862, 'learning_rate': 3.566785303562306e-06, 'epoch': 0.6} + 60%|██████ | 7372/12188 [15:51:56<9:40:21, 7.23s/it] 60%|██████ | 7373/12188 [15:52:03<9:35:29, 7.17s/it] {'loss': 0.3203, 'grad_norm': 0.6008922772180731, 'learning_rate': 3.5655124039506905e-06, 'epoch': 0.6} + 60%|██████ | 7373/12188 [15:52:03<9:35:29, 7.17s/it] 61%|██████ | 7374/12188 [15:52:10<9:32:30, 7.14s/it] {'loss': 0.2876, 'grad_norm': 0.6814734053594317, 'learning_rate': 3.5642396056402816e-06, 'epoch': 0.6} + 61%|██████ | 7374/12188 [15:52:10<9:32:30, 7.14s/it] 61%|██████ | 7375/12188 [15:52:17<9:31:46, 7.13s/it] {'loss': 0.3022, 'grad_norm': 0.7088797464969373, 'learning_rate': 3.5629669087209668e-06, 'epoch': 0.61} + 61%|██████ | 7375/12188 [15:52:17<9:31:46, 7.13s/it] 61%|██████ | 7376/12188 [15:52:25<9:48:25, 7.34s/it] {'loss': 0.3324, 'grad_norm': 0.7059994934401025, 'learning_rate': 3.56169431328262e-06, 'epoch': 0.61} + 61%|██████ | 7376/12188 [15:52:25<9:48:25, 7.34s/it] 61%|██████ | 7377/12188 [15:52:32<9:30:33, 7.12s/it] {'loss': 0.3459, 'grad_norm': 0.7001294319173045, 'learning_rate': 3.5604218194151096e-06, 'epoch': 0.61} + 61%|██████ | 7377/12188 [15:52:32<9:30:33, 7.12s/it] 61%|██████ | 7378/12188 [15:52:38<9:22:10, 7.01s/it] {'loss': 0.3012, 'grad_norm': 0.7019431379833707, 'learning_rate': 3.5591494272082966e-06, 'epoch': 0.61} + 61%|██████ | 7378/12188 [15:52:38<9:22:10, 7.01s/it] 61%|██████ | 7379/12188 [15:52:45<9:20:11, 6.99s/it] {'loss': 0.3043, 'grad_norm': 0.7905428869275991, 'learning_rate': 3.557877136752038e-06, 'epoch': 0.61} + 61%|██████ | 7379/12188 [15:52:45<9:20:11, 6.99s/it] 61%|██████ | 7380/12188 [15:52:53<9:41:45, 7.26s/it] {'loss': 0.3339, 'grad_norm': 0.7470098514730407, 'learning_rate': 3.5566049481361776e-06, 'epoch': 0.61} + 61%|██████ | 7380/12188 [15:52:53<9:41:45, 7.26s/it] 61%|██████ | 7381/12188 [15:53:02<10:11:09, 7.63s/it] {'loss': 0.3019, 'grad_norm': 0.6408536521263387, 'learning_rate': 3.555332861450559e-06, 'epoch': 0.61} + 61%|██████ | 7381/12188 [15:53:02<10:11:09, 7.63s/it] 61%|██████ | 7382/12188 [15:53:09<9:53:38, 7.41s/it] {'loss': 0.3498, 'grad_norm': 0.6479048377891653, 'learning_rate': 3.5540608767850106e-06, 'epoch': 0.61} + 61%|██████ | 7382/12188 [15:53:09<9:53:38, 7.41s/it] 61%|██████ | 7383/12188 [15:53:15<9:35:41, 7.19s/it] {'loss': 0.3344, 'grad_norm': 0.6623904995532996, 'learning_rate': 3.5527889942293594e-06, 'epoch': 0.61} + 61%|██████ | 7383/12188 [15:53:15<9:35:41, 7.19s/it] 61%|██████ | 7384/12188 [15:53:22<9:33:24, 7.16s/it] {'loss': 0.3091, 'grad_norm': 0.7484588443887322, 'learning_rate': 3.5515172138734268e-06, 'epoch': 0.61} + 61%|██████ | 7384/12188 [15:53:22<9:33:24, 7.16s/it] 61%|██████ | 7385/12188 [15:53:30<9:31:27, 7.14s/it] {'loss': 0.2999, 'grad_norm': 0.7673271814812999, 'learning_rate': 3.5502455358070187e-06, 'epoch': 0.61} + 61%|██████ | 7385/12188 [15:53:30<9:31:27, 7.14s/it] 61%|██████ | 7386/12188 [15:53:37<9:41:32, 7.27s/it] {'loss': 0.3507, 'grad_norm': 0.7269810305330422, 'learning_rate': 3.548973960119944e-06, 'epoch': 0.61} + 61%|██████ | 7386/12188 [15:53:37<9:41:32, 7.27s/it] 61%|██████ | 7387/12188 [15:53:44<9:36:40, 7.21s/it] {'loss': 0.2723, 'grad_norm': 0.6443759220490902, 'learning_rate': 3.547702486901995e-06, 'epoch': 0.61} + 61%|██████ | 7387/12188 [15:53:44<9:36:40, 7.21s/it] 61%|██████ | 7388/12188 [15:53:51<9:33:06, 7.16s/it] {'loss': 0.3219, 'grad_norm': 0.669675957277011, 'learning_rate': 3.5464311162429645e-06, 'epoch': 0.61} + 61%|██████ | 7388/12188 [15:53:51<9:33:06, 7.16s/it] 61%|██████ | 7389/12188 [15:53:58<9:26:45, 7.09s/it] {'loss': 0.3506, 'grad_norm': 0.7973761569122617, 'learning_rate': 3.545159848232632e-06, 'epoch': 0.61} + 61%|██████ | 7389/12188 [15:53:58<9:26:45, 7.09s/it] 61%|██████ | 7390/12188 [15:54:05<9:22:19, 7.03s/it] {'loss': 0.2881, 'grad_norm': 0.633215419330747, 'learning_rate': 3.543888682960775e-06, 'epoch': 0.61} + 61%|██████ | 7390/12188 [15:54:05<9:22:19, 7.03s/it] 61%|██████ | 7391/12188 [15:54:12<9:22:14, 7.03s/it] {'loss': 0.3364, 'grad_norm': 0.6959843482386928, 'learning_rate': 3.5426176205171603e-06, 'epoch': 0.61} + 61%|██████ | 7391/12188 [15:54:12<9:22:14, 7.03s/it] 61%|██████ | 7392/12188 [15:54:19<9:21:06, 7.02s/it] {'loss': 0.3129, 'grad_norm': 0.651852076149581, 'learning_rate': 3.541346660991546e-06, 'epoch': 0.61} + 61%|██████ | 7392/12188 [15:54:19<9:21:06, 7.02s/it] 61%|██████ | 7393/12188 [15:54:26<9:12:13, 6.91s/it] {'loss': 0.3443, 'grad_norm': 0.6559761226515187, 'learning_rate': 3.540075804473686e-06, 'epoch': 0.61} + 61%|██████ | 7393/12188 [15:54:26<9:12:13, 6.91s/it] 61%|██████ | 7394/12188 [15:54:34<9:47:06, 7.35s/it] {'loss': 0.3248, 'grad_norm': 0.6328817715962887, 'learning_rate': 3.53880505105333e-06, 'epoch': 0.61} + 61%|██████ | 7394/12188 [15:54:34<9:47:06, 7.35s/it] 61%|██████ | 7395/12188 [15:54:41<9:43:10, 7.30s/it] {'loss': 0.3231, 'grad_norm': 0.6834376408223483, 'learning_rate': 3.5375344008202115e-06, 'epoch': 0.61} + 61%|██████ | 7395/12188 [15:54:41<9:43:10, 7.30s/it]Invalidate trace cache @ step 2: expected module 1, but got module 364 + 61%|██████ | 7396/12188 [15:54:48<9:19:10, 7.00s/it] {'loss': 0.6706, 'grad_norm': 0.6197874129690384, 'learning_rate': 3.5362638538640646e-06, 'epoch': 0.61} + 61%|██████ | 7396/12188 [15:54:48<9:19:10, 7.00s/it] 61%|██████ | 7397/12188 [15:54:54<9:17:38, 6.98s/it] {'loss': 0.3261, 'grad_norm': 0.646493787883927, 'learning_rate': 3.534993410274615e-06, 'epoch': 0.61} + 61%|██████ | 7397/12188 [15:54:55<9:17:38, 6.98s/it] 61%|██████ | 7398/12188 [15:55:01<9:16:04, 6.97s/it] {'loss': 0.322, 'grad_norm': 0.6174009631790393, 'learning_rate': 3.5337230701415737e-06, 'epoch': 0.61} + 61%|██████ | 7398/12188 [15:55:01<9:16:04, 6.97s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f05f7493790> +[Try #0] Failed to fetch sample 4456269 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f05f7493790> +Problematic sample: {'image': '20240827_145511_before_screenshot_sub3.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Notification Chevron'"}, {'from': 'gpt', 'value': '\nclick(x=0.843, y=0.972)\n'}]} + 61%|██████ | 7399/12188 [15:55:08<9:04:29, 6.82s/it] {'loss': 0.2744, 'grad_norm': 0.6772883884459907, 'learning_rate': 3.5324528335546558e-06, 'epoch': 0.61} + 61%|██████ | 7399/12188 [15:55:08<9:04:29, 6.82s/it] 61%|██████ | 7400/12188 [15:55:14<8:57:12, 6.73s/it] {'loss': 0.306, 'grad_norm': 0.6253233793249897, 'learning_rate': 3.5311827006035594e-06, 'epoch': 0.61} + 61%|██████ | 7400/12188 [15:55:14<8:57:12, 6.73s/it] 61%|██████ | 7401/12188 [15:55:22<9:16:12, 6.97s/it] {'loss': 0.3157, 'grad_norm': 0.7211769706894017, 'learning_rate': 3.52991267137798e-06, 'epoch': 0.61} + 61%|██████ | 7401/12188 [15:55:22<9:16:12, 6.97s/it] 61%|██████ | 7402/12188 [15:55:29<9:14:05, 6.95s/it] {'loss': 0.312, 'grad_norm': 0.6746080028012422, 'learning_rate': 3.528642745967609e-06, 'epoch': 0.61} + 61%|██████ | 7402/12188 [15:55:29<9:14:05, 6.95s/it] 61%|██████ | 7403/12188 [15:55:39<10:26:35, 7.86s/it] {'loss': 0.3309, 'grad_norm': 0.7681626608829278, 'learning_rate': 3.527372924462121e-06, 'epoch': 0.61} + 61%|██████ | 7403/12188 [15:55:39<10:26:35, 7.86s/it] 61%|██████ | 7404/12188 [15:55:46<10:12:39, 7.68s/it] {'loss': 0.3441, 'grad_norm': 0.6757909572389573, 'learning_rate': 3.5261032069511926e-06, 'epoch': 0.61} + 61%|██████ | 7404/12188 [15:55:46<10:12:39, 7.68s/it] 61%|██████ | 7405/12188 [15:55:54<10:11:04, 7.67s/it] {'loss': 0.3003, 'grad_norm': 0.622804551905367, 'learning_rate': 3.524833593524487e-06, 'epoch': 0.61} + 61%|██████ | 7405/12188 [15:55:54<10:11:04, 7.67s/it] 61%|██████ | 7406/12188 [15:56:03<10:42:35, 8.06s/it] {'loss': 0.3158, 'grad_norm': 0.6992014377809824, 'learning_rate': 3.523564084271662e-06, 'epoch': 0.61} + 61%|██████ | 7406/12188 [15:56:03<10:42:35, 8.06s/it] 61%|██████ | 7407/12188 [15:56:10<10:24:25, 7.84s/it] {'loss': 0.3125, 'grad_norm': 0.6359507508842861, 'learning_rate': 3.5222946792823707e-06, 'epoch': 0.61} + 61%|██████ | 7407/12188 [15:56:10<10:24:25, 7.84s/it] 61%|██████ | 7408/12188 [15:56:17<10:06:03, 7.61s/it] {'loss': 0.3001, 'grad_norm': 0.7471791525278911, 'learning_rate': 3.5210253786462537e-06, 'epoch': 0.61} + 61%|██████ | 7408/12188 [15:56:17<10:06:03, 7.61s/it] 61%|██████ | 7409/12188 [15:56:24<9:48:18, 7.39s/it] {'loss': 0.3093, 'grad_norm': 0.7387088881620653, 'learning_rate': 3.5197561824529503e-06, 'epoch': 0.61} + 61%|██████ | 7409/12188 [15:56:24<9:48:18, 7.39s/it] 61%|██████ | 7410/12188 [15:56:33<10:17:32, 7.75s/it] {'loss': 0.3016, 'grad_norm': 0.6420529881391406, 'learning_rate': 3.5184870907920866e-06, 'epoch': 0.61} + 61%|██████ | 7410/12188 [15:56:33<10:17:32, 7.75s/it] 61%|██████ | 7411/12188 [15:56:40<10:07:07, 7.63s/it] {'loss': 0.3001, 'grad_norm': 0.6181070648408427, 'learning_rate': 3.5172181037532826e-06, 'epoch': 0.61} + 61%|██████ | 7411/12188 [15:56:40<10:07:07, 7.63s/it] 61%|██████ | 7412/12188 [15:56:47<10:03:12, 7.58s/it] {'loss': 0.3226, 'grad_norm': 0.6755679450463142, 'learning_rate': 3.515949221426156e-06, 'epoch': 0.61} + 61%|██████ | 7412/12188 [15:56:47<10:03:12, 7.58s/it] 61%|██████ | 7413/12188 [15:56:54<9:51:07, 7.43s/it] {'loss': 0.3158, 'grad_norm': 0.9543806981890995, 'learning_rate': 3.514680443900309e-06, 'epoch': 0.61} + 61%|██████ | 7413/12188 [15:56:54<9:51:07, 7.43s/it] 61%|██████ | 7414/12188 [15:57:02<9:54:58, 7.48s/it] {'loss': 0.3036, 'grad_norm': 0.6282908880820016, 'learning_rate': 3.5134117712653425e-06, 'epoch': 0.61} + 61%|██████ | 7414/12188 [15:57:02<9:54:58, 7.48s/it] 61%|██████ | 7415/12188 [15:57:09<9:37:46, 7.26s/it] {'loss': 0.2856, 'grad_norm': 0.695835357858873, 'learning_rate': 3.5121432036108504e-06, 'epoch': 0.61} + 61%|██████ | 7415/12188 [15:57:09<9:37:46, 7.26s/it] 61%|██████ | 7416/12188 [15:57:16<9:34:08, 7.22s/it] {'loss': 0.3136, 'grad_norm': 0.6845448399419068, 'learning_rate': 3.510874741026414e-06, 'epoch': 0.61} + 61%|██████ | 7416/12188 [15:57:16<9:34:08, 7.22s/it] 61%|██████ | 7417/12188 [15:57:23<9:29:54, 7.17s/it] {'loss': 0.2954, 'grad_norm': 0.6454885469309778, 'learning_rate': 3.5096063836016113e-06, 'epoch': 0.61} + 61%|██████ | 7417/12188 [15:57:23<9:29:54, 7.17s/it] 61%|██████ | 7418/12188 [15:57:30<9:20:48, 7.05s/it] {'loss': 0.3448, 'grad_norm': 0.6798801698306891, 'learning_rate': 3.5083381314260113e-06, 'epoch': 0.61} + 61%|██████ | 7418/12188 [15:57:30<9:20:48, 7.05s/it] 61%|██████ | 7419/12188 [15:57:37<9:19:24, 7.04s/it] {'loss': 0.2999, 'grad_norm': 0.7067141761099209, 'learning_rate': 3.5070699845891743e-06, 'epoch': 0.61} + 61%|██████ | 7419/12188 [15:57:37<9:19:24, 7.04s/it] 61%|██████ | 7420/12188 [15:57:44<9:16:12, 7.00s/it] {'loss': 0.3059, 'grad_norm': 0.6431631379377862, 'learning_rate': 3.505801943180659e-06, 'epoch': 0.61} + 61%|██████ | 7420/12188 [15:57:44<9:16:12, 7.00s/it] 61%|██████ | 7421/12188 [15:57:51<9:22:46, 7.08s/it] {'loss': 0.338, 'grad_norm': 0.8082708547298518, 'learning_rate': 3.504534007290007e-06, 'epoch': 0.61} + 61%|██████ | 7421/12188 [15:57:51<9:22:46, 7.08s/it] 61%|██████ | 7422/12188 [15:57:58<9:21:02, 7.06s/it] {'loss': 0.2908, 'grad_norm': 0.7065092196493253, 'learning_rate': 3.503266177006763e-06, 'epoch': 0.61} + 61%|██████ | 7422/12188 [15:57:58<9:21:02, 7.06s/it] 61%|██████ | 7423/12188 [15:58:08<10:26:09, 7.88s/it] {'loss': 0.2872, 'grad_norm': 0.6720289898318801, 'learning_rate': 3.5019984524204553e-06, 'epoch': 0.61} + 61%|██████ | 7423/12188 [15:58:08<10:26:09, 7.88s/it] 61%|██████ | 7424/12188 [15:58:15<10:09:20, 7.67s/it] {'loss': 0.3256, 'grad_norm': 0.7306292578149894, 'learning_rate': 3.5007308336206114e-06, 'epoch': 0.61} + 61%|██████ | 7424/12188 [15:58:15<10:09:20, 7.67s/it] 61%|██████ | 7425/12188 [15:58:22<9:55:29, 7.50s/it] {'loss': 0.3007, 'grad_norm': 0.7041077903241723, 'learning_rate': 3.4994633206967478e-06, 'epoch': 0.61} + 61%|██████ | 7425/12188 [15:58:22<9:55:29, 7.50s/it] 61%|██████ | 7426/12188 [15:58:29<9:45:09, 7.37s/it] {'loss': 0.3182, 'grad_norm': 0.6919639399491972, 'learning_rate': 3.498195913738372e-06, 'epoch': 0.61} + 61%|██████ | 7426/12188 [15:58:29<9:45:09, 7.37s/it] 61%|██████ | 7427/12188 [15:58:36<9:40:28, 7.32s/it] {'loss': 0.3279, 'grad_norm': 0.6869441225456919, 'learning_rate': 3.4969286128349877e-06, 'epoch': 0.61} + 61%|██████ | 7427/12188 [15:58:36<9:40:28, 7.32s/it] 61%|██████ | 7428/12188 [15:58:43<9:27:03, 7.15s/it] {'loss': 0.3076, 'grad_norm': 0.6909316262050522, 'learning_rate': 3.4956614180760918e-06, 'epoch': 0.61} + 61%|██████ | 7428/12188 [15:58:43<9:27:03, 7.15s/it] 61%|██████ | 7429/12188 [15:58:50<9:21:30, 7.08s/it] {'loss': 0.2888, 'grad_norm': 0.6583538539303995, 'learning_rate': 3.4943943295511678e-06, 'epoch': 0.61} + 61%|██████ | 7429/12188 [15:58:50<9:21:30, 7.08s/it] 61%|██████ | 7430/12188 [15:58:58<9:35:09, 7.25s/it] {'loss': 0.294, 'grad_norm': 0.6848451776811375, 'learning_rate': 3.4931273473496995e-06, 'epoch': 0.61} + 61%|██████ | 7430/12188 [15:58:58<9:35:09, 7.25s/it] 61%|██████ | 7431/12188 [15:59:05<9:44:55, 7.38s/it] {'loss': 0.2974, 'grad_norm': 0.6289519283637874, 'learning_rate': 3.4918604715611558e-06, 'epoch': 0.61} + 61%|██████ | 7431/12188 [15:59:05<9:44:55, 7.38s/it]Invalidate trace cache @ step 2: expected module 1, but got module 364 + 61%|██████ | 7432/12188 [15:59:12<9:23:20, 7.11s/it] {'loss': 0.6493, 'grad_norm': 0.6235288617880282, 'learning_rate': 3.4905937022750014e-06, 'epoch': 0.61} + 61%|██████ | 7432/12188 [15:59:12<9:23:20, 7.11s/it] 61%|██████ | 7433/12188 [15:59:19<9:20:23, 7.07s/it] {'loss': 0.3126, 'grad_norm': 1.8024893571225273, 'learning_rate': 3.489327039580697e-06, 'epoch': 0.61} + 61%|██████ | 7433/12188 [15:59:19<9:20:23, 7.07s/it] 61%|██████ | 7434/12188 [15:59:26<9:24:02, 7.12s/it] {'loss': 0.3033, 'grad_norm': 0.7206758566358886, 'learning_rate': 3.488060483567688e-06, 'epoch': 0.61} + 61%|██████ | 7434/12188 [15:59:26<9:24:02, 7.12s/it] 61%|██████ | 7435/12188 [15:59:33<9:14:44, 7.00s/it] {'loss': 0.3402, 'grad_norm': 0.6641295792764199, 'learning_rate': 3.4867940343254203e-06, 'epoch': 0.61} + 61%|██████ | 7435/12188 [15:59:33<9:14:44, 7.00s/it] 61%|██████ | 7436/12188 [15:59:40<9:31:23, 7.21s/it] {'loss': 0.2968, 'grad_norm': 0.6278915720931426, 'learning_rate': 3.4855276919433246e-06, 'epoch': 0.61} + 61%|██████ | 7436/12188 [15:59:40<9:31:23, 7.21s/it] 61%|██████ | 7437/12188 [15:59:48<9:28:37, 7.18s/it] {'loss': 0.3346, 'grad_norm': 0.7529146756421482, 'learning_rate': 3.4842614565108314e-06, 'epoch': 0.61} + 61%|██████ | 7437/12188 [15:59:48<9:28:37, 7.18s/it] 61%|██████ | 7438/12188 [15:59:55<9:35:27, 7.27s/it] {'loss': 0.3299, 'grad_norm': 0.7489740627491908, 'learning_rate': 3.482995328117359e-06, 'epoch': 0.61} + 61%|���█████ | 7438/12188 [15:59:55<9:35:27, 7.27s/it] 61%|██████ | 7439/12188 [16:00:02<9:36:57, 7.29s/it] {'loss': 0.364, 'grad_norm': 0.7229818550327285, 'learning_rate': 3.4817293068523196e-06, 'epoch': 0.61} + 61%|██████ | 7439/12188 [16:00:02<9:36:57, 7.29s/it] 61%|██████ | 7440/12188 [16:00:10<9:40:51, 7.34s/it] {'loss': 0.2755, 'grad_norm': 0.6300617620884528, 'learning_rate': 3.4804633928051173e-06, 'epoch': 0.61} + 61%|██████ | 7440/12188 [16:00:10<9:40:51, 7.34s/it] 61%|██████ | 7441/12188 [16:00:17<9:37:17, 7.30s/it] {'loss': 0.3117, 'grad_norm': 0.6527034700295054, 'learning_rate': 3.4791975860651474e-06, 'epoch': 0.61} + 61%|██████ | 7441/12188 [16:00:17<9:37:17, 7.30s/it] 61%|██████ | 7442/12188 [16:00:24<9:37:14, 7.30s/it] {'loss': 0.2655, 'grad_norm': 0.620131097526273, 'learning_rate': 3.4779318867218004e-06, 'epoch': 0.61} + 61%|██████ | 7442/12188 [16:00:24<9:37:14, 7.30s/it] 61%|██████ | 7443/12188 [16:00:31<9:28:06, 7.18s/it] {'loss': 0.2992, 'grad_norm': 0.6670204604813074, 'learning_rate': 3.4766662948644602e-06, 'epoch': 0.61} + 61%|██████ | 7443/12188 [16:00:31<9:28:06, 7.18s/it] 61%|██████ | 7444/12188 [16:00:38<9:29:15, 7.20s/it] {'loss': 0.3386, 'grad_norm': 0.6110338225225188, 'learning_rate': 3.475400810582498e-06, 'epoch': 0.61} + 61%|██████ | 7444/12188 [16:00:38<9:29:15, 7.20s/it] 61%|██████ | 7445/12188 [16:00:47<9:51:58, 7.49s/it] {'loss': 0.3218, 'grad_norm': 0.6933367853672178, 'learning_rate': 3.47413543396528e-06, 'epoch': 0.61} + 61%|██████ | 7445/12188 [16:00:47<9:51:58, 7.49s/it] 61%|██████ | 7446/12188 [16:00:55<10:09:06, 7.71s/it] {'loss': 0.3159, 'grad_norm': 0.7006785064662148, 'learning_rate': 3.472870165102169e-06, 'epoch': 0.61} + 61%|██████ | 7446/12188 [16:00:55<10:09:06, 7.71s/it] 61%|██████ | 7447/12188 [16:01:02<9:58:05, 7.57s/it] {'loss': 0.3279, 'grad_norm': 0.696240299575665, 'learning_rate': 3.47160500408251e-06, 'epoch': 0.61} + 61%|██████ | 7447/12188 [16:01:02<9:58:05, 7.57s/it] 61%|██████ | 7448/12188 [16:01:10<9:55:23, 7.54s/it] {'loss': 0.3028, 'grad_norm': 0.7230563246017028, 'learning_rate': 3.4703399509956533e-06, 'epoch': 0.61} + 61%|██████ | 7448/12188 [16:01:10<9:55:23, 7.54s/it] 61%|██████ | 7449/12188 [16:01:16<9:37:45, 7.31s/it] {'loss': 0.3289, 'grad_norm': 0.6770126729315632, 'learning_rate': 3.4690750059309284e-06, 'epoch': 0.61} + 61%|██████ | 7449/12188 [16:01:16<9:37:45, 7.31s/it] 61%|██████ | 7450/12188 [16:01:23<9:19:42, 7.09s/it] {'loss': 0.3088, 'grad_norm': 0.6830809862984007, 'learning_rate': 3.467810168977669e-06, 'epoch': 0.61} + 61%|██████ | 7450/12188 [16:01:23<9:19:42, 7.09s/it] 61%|██████ | 7451/12188 [16:01:30<9:13:26, 7.01s/it] {'loss': 0.3088, 'grad_norm': 0.7175110397263255, 'learning_rate': 3.4665454402251937e-06, 'epoch': 0.61} + 61%|██████ | 7451/12188 [16:01:30<9:13:26, 7.01s/it] 61%|██████ | 7452/12188 [16:01:36<9:06:02, 6.92s/it] {'loss': 0.2988, 'grad_norm': 0.742760267955176, 'learning_rate': 3.465280819762815e-06, 'epoch': 0.61} + 61%|██████ | 7452/12188 [16:01:36<9:06:02, 6.92s/it] 61%|██████ | 7453/12188 [16:01:43<9:03:53, 6.89s/it] {'loss': 0.2996, 'grad_norm': 0.7525410556688317, 'learning_rate': 3.464016307679841e-06, 'epoch': 0.61} + 61%|██████ | 7453/12188 [16:01:43<9:03:53, 6.89s/it] 61%|██████ | 7454/12188 [16:01:51<9:13:24, 7.01s/it] {'loss': 0.3052, 'grad_norm': 0.6883906958773317, 'learning_rate': 3.4627519040655656e-06, 'epoch': 0.61} + 61%|██████ | 7454/12188 [16:01:51<9:13:24, 7.01s/it] 61%|██████ | 7455/12188 [16:01:57<9:04:12, 6.90s/it] {'loss': 0.3287, 'grad_norm': 1.0023920113721536, 'learning_rate': 3.4614876090092797e-06, 'epoch': 0.61} + 61%|██████ | 7455/12188 [16:01:57<9:04:12, 6.90s/it] 61%|██████ | 7456/12188 [16:02:04<9:00:12, 6.85s/it] {'loss': 0.2963, 'grad_norm': 0.6433948661045505, 'learning_rate': 3.4602234226002706e-06, 'epoch': 0.61} + 61%|██████ | 7456/12188 [16:02:04<9:00:12, 6.85s/it] 61%|██████ | 7457/12188 [16:02:11<8:56:31, 6.80s/it] {'loss': 0.2897, 'grad_norm': 0.645875729502254, 'learning_rate': 3.458959344927807e-06, 'epoch': 0.61} + 61%|██████ | 7457/12188 [16:02:11<8:56:31, 6.80s/it] 61%|██████ | 7458/12188 [16:02:17<8:54:12, 6.78s/it] {'loss': 0.2987, 'grad_norm': 0.6018480969393027, 'learning_rate': 3.4576953760811603e-06, 'epoch': 0.61} + 61%|██████ | 7458/12188 [16:02:17<8:54:12, 6.78s/it] 61%|██████ | 7459/12188 [16:02:25<9:15:47, 7.05s/it] {'loss': 0.3365, 'grad_norm': 0.7510275580880507, 'learning_rate': 3.456431516149587e-06, 'epoch': 0.61} + 61%|██████ | 7459/12188 [16:02:25<9:15:47, 7.05s/it] 61%|██████ | 7460/12188 [16:02:32<9:07:42, 6.95s/it] {'loss': 0.312, 'grad_norm': 0.6750041180836782, 'learning_rate': 3.455167765222339e-06, 'epoch': 0.61} + 61%|██████ | 7460/12188 [16:02:32<9:07:42, 6.95s/it] 61%|██████ | 7461/12188 [16:02:38<9:00:53, 6.87s/it] {'loss': 0.2921, 'grad_norm': 0.6409432525354124, 'learning_rate': 3.4539041233886648e-06, 'epoch': 0.61} + 61%|██████ | 7461/12188 [16:02:38<9:00:53, 6.87s/it] 61%|██████ | 7462/12188 [16:02:45<9:01:47, 6.88s/it] {'loss': 0.3173, 'grad_norm': 0.6351739840636988, 'learning_rate': 3.4526405907377948e-06, 'epoch': 0.61} + 61%|██████ | 7462/12188 [16:02:45<9:01:47, 6.88s/it] 61%|██████ | 7463/12188 [16:02:52<9:04:13, 6.91s/it] {'loss': 0.3241, 'grad_norm': 0.7773492453751744, 'learning_rate': 3.451377167358959e-06, 'epoch': 0.61} + 61%|██████ | 7463/12188 [16:02:52<9:04:13, 6.91s/it] 61%|██████ | 7464/12188 [16:03:00<9:15:46, 7.06s/it] {'loss': 0.3136, 'grad_norm': 0.7419255598687203, 'learning_rate': 3.450113853341383e-06, 'epoch': 0.61} + 61%|██████ | 7464/12188 [16:03:00<9:15:46, 7.06s/it] 61%|██████ | 7465/12188 [16:03:06<9:07:03, 6.95s/it] {'loss': 0.3171, 'grad_norm': 0.7356078327938028, 'learning_rate': 3.4488506487742756e-06, 'epoch': 0.61} + 61%|██████ | 7465/12188 [16:03:06<9:07:03, 6.95s/it] 61%|██████▏ | 7466/12188 [16:03:14<9:11:52, 7.01s/it] {'loss': 0.3315, 'grad_norm': 0.7504075311888254, 'learning_rate': 3.447587553746843e-06, 'epoch': 0.61} + 61%|██████▏ | 7466/12188 [16:03:14<9:11:52, 7.01s/it] 61%|██████▏ | 7467/12188 [16:03:21<9:13:14, 7.03s/it] {'loss': 0.3245, 'grad_norm': 0.7249368532136704, 'learning_rate': 3.4463245683482827e-06, 'epoch': 0.61} + 61%|██████▏ | 7467/12188 [16:03:21<9:13:14, 7.03s/it] 61%|██████▏ | 7468/12188 [16:03:28<9:09:19, 6.98s/it] {'loss': 0.3239, 'grad_norm': 0.6920558309913993, 'learning_rate': 3.445061692667785e-06, 'epoch': 0.61} + 61%|██████▏ | 7468/12188 [16:03:28<9:09:19, 6.98s/it] 61%|██████▏ | 7469/12188 [16:03:34<9:07:57, 6.97s/it] {'loss': 0.307, 'grad_norm': 0.7091783837963869, 'learning_rate': 3.443798926794534e-06, 'epoch': 0.61} + 61%|██████▏ | 7469/12188 [16:03:34<9:07:57, 6.97s/it] 61%|██████▏ | 7470/12188 [16:03:41<9:03:12, 6.91s/it] {'loss': 0.3083, 'grad_norm': 0.6501924505780903, 'learning_rate': 3.4425362708177003e-06, 'epoch': 0.61} + 61%|██████▏ | 7470/12188 [16:03:41<9:03:12, 6.91s/it] 61%|██████▏ | 7471/12188 [16:03:49<9:14:59, 7.06s/it] {'loss': 0.297, 'grad_norm': 0.69317474788538, 'learning_rate': 3.441273724826455e-06, 'epoch': 0.61} + 61%|██████▏ | 7471/12188 [16:03:49<9:14:59, 7.06s/it] 61%|██████▏ | 7472/12188 [16:03:56<9:32:29, 7.28s/it] {'loss': 0.3183, 'grad_norm': 0.8518564800791698, 'learning_rate': 3.440011288909953e-06, 'epoch': 0.61} + 61%|██████▏ | 7472/12188 [16:03:56<9:32:29, 7.28s/it] 61%|██████▏ | 7473/12188 [16:04:05<10:01:21, 7.65s/it] {'loss': 0.324, 'grad_norm': 0.7338758390007157, 'learning_rate': 3.438748963157349e-06, 'epoch': 0.61} + 61%|██████▏ | 7473/12188 [16:04:05<10:01:21, 7.65s/it] 61%|██████▏ | 7474/12188 [16:04:12<9:55:28, 7.58s/it] {'loss': 0.3253, 'grad_norm': 0.6083147501084353, 'learning_rate': 3.437486747657785e-06, 'epoch': 0.61} + 61%|██████▏ | 7474/12188 [16:04:12<9:55:28, 7.58s/it] 61%|██████▏ | 7475/12188 [16:04:20<9:58:11, 7.62s/it] {'loss': 0.3134, 'grad_norm': 0.8624364006846237, 'learning_rate': 3.4362246425003954e-06, 'epoch': 0.61} + 61%|██████▏ | 7475/12188 [16:04:20<9:58:11, 7.62s/it] 61%|██████▏ | 7476/12188 [16:04:27<9:38:26, 7.37s/it] {'loss': 0.2999, 'grad_norm': 0.7195063370826048, 'learning_rate': 3.4349626477743104e-06, 'epoch': 0.61} + 61%|██████▏ | 7476/12188 [16:04:27<9:38:26, 7.37s/it] 61%|██████▏ | 7477/12188 [16:04:35<9:59:55, 7.64s/it] {'loss': 0.3104, 'grad_norm': 0.6450392048144609, 'learning_rate': 3.433700763568647e-06, 'epoch': 0.61} + 61%|██████▏ | 7477/12188 [16:04:35<9:59:55, 7.64s/it] 61%|██████▏ | 7478/12188 [16:04:43<10:06:37, 7.73s/it] {'loss': 0.2998, 'grad_norm': 0.8769068910389266, 'learning_rate': 3.4324389899725203e-06, 'epoch': 0.61} + 61%|██████▏ | 7478/12188 [16:04:43<10:06:37, 7.73s/it] 61%|██████▏ | 7479/12188 [16:04:51<10:06:48, 7.73s/it] {'loss': 0.2992, 'grad_norm': 0.6331468860262652, 'learning_rate': 3.431177327075034e-06, 'epoch': 0.61} + 61%|██████▏ | 7479/12188 [16:04:51<10:06:48, 7.73s/it] 61%|██████▏ | 7480/12188 [16:05:00<10:30:35, 8.04s/it] {'loss': 0.33, 'grad_norm': 0.7048234377565898, 'learning_rate': 3.4299157749652832e-06, 'epoch': 0.61} + 61%|██████▏ | 7480/12188 [16:05:00<10:30:35, 8.04s/it] 61%|██████▏ | 7481/12188 [16:05:07<10:08:48, 7.76s/it] {'loss': 0.3177, 'grad_norm': 0.7951126305980978, 'learning_rate': 3.4286543337323584e-06, 'epoch': 0.61} + 61%|██████▏ | 7481/12188 [16:05:07<10:08:48, 7.76s/it] 61%|██████▏ | 7482/12188 [16:05:13<9:39:43, 7.39s/it] {'loss': 0.2944, 'grad_norm': 0.712567008164625, 'learning_rate': 3.4273930034653414e-06, 'epoch': 0.61} + 61%|██████▏ | 7482/12188 [16:05:13<9:39:43, 7.39s/it] 61%|██████▏ | 7483/12188 [16:05:21<9:42:11, 7.42s/it] {'loss': 0.2765, 'grad_norm': 0.5737334118663148, 'learning_rate': 3.4261317842533016e-06, 'epoch': 0.61} + 61%|██████▏ | 7483/12188 [16:05:21<9:42:11, 7.42s/it] 61%|██████▏ | 7484/12188 [16:05:28<9:31:39, 7.29s/it] {'loss': 0.3313, 'grad_norm': 0.6680393775927773, 'learning_rate': 3.424870676185309e-06, 'epoch': 0.61} + 61%|██████▏ | 7484/12188 [16:05:28<9:31:39, 7.29s/it] 61%|██████▏ | 7485/12188 [16:05:35<9:25:38, 7.22s/it] {'loss': 0.2828, 'grad_norm': 0.6515628958148214, 'learning_rate': 3.423609679350416e-06, 'epoch': 0.61} + 61%|██████▏ | 7485/12188 [16:05:35<9:25:38, 7.22s/it] 61%|██████▏ | 7486/12188 [16:05:45<10:32:34, 8.07s/it] {'loss': 0.2846, 'grad_norm': 0.6802206110074748, 'learning_rate': 3.422348793837676e-06, 'epoch': 0.61} + 61%|██████▏ | 7486/12188 [16:05:45<10:32:34, 8.07s/it] 61%|██████▏ | 7487/12188 [16:05:52<10:17:09, 7.88s/it] {'loss': 0.3341, 'grad_norm': 0.7138411303084394, 'learning_rate': 3.4210880197361305e-06, 'epoch': 0.61} + 61%|██████▏ | 7487/12188 [16:05:52<10:17:09, 7.88s/it] 61%|██████▏ | 7488/12188 [16:05:59<9:56:41, 7.62s/it] {'loss': 0.3099, 'grad_norm': 0.7254740000325566, 'learning_rate': 3.4198273571348107e-06, 'epoch': 0.61} + 61%|██████▏ | 7488/12188 [16:05:59<9:56:41, 7.62s/it] 61%|██████▏ | 7489/12188 [16:06:06<9:47:10, 7.50s/it] {'loss': 0.29, 'grad_norm': 0.6959369156437583, 'learning_rate': 3.4185668061227463e-06, 'epoch': 0.61} + 61%|██████▏ | 7489/12188 [16:06:06<9:47:10, 7.50s/it] 61%|██████▏ | 7490/12188 [16:06:13<9:35:10, 7.35s/it] {'loss': 0.3054, 'grad_norm': 0.7098949857246906, 'learning_rate': 3.4173063667889505e-06, 'epoch': 0.61} + 61%|██████▏ | 7490/12188 [16:06:13<9:35:10, 7.35s/it] 61%|██████▏ | 7491/12188 [16:06:20<9:17:10, 7.12s/it] {'loss': 0.2996, 'grad_norm': 0.6561666675152363, 'learning_rate': 3.416046039222437e-06, 'epoch': 0.61} + 61%|██████▏ | 7491/12188 [16:06:20<9:17:10, 7.12s/it] 61%|██████▏ | 7492/12188 [16:06:28<9:26:30, 7.24s/it] {'loss': 0.3199, 'grad_norm': 0.6689815466432188, 'learning_rate': 3.4147858235122087e-06, 'epoch': 0.61} + 61%|██████▏ | 7492/12188 [16:06:28<9:26:30, 7.24s/it] 61%|██████▏ | 7493/12188 [16:06:35<9:20:45, 7.17s/it] {'loss': 0.3382, 'grad_norm': 0.6238478336796217, 'learning_rate': 3.4135257197472582e-06, 'epoch': 0.61} + 61%|██████▏ | 7493/12188 [16:06:35<9:20:45, 7.17s/it] 61%|██████▏ | 7494/12188 [16:06:42<9:16:42, 7.12s/it] {'loss': 0.2958, 'grad_norm': 0.6319646711504817, 'learning_rate': 3.4122657280165727e-06, 'epoch': 0.61} + 61%|██████▏ | 7494/12188 [16:06:42<9:16:42, 7.12s/it] 61%|██████▏ | 7495/12188 [16:06:49<9:24:02, 7.21s/it] {'loss': 0.3177, 'grad_norm': 0.7131580924523873, 'learning_rate': 3.41100584840913e-06, 'epoch': 0.61} + 61%|██████▏ | 7495/12188 [16:06:49<9:24:02, 7.21s/it] 62%|██████▏ | 7496/12188 [16:06:56<9:19:40, 7.16s/it] {'loss': 0.2986, 'grad_norm': 0.6338243893421869, 'learning_rate': 3.4097460810139006e-06, 'epoch': 0.62} + 62%|██████▏ | 7496/12188 [16:06:56<9:19:40, 7.16s/it] 62%|██████▏ | 7497/12188 [16:07:02<9:02:44, 6.94s/it] {'loss': 0.3124, 'grad_norm': 0.7117382840690364, 'learning_rate': 3.40848642591985e-06, 'epoch': 0.62} + 62%|██████▏ | 7497/12188 [16:07:02<9:02:44, 6.94s/it] 62%|██████▏ | 7498/12188 [16:07:11<9:49:01, 7.54s/it] {'loss': 0.2907, 'grad_norm': 0.817825824829663, 'learning_rate': 3.4072268832159294e-06, 'epoch': 0.62} + 62%|██████▏ | 7498/12188 [16:07:11<9:49:01, 7.54s/it] 62%|██████▏ | 7499/12188 [16:07:18<9:30:00, 7.29s/it] {'loss': 0.3108, 'grad_norm': 0.6734279965836947, 'learning_rate': 3.405967452991088e-06, 'epoch': 0.62} + 62%|██████▏ | 7499/12188 [16:07:18<9:30:00, 7.29s/it] 62%|██████▏ | 7500/12188 [16:07:25<9:16:07, 7.12s/it] {'loss': 0.3165, 'grad_norm': 0.7198714446456803, 'learning_rate': 3.4047081353342644e-06, 'epoch': 0.62} + 62%|██████▏ | 7500/12188 [16:07:25<9:16:07, 7.12s/it] 62%|██████▏ | 7501/12188 [16:07:32<9:12:11, 7.07s/it] {'loss': 0.3224, 'grad_norm': 0.6746380845039942, 'learning_rate': 3.403448930334389e-06, 'epoch': 0.62} + 62%|██████▏ | 7501/12188 [16:07:32<9:12:11, 7.07s/it] 62%|██████▏ | 7502/12188 [16:07:39<9:10:42, 7.05s/it] {'loss': 0.3082, 'grad_norm': 0.7217752117853371, 'learning_rate': 3.4021898380803864e-06, 'epoch': 0.62} + 62%|██████▏ | 7502/12188 [16:07:39<9:10:42, 7.05s/it] 62%|██████▏ | 7503/12188 [16:07:46<9:17:48, 7.14s/it] {'loss': 0.2966, 'grad_norm': 0.6252536658084653, 'learning_rate': 3.400930858661168e-06, 'epoch': 0.62} + 62%|██████▏ | 7503/12188 [16:07:46<9:17:48, 7.14s/it] 62%|██████▏ | 7504/12188 [16:07:53<9:17:33, 7.14s/it] {'loss': 0.3134, 'grad_norm': 0.6275194096570474, 'learning_rate': 3.399671992165643e-06, 'epoch': 0.62} + 62%|██████▏ | 7504/12188 [16:07:53<9:17:33, 7.14s/it] 62%|██████▏ | 7505/12188 [16:08:01<9:20:51, 7.19s/it] {'loss': 0.3187, 'grad_norm': 0.6870729858520332, 'learning_rate': 3.3984132386827127e-06, 'epoch': 0.62} + 62%|██████▏ | 7505/12188 [16:08:01<9:20:51, 7.19s/it] 62%|██████▏ | 7506/12188 [16:08:08<9:29:00, 7.29s/it] {'loss': 0.2744, 'grad_norm': 0.8176781372763191, 'learning_rate': 3.397154598301266e-06, 'epoch': 0.62} + 62%|██████▏ | 7506/12188 [16:08:08<9:29:00, 7.29s/it] 62%|██████▏ | 7507/12188 [16:08:16<9:38:41, 7.42s/it] {'loss': 0.3639, 'grad_norm': 0.6984125376860711, 'learning_rate': 3.395896071110186e-06, 'epoch': 0.62} + 62%|██████▏ | 7507/12188 [16:08:16<9:38:41, 7.42s/it] 62%|██████▏ | 7508/12188 [16:08:23<9:29:56, 7.31s/it] {'loss': 0.2937, 'grad_norm': 0.7620619248704, 'learning_rate': 3.3946376571983486e-06, 'epoch': 0.62} + 62%|██████▏ | 7508/12188 [16:08:23<9:29:56, 7.31s/it] 62%|██████▏ | 7509/12188 [16:08:30<9:26:14, 7.26s/it] {'loss': 0.31, 'grad_norm': 0.6772811405414948, 'learning_rate': 3.3933793566546193e-06, 'epoch': 0.62} + 62%|██████▏ | 7509/12188 [16:08:30<9:26:14, 7.26s/it] 62%|██████▏ | 7510/12188 [16:08:38<9:49:49, 7.57s/it] {'loss': 0.331, 'grad_norm': 0.6386768741304198, 'learning_rate': 3.3921211695678608e-06, 'epoch': 0.62} + 62%|██████▏ | 7510/12188 [16:08:38<9:49:49, 7.57s/it] 62%|██████▏ | 7511/12188 [16:08:45<9:40:58, 7.45s/it] {'loss': 0.3233, 'grad_norm': 0.748035218966803, 'learning_rate': 3.39086309602692e-06, 'epoch': 0.62} + 62%|██████▏ | 7511/12188 [16:08:45<9:40:58, 7.45s/it] 62%|██████▏ | 7512/12188 [16:08:53<9:37:18, 7.41s/it] {'loss': 0.3016, 'grad_norm': 0.7288109461551766, 'learning_rate': 3.389605136120644e-06, 'epoch': 0.62} + 62%|██████▏ | 7512/12188 [16:08:53<9:37:18, 7.41s/it] 62%|██████▏ | 7513/12188 [16:09:01<10:03:39, 7.75s/it] {'loss': 0.3024, 'grad_norm': 0.6421720516001497, 'learning_rate': 3.3883472899378643e-06, 'epoch': 0.62} + 62%|██████▏ | 7513/12188 [16:09:01<10:03:39, 7.75s/it] 62%|██████▏ | 7514/12188 [16:09:08<9:48:55, 7.56s/it] {'loss': 0.3378, 'grad_norm': 0.6455920982216345, 'learning_rate': 3.3870895575674103e-06, 'epoch': 0.62} + 62%|██████▏ | 7514/12188 [16:09:08<9:48:55, 7.56s/it] 62%|██████▏ | 7515/12188 [16:09:16<9:47:22, 7.54s/it] {'loss': 0.3463, 'grad_norm': 0.6869153777451609, 'learning_rate': 3.3858319390981014e-06, 'epoch': 0.62} + 62%|██████▏ | 7515/12188 [16:09:16<9:47:22, 7.54s/it] 62%|██████▏ | 7516/12188 [16:09:23<9:30:10, 7.32s/it] {'loss': 0.3234, 'grad_norm': 0.788799335802774, 'learning_rate': 3.384574434618746e-06, 'epoch': 0.62} + 62%|██████▏ | 7516/12188 [16:09:23<9:30:10, 7.32s/it] 62%|██████▏ | 7517/12188 [16:09:29<9:13:18, 7.11s/it] {'loss': 0.3329, 'grad_norm': 0.7791786799299534, 'learning_rate': 3.3833170442181485e-06, 'epoch': 0.62} + 62%|██████▏ | 7517/12188 [16:09:29<9:13:18, 7.11s/it] 62%|██████▏ | 7518/12188 [16:09:37<9:23:44, 7.24s/it] {'loss': 0.2854, 'grad_norm': 0.613976765340438, 'learning_rate': 3.3820597679851053e-06, 'epoch': 0.62} + 62%|██████▏ | 7518/12188 [16:09:37<9:23:44, 7.24s/it] 62%|██████▏ | 7519/12188 [16:09:44<9:31:36, 7.35s/it] {'loss': 0.3454, 'grad_norm': 0.7032968891791315, 'learning_rate': 3.380802606008401e-06, 'epoch': 0.62} + 62%|██████▏ | 7519/12188 [16:09:44<9:31:36, 7.35s/it] 62%|██████▏ | 7520/12188 [16:09:52<9:24:14, 7.25s/it] {'loss': 0.3236, 'grad_norm': 0.7295843461391748, 'learning_rate': 3.379545558376816e-06, 'epoch': 0.62} + 62%|██████▏ | 7520/12188 [16:09:52<9:24:14, 7.25s/it] 62%|██████▏ | 7521/12188 [16:09:58<9:16:07, 7.15s/it] {'loss': 0.2907, 'grad_norm': 1.2074533913747016, 'learning_rate': 3.3782886251791204e-06, 'epoch': 0.62} + 62%|██████▏ | 7521/12188 [16:09:58<9:16:07, 7.15s/it] 62%|██████▏ | 7522/12188 [16:10:05<9:09:22, 7.06s/it] {'loss': 0.3241, 'grad_norm': 0.7382944400292841, 'learning_rate': 3.377031806504075e-06, 'epoch': 0.62} + 62%|██████▏ | 7522/12188 [16:10:05<9:09:22, 7.06s/it] 62%|██████▏ | 7523/12188 [16:10:13<9:26:06, 7.28s/it] {'loss': 0.2874, 'grad_norm': 0.7284580475125412, 'learning_rate': 3.3757751024404373e-06, 'epoch': 0.62} + 62%|██████▏ | 7523/12188 [16:10:13<9:26:06, 7.28s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f5e620cda30> +[Try #0] Failed to fetch sample 4879917 in VC:s3://gui/OS-Atlas/desktop_domain/linux_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f5e620cda30> +Problematic sample: {'image': 'output_20240912_163313_original_screenshot_sub3.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Add a Printer…'"}, {'from': 'gpt', 'value': '\nclick(x=0.123, y=0.0944)\n'}]} + 62%|██████▏ | 7524/12188 [16:10:20<9:26:41, 7.29s/it] {'loss': 0.3065, 'grad_norm': 0.6354568757800065, 'learning_rate': 3.374518513076951e-06, 'epoch': 0.62} + 62%|██████▏ | 7524/12188 [16:10:20<9:26:41, 7.29s/it] 62%|██████▏ | 7525/12188 [16:10:27<9:13:59, 7.13s/it] {'loss': 0.3303, 'grad_norm': 0.6301469655514427, 'learning_rate': 3.3732620385023584e-06, 'epoch': 0.62} + 62%|██████▏ | 7525/12188 [16:10:27<9:13:59, 7.13s/it] 62%|██████▏ | 7526/12188 [16:10:35<9:22:03, 7.23s/it] {'loss': 0.3075, 'grad_norm': 0.6657003331054802, 'learning_rate': 3.372005678805384e-06, 'epoch': 0.62} + 62%|██████▏ | 7526/12188 [16:10:35<9:22:03, 7.23s/it] 62%|██████▏ | 7527/12188 [16:10:41<9:12:07, 7.11s/it] {'loss': 0.3191, 'grad_norm': 0.7616381962015818, 'learning_rate': 3.370749434074755e-06, 'epoch': 0.62} + 62%|██████▏ | 7527/12188 [16:10:41<9:12:07, 7.11s/it] 62%|██████▏ | 7528/12188 [16:10:48<9:02:59, 6.99s/it] {'loss': 0.3154, 'grad_norm': 0.6790141388108708, 'learning_rate': 3.3694933043991824e-06, 'epoch': 0.62} + 62%|██████▏ | 7528/12188 [16:10:48<9:02:59, 6.99s/it] 62%|██████▏ | 7529/12188 [16:10:55<9:05:07, 7.02s/it] {'loss': 0.3312, 'grad_norm': 0.6506717660850097, 'learning_rate': 3.368237289867373e-06, 'epoch': 0.62} + 62%|██████▏ | 7529/12188 [16:10:55<9:05:07, 7.02s/it] 62%|██████▏ | 7530/12188 [16:11:02<9:06:13, 7.04s/it] {'loss': 0.292, 'grad_norm': 0.800079320486423, 'learning_rate': 3.3669813905680264e-06, 'epoch': 0.62} + 62%|██████▏ | 7530/12188 [16:11:02<9:06:13, 7.04s/it] 62%|██████▏ | 7531/12188 [16:11:09<8:59:55, 6.96s/it] {'loss': 0.318, 'grad_norm': 0.6692564168117523, 'learning_rate': 3.365725606589828e-06, 'epoch': 0.62} + 62%|██████▏ | 7531/12188 [16:11:09<8:59:55, 6.96s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f399347e5c0> +[Try #0] Failed to fetch sample 4394781 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f399347e5c0> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Mobile view'"}, {'from': 'gpt', 'value': '\nclick(x=0.9115, y=0.608)\n'}]} + 62%|██████▏ | 7532/12188 [16:11:16<8:59:30, 6.95s/it] {'loss': 0.308, 'grad_norm': 0.6935169853576456, 'learning_rate': 3.364469938021462e-06, 'epoch': 0.62} + 62%|██████▏ | 7532/12188 [16:11:16<8:59:30, 6.95s/it] 62%|██████▏ | 7533/12188 [16:11:24<9:19:44, 7.21s/it] {'loss': 0.3047, 'grad_norm': 0.7284017149940257, 'learning_rate': 3.3632143849516025e-06, 'epoch': 0.62} + 62%|██████▏ | 7533/12188 [16:11:24<9:19:44, 7.21s/it] 62%|██████▏ | 7534/12188 [16:11:31<9:07:04, 7.05s/it] {'loss': 0.318, 'grad_norm': 0.650959195818578, 'learning_rate': 3.361958947468912e-06, 'epoch': 0.62} + 62%|██████▏ | 7534/12188 [16:11:31<9:07:04, 7.05s/it] 62%|██████▏ | 7535/12188 [16:11:39<9:37:42, 7.45s/it] {'loss': 0.335, 'grad_norm': 0.6906755832591244, 'learning_rate': 3.3607036256620496e-06, 'epoch': 0.62} + 62%|██████▏ | 7535/12188 [16:11:39<9:37:42, 7.45s/it] 62%|██████▏ | 7536/12188 [16:11:46<9:35:15, 7.42s/it] {'loss': 0.3276, 'grad_norm': 0.7791541392989837, 'learning_rate': 3.3594484196196643e-06, 'epoch': 0.62} + 62%|██████▏ | 7536/12188 [16:11:46<9:35:15, 7.42s/it] 62%|██████▏ | 7537/12188 [16:11:55<9:57:18, 7.71s/it] {'loss': 0.3419, 'grad_norm': 0.6979164033222154, 'learning_rate': 3.358193329430394e-06, 'epoch': 0.62} + 62%|██████▏ | 7537/12188 [16:11:55<9:57:18, 7.71s/it] 62%|██████▏ | 7538/12188 [16:12:03<10:01:35, 7.76s/it] {'loss': 0.2964, 'grad_norm': 0.6687142447717542, 'learning_rate': 3.3569383551828756e-06, 'epoch': 0.62} + 62%|██████▏ | 7538/12188 [16:12:03<10:01:35, 7.76s/it] 62%|██████▏ | 7539/12188 [16:12:10<9:50:03, 7.62s/it] {'loss': 0.3332, 'grad_norm': 0.7235690064184076, 'learning_rate': 3.3556834969657283e-06, 'epoch': 0.62} + 62%|██████▏ | 7539/12188 [16:12:10<9:50:03, 7.62s/it] 62%|██████▏ | 7540/12188 [16:12:17<9:31:22, 7.38s/it] {'loss': 0.28, 'grad_norm': 0.6426933415746802, 'learning_rate': 3.3544287548675713e-06, 'epoch': 0.62} + 62%|██████▏ | 7540/12188 [16:12:17<9:31:22, 7.38s/it] 62%|██████▏ | 7541/12188 [16:12:24<9:25:24, 7.30s/it] {'loss': 0.3405, 'grad_norm': 0.7403596840122827, 'learning_rate': 3.3531741289770124e-06, 'epoch': 0.62} + 62%|██████▏ | 7541/12188 [16:12:24<9:25:24, 7.30s/it] 62%|██████▏ | 7542/12188 [16:12:30<9:12:02, 7.13s/it] {'loss': 0.3233, 'grad_norm': 0.7097018396584912, 'learning_rate': 3.3519196193826503e-06, 'epoch': 0.62} + 62%|██████▏ | 7542/12188 [16:12:30<9:12:02, 7.13s/it] 62%|██████▏ | 7543/12188 [16:12:37<9:00:26, 6.98s/it] {'loss': 0.3066, 'grad_norm': 0.6987950128497856, 'learning_rate': 3.350665226173078e-06, 'epoch': 0.62} + 62%|██████▏ | 7543/12188 [16:12:37<9:00:26, 6.98s/it] 62%|██████▏ | 7544/12188 [16:12:44<8:54:14, 6.90s/it] {'loss': 0.2846, 'grad_norm': 0.6328997098871462, 'learning_rate': 3.3494109494368753e-06, 'epoch': 0.62} + 62%|██████▏ | 7544/12188 [16:12:44<8:54:14, 6.90s/it] 62%|██████▏ | 7545/12188 [16:12:51<8:53:43, 6.90s/it] {'loss': 0.3115, 'grad_norm': 0.835539530691368, 'learning_rate': 3.34815678926262e-06, 'epoch': 0.62} + 62%|██████▏ | 7545/12188 [16:12:51<8:53:43, 6.90s/it] 62%|██████▏ | 7546/12188 [16:12:58<9:08:11, 7.09s/it] {'loss': 0.2769, 'grad_norm': 0.6209099137644976, 'learning_rate': 3.34690274573888e-06, 'epoch': 0.62} + 62%|██████▏ | 7546/12188 [16:12:58<9:08:11, 7.09s/it] 62%|██████▏ | 7547/12188 [16:13:05<9:08:11, 7.09s/it] {'loss': 0.3585, 'grad_norm': 0.6281132850778627, 'learning_rate': 3.34564881895421e-06, 'epoch': 0.62} + 62%|██████▏ | 7547/12188 [16:13:05<9:08:11, 7.09s/it] 62%|██████▏ | 7548/12188 [16:13:14<9:34:54, 7.43s/it] {'loss': 0.2997, 'grad_norm': 0.6575364683374894, 'learning_rate': 3.3443950089971655e-06, 'epoch': 0.62} + 62%|██████▏ | 7548/12188 [16:13:14<9:34:54, 7.43s/it] 62%|██████▏ | 7549/12188 [16:13:20<9:17:18, 7.21s/it] {'loss': 0.3297, 'grad_norm': 0.7499722401530369, 'learning_rate': 3.343141315956283e-06, 'epoch': 0.62} + 62%|██████▏ | 7549/12188 [16:13:20<9:17:18, 7.21s/it] 62%|██████▏ | 7550/12188 [16:13:28<9:23:26, 7.29s/it] {'loss': 0.3458, 'grad_norm': 1.2048050150843221, 'learning_rate': 3.3418877399200987e-06, 'epoch': 0.62} + 62%|██████▏ | 7550/12188 [16:13:28<9:23:26, 7.29s/it] 62%|██████▏ | 7551/12188 [16:13:34<9:08:32, 7.10s/it] {'loss': 0.3076, 'grad_norm': 0.6541952783052477, 'learning_rate': 3.3406342809771407e-06, 'epoch': 0.62} + 62%|██████▏ | 7551/12188 [16:13:34<9:08:32, 7.10s/it] 62%|██████▏ | 7552/12188 [16:13:41<9:03:40, 7.04s/it] {'loss': 0.3001, 'grad_norm': 0.6911348009584296, 'learning_rate': 3.339380939215921e-06, 'epoch': 0.62} + 62%|██████▏ | 7552/12188 [16:13:41<9:03:40, 7.04s/it] 62%|██████▏ | 7553/12188 [16:13:49<9:20:01, 7.25s/it] {'loss': 0.308, 'grad_norm': 0.644953118718961, 'learning_rate': 3.338127714724951e-06, 'epoch': 0.62} + 62%|██████▏ | 7553/12188 [16:13:49<9:20:01, 7.25s/it] 62%|██████▏ | 7554/12188 [16:13:56<9:20:37, 7.26s/it] {'loss': 0.2494, 'grad_norm': 0.6901075093458734, 'learning_rate': 3.336874607592735e-06, 'epoch': 0.62} + 62%|██████▏ | 7554/12188 [16:13:56<9:20:37, 7.26s/it] 62%|██████▏ | 7555/12188 [16:14:04<9:22:30, 7.28s/it] {'loss': 0.3062, 'grad_norm': 0.7143753501988912, 'learning_rate': 3.3356216179077604e-06, 'epoch': 0.62} + 62%|██████▏ | 7555/12188 [16:14:04<9:22:30, 7.28s/it] 62%|██████▏ | 7556/12188 [16:14:12<9:55:30, 7.71s/it] {'loss': 0.332, 'grad_norm': 0.7048752312064426, 'learning_rate': 3.334368745758514e-06, 'epoch': 0.62} + 62%|██████▏ | 7556/12188 [16:14:12<9:55:30, 7.71s/it] 62%|██████▏ | 7557/12188 [16:14:20<9:53:55, 7.69s/it] {'loss': 0.3262, 'grad_norm': 0.623070057404465, 'learning_rate': 3.33311599123347e-06, 'epoch': 0.62} + 62%|██████▏ | 7557/12188 [16:14:20<9:53:55, 7.69s/it] 62%|██████▏ | 7558/12188 [16:14:27<9:35:48, 7.46s/it] {'loss': 0.3306, 'grad_norm': 0.7795394318507216, 'learning_rate': 3.3318633544210953e-06, 'epoch': 0.62} + 62%|██████▏ | 7558/12188 [16:14:27<9:35:48, 7.46s/it] 62%|██████▏ | 7559/12188 [16:14:34<9:25:18, 7.33s/it] {'loss': 0.2938, 'grad_norm': 0.6540709748484987, 'learning_rate': 3.3306108354098534e-06, 'epoch': 0.62} + 62%|██████▏ | 7559/12188 [16:14:34<9:25:18, 7.33s/it] 62%|██████▏ | 7560/12188 [16:14:41<9:13:18, 7.17s/it] {'loss': 0.3333, 'grad_norm': 0.6545650471626023, 'learning_rate': 3.329358434288189e-06, 'epoch': 0.62} + 62%|██████▏ | 7560/12188 [16:14:41<9:13:18, 7.17s/it] 62%|██████▏ | 7561/12188 [16:14:48<9:09:56, 7.13s/it] {'loss': 0.3139, 'grad_norm': 0.8073739079506727, 'learning_rate': 3.328106151144551e-06, 'epoch': 0.62} + 62%|██████▏ | 7561/12188 [16:14:48<9:09:56, 7.13s/it] 62%|██████▏ | 7562/12188 [16:14:56<9:26:44, 7.35s/it] {'loss': 0.3244, 'grad_norm': 0.6950337823452881, 'learning_rate': 3.3268539860673675e-06, 'epoch': 0.62} + 62%|██████▏ | 7562/12188 [16:14:56<9:26:44, 7.35s/it] 62%|██████▏ | 7563/12188 [16:15:02<9:12:00, 7.16s/it] {'loss': 0.2973, 'grad_norm': 0.6574677846062877, 'learning_rate': 3.3256019391450696e-06, 'epoch': 0.62} + 62%|██████▏ | 7563/12188 [16:15:02<9:12:00, 7.16s/it] 62%|██████▏ | 7564/12188 [16:15:09<9:05:19, 7.08s/it] {'loss': 0.3483, 'grad_norm': 0.6538032688180658, 'learning_rate': 3.324350010466073e-06, 'epoch': 0.62} + 62%|██████▏ | 7564/12188 [16:15:09<9:05:19, 7.08s/it] 62%|██████▏ | 7565/12188 [16:15:17<9:09:39, 7.13s/it] {'loss': 0.3002, 'grad_norm': 0.6681391071691531, 'learning_rate': 3.3230982001187843e-06, 'epoch': 0.62} + 62%|██████▏ | 7565/12188 [16:15:17<9:09:39, 7.13s/it] 62%|██████▏ | 7566/12188 [16:15:25<9:30:03, 7.40s/it] {'loss': 0.2912, 'grad_norm': 0.7462950808152023, 'learning_rate': 3.321846508191609e-06, 'epoch': 0.62} + 62%|██████▏ | 7566/12188 [16:15:25<9:30:03, 7.40s/it] 62%|██████▏ | 7567/12188 [16:15:33<9:56:39, 7.75s/it] {'loss': 0.3219, 'grad_norm': 0.6103705353176, 'learning_rate': 3.320594934772935e-06, 'epoch': 0.62} + 62%|██████▏ | 7567/12188 [16:15:33<9:56:39, 7.75s/it] 62%|██████▏ | 7568/12188 [16:15:41<9:54:47, 7.72s/it] {'loss': 0.3014, 'grad_norm': 0.6582805026074374, 'learning_rate': 3.319343479951148e-06, 'epoch': 0.62} + 62%|██████▏ | 7568/12188 [16:15:41<9:54:47, 7.72s/it] 62%|██████▏ | 7569/12188 [16:15:48<9:32:09, 7.43s/it] {'loss': 0.2941, 'grad_norm': 0.6845052462658069, 'learning_rate': 3.3180921438146276e-06, 'epoch': 0.62} + 62%|██████▏ | 7569/12188 [16:15:48<9:32:09, 7.43s/it] 62%|██████▏ | 7570/12188 [16:15:54<9:21:38, 7.30s/it] {'loss': 0.3279, 'grad_norm': 0.639110799443921, 'learning_rate': 3.3168409264517356e-06, 'epoch': 0.62} + 62%|██████▏ | 7570/12188 [16:15:55<9:21:38, 7.30s/it] 62%|██████▏ | 7571/12188 [16:16:01<9:08:05, 7.12s/it] {'loss': 0.3127, 'grad_norm': 0.736165339355737, 'learning_rate': 3.3155898279508323e-06, 'epoch': 0.62} + 62%|██████▏ | 7571/12188 [16:16:01<9:08:05, 7.12s/it] 62%|██████▏ | 7572/12188 [16:16:08<9:06:19, 7.10s/it] {'loss': 0.336, 'grad_norm': 0.6917465023308873, 'learning_rate': 3.3143388484002714e-06, 'epoch': 0.62} + 62%|██████▏ | 7572/12188 [16:16:08<9:06:19, 7.10s/it] 62%|██████▏ | 7573/12188 [16:16:16<9:15:16, 7.22s/it] {'loss': 0.2795, 'grad_norm': 0.7103985329294301, 'learning_rate': 3.3130879878883904e-06, 'epoch': 0.62} + 62%|██████▏ | 7573/12188 [16:16:16<9:15:16, 7.22s/it] 62%|██████▏ | 7574/12188 [16:16:23<9:23:13, 7.32s/it] {'loss': 0.3117, 'grad_norm': 0.6904405776453764, 'learning_rate': 3.311837246503528e-06, 'epoch': 0.62} + 62%|██████▏ | 7574/12188 [16:16:23<9:23:13, 7.32s/it] 62%|██████▏ | 7575/12188 [16:16:30<9:15:04, 7.22s/it] {'loss': 0.3028, 'grad_norm': 0.6184240555202316, 'learning_rate': 3.3105866243340045e-06, 'epoch': 0.62} + 62%|██████▏ | 7575/12188 [16:16:30<9:15:04, 7.22s/it] 62%|██████▏ | 7576/12188 [16:16:38<9:18:59, 7.27s/it] {'loss': 0.3217, 'grad_norm': 0.65555204952658, 'learning_rate': 3.3093361214681406e-06, 'epoch': 0.62} + 62%|██████▏ | 7576/12188 [16:16:38<9:18:59, 7.27s/it] 62%|██████▏ | 7577/12188 [16:16:44<9:05:30, 7.10s/it] {'loss': 0.3377, 'grad_norm': 0.6871698603735688, 'learning_rate': 3.308085737994245e-06, 'epoch': 0.62} + 62%|██████▏ | 7577/12188 [16:16:44<9:05:30, 7.10s/it] 62%|██████▏ | 7578/12188 [16:16:51<8:58:06, 7.00s/it] {'loss': 0.2967, 'grad_norm': 0.7405368004831698, 'learning_rate': 3.3068354740006137e-06, 'epoch': 0.62} + 62%|██████▏ | 7578/12188 [16:16:51<8:58:06, 7.00s/it] 62%|██████▏ | 7579/12188 [16:16:58<8:48:51, 6.88s/it] {'loss': 0.3238, 'grad_norm': 0.6917438130066078, 'learning_rate': 3.3055853295755434e-06, 'epoch': 0.62} + 62%|██████▏ | 7579/12188 [16:16:58<8:48:51, 6.88s/it] 62%|██████▏ | 7580/12188 [16:17:05<9:04:33, 7.09s/it] {'loss': 0.3166, 'grad_norm': 0.8261027281497243, 'learning_rate': 3.304335304807312e-06, 'epoch': 0.62} + 62%|██████▏ | 7580/12188 [16:17:05<9:04:33, 7.09s/it] 62%|██████▏ | 7581/12188 [16:17:13<9:14:23, 7.22s/it] {'loss': 0.3027, 'grad_norm': 0.6637225985660443, 'learning_rate': 3.303085399784198e-06, 'epoch': 0.62} + 62%|██████▏ | 7581/12188 [16:17:13<9:14:23, 7.22s/it] 62%|██████▏ | 7582/12188 [16:17:20<9:06:50, 7.12s/it] {'loss': 0.3363, 'grad_norm': 0.6545811016796896, 'learning_rate': 3.3018356145944686e-06, 'epoch': 0.62} + 62%|██████▏ | 7582/12188 [16:17:20<9:06:50, 7.12s/it] 62%|██████▏ | 7583/12188 [16:17:28<9:26:57, 7.39s/it] {'loss': 0.3368, 'grad_norm': 0.6565268784618141, 'learning_rate': 3.300585949326379e-06, 'epoch': 0.62} + 62%|██████▏ | 7583/12188 [16:17:28<9:26:57, 7.39s/it] 62%|██████▏ | 7584/12188 [16:17:35<9:12:47, 7.20s/it] {'loss': 0.3563, 'grad_norm': 0.7128030550777129, 'learning_rate': 3.2993364040681788e-06, 'epoch': 0.62} + 62%|██████▏ | 7584/12188 [16:17:35<9:12:47, 7.20s/it] 62%|██████▏ | 7585/12188 [16:17:41<9:03:46, 7.09s/it] {'loss': 0.3172, 'grad_norm': 0.6559060072883954, 'learning_rate': 3.2980869789081093e-06, 'epoch': 0.62} + 62%|██████▏ | 7585/12188 [16:17:41<9:03:46, 7.09s/it] 62%|██████▏ | 7586/12188 [16:17:50<9:29:16, 7.42s/it] {'loss': 0.279, 'grad_norm': 0.6230723638420859, 'learning_rate': 3.296837673934403e-06, 'epoch': 0.62} + 62%|██████▏ | 7586/12188 [16:17:50<9:29:16, 7.42s/it] 62%|██████▏ | 7587/12188 [16:17:56<9:16:32, 7.26s/it] {'loss': 0.3002, 'grad_norm': 1.23137226802982, 'learning_rate': 3.2955884892352853e-06, 'epoch': 0.62} + 62%|██████▏ | 7587/12188 [16:17:56<9:16:32, 7.26s/it] 62%|██████▏ | 7588/12188 [16:18:05<9:44:24, 7.62s/it] {'loss': 0.317, 'grad_norm': 0.6773851318725477, 'learning_rate': 3.2943394248989673e-06, 'epoch': 0.62} + 62%|██████▏ | 7588/12188 [16:18:05<9:44:24, 7.62s/it] 62%|██████▏ | 7589/12188 [16:18:11<9:17:53, 7.28s/it] {'loss': 0.2885, 'grad_norm': 0.7142611252247938, 'learning_rate': 3.293090481013661e-06, 'epoch': 0.62} + 62%|██████▏ | 7589/12188 [16:18:11<9:17:53, 7.28s/it] 62%|██████▏ | 7590/12188 [16:18:19<9:25:52, 7.38s/it] {'loss': 0.3272, 'grad_norm': 0.6565767927977971, 'learning_rate': 3.2918416576675617e-06, 'epoch': 0.62} + 62%|██████▏ | 7590/12188 [16:18:19<9:25:52, 7.38s/it] 62%|██████▏ | 7591/12188 [16:18:28<9:55:22, 7.77s/it] {'loss': 0.2938, 'grad_norm': 0.6510429075026453, 'learning_rate': 3.2905929549488602e-06, 'epoch': 0.62} + 62%|██████▏ | 7591/12188 [16:18:28<9:55:22, 7.77s/it] 62%|██████▏ | 7592/12188 [16:18:34<9:28:10, 7.42s/it] {'loss': 0.3014, 'grad_norm': 0.6644473141626085, 'learning_rate': 3.28934437294574e-06, 'epoch': 0.62} + 62%|██████▏ | 7592/12188 [16:18:34<9:28:10, 7.42s/it] 62%|██████▏ | 7593/12188 [16:18:41<9:23:11, 7.35s/it] {'loss': 0.3112, 'grad_norm': 0.8018310771218468, 'learning_rate': 3.2880959117463686e-06, 'epoch': 0.62} + 62%|██████▏ | 7593/12188 [16:18:42<9:23:11, 7.35s/it]Invalidate trace cache @ step 2: expected module 1, but got module 364 + 62%|██████▏ | 7594/12188 [16:18:48<9:01:09, 7.07s/it] {'loss': 0.6607, 'grad_norm': 0.6270969929719568, 'learning_rate': 3.2868475714389137e-06, 'epoch': 0.62} + 62%|██████▏ | 7594/12188 [16:18:48<9:01:09, 7.07s/it] 62%|██████▏ | 7595/12188 [16:18:56<9:13:48, 7.23s/it] {'loss': 0.2765, 'grad_norm': 0.6733238654310781, 'learning_rate': 3.285599352111533e-06, 'epoch': 0.62} + 62%|██████▏ | 7595/12188 [16:18:56<9:13:48, 7.23s/it] 62%|██████▏ | 7596/12188 [16:19:03<9:09:39, 7.18s/it] {'loss': 0.3275, 'grad_norm': 0.7574502687870791, 'learning_rate': 3.2843512538523696e-06, 'epoch': 0.62} + 62%|██████▏ | 7596/12188 [16:19:03<9:09:39, 7.18s/it] 62%|██████▏ | 7597/12188 [16:19:10<9:05:27, 7.13s/it] {'loss': 0.3045, 'grad_norm': 0.6421632014890413, 'learning_rate': 3.283103276749566e-06, 'epoch': 0.62} + 62%|██████▏ | 7597/12188 [16:19:10<9:05:27, 7.13s/it] 62%|██████▏ | 7598/12188 [16:19:17<9:13:31, 7.24s/it] {'loss': 0.2928, 'grad_norm': 0.6485635386393759, 'learning_rate': 3.2818554208912505e-06, 'epoch': 0.62} + 62%|██████▏ | 7598/12188 [16:19:17<9:13:31, 7.24s/it] 62%|██████▏ | 7599/12188 [16:19:24<9:17:24, 7.29s/it] {'loss': 0.3587, 'grad_norm': 0.6389389984988755, 'learning_rate': 3.2806076863655435e-06, 'epoch': 0.62} + 62%|██████▏ | 7599/12188 [16:19:24<9:17:24, 7.29s/it] 62%|██████▏ | 7600/12188 [16:19:31<9:10:40, 7.20s/it] {'loss': 0.3386, 'grad_norm': 0.6447973687662982, 'learning_rate': 3.279360073260561e-06, 'epoch': 0.62} + 62%|██████▏ | 7600/12188 [16:19:31<9:10:40, 7.20s/it] 62%|██████▏ | 7601/12188 [16:19:38<8:59:14, 7.05s/it] {'loss': 0.3004, 'grad_norm': 0.6761695781447997, 'learning_rate': 3.278112581664403e-06, 'epoch': 0.62} + 62%|██████▏ | 7601/12188 [16:19:38<8:59:14, 7.05s/it] 62%|██████▏ | 7602/12188 [16:19:46<9:06:07, 7.15s/it] {'loss': 0.3443, 'grad_norm': 0.6870705675501044, 'learning_rate': 3.276865211665171e-06, 'epoch': 0.62} + 62%|██████▏ | 7602/12188 [16:19:46<9:06:07, 7.15s/it] 62%|██████▏ | 7603/12188 [16:19:53<9:20:48, 7.34s/it] {'loss': 0.2804, 'grad_norm': 0.6716941941358516, 'learning_rate': 3.275617963350948e-06, 'epoch': 0.62} + 62%|██████▏ | 7603/12188 [16:19:53<9:20:48, 7.34s/it] 62%|██████▏ | 7604/12188 [16:20:00<9:13:47, 7.25s/it] {'loss': 0.3137, 'grad_norm': 0.7187412283971988, 'learning_rate': 3.274370836809814e-06, 'epoch': 0.62} + 62%|██████▏ | 7604/12188 [16:20:00<9:13:47, 7.25s/it] 62%|██████▏ | 7605/12188 [16:20:08<9:20:27, 7.34s/it] {'loss': 0.289, 'grad_norm': 0.656826475728549, 'learning_rate': 3.2731238321298397e-06, 'epoch': 0.62} + 62%|██████▏ | 7605/12188 [16:20:08<9:20:27, 7.34s/it] 62%|██████▏ | 7606/12188 [16:20:15<9:13:55, 7.25s/it] {'loss': 0.3184, 'grad_norm': 0.7163573044219866, 'learning_rate': 3.2718769493990843e-06, 'epoch': 0.62} + 62%|██████▏ | 7606/12188 [16:20:15<9:13:55, 7.25s/it] 62%|██████▏ | 7607/12188 [16:20:23<9:38:53, 7.58s/it] {'loss': 0.3289, 'grad_norm': 0.6632536538800159, 'learning_rate': 3.2706301887056025e-06, 'epoch': 0.62} + 62%|██████▏ | 7607/12188 [16:20:23<9:38:53, 7.58s/it] 62%|██████▏ | 7608/12188 [16:20:30<9:26:47, 7.43s/it] {'loss': 0.3029, 'grad_norm': 0.6727117814889959, 'learning_rate': 3.2693835501374407e-06, 'epoch': 0.62} + 62%|██████▏ | 7608/12188 [16:20:30<9:26:47, 7.43s/it] 62%|██████▏ | 7609/12188 [16:20:38<9:32:45, 7.51s/it] {'loss': 0.303, 'grad_norm': 0.6460300073450925, 'learning_rate': 3.2681370337826297e-06, 'epoch': 0.62} + 62%|██████▏ | 7609/12188 [16:20:38<9:32:45, 7.51s/it] 62%|██████▏ | 7610/12188 [16:20:45<9:10:49, 7.22s/it] {'loss': 0.3117, 'grad_norm': 0.6183842907455513, 'learning_rate': 3.266890639729201e-06, 'epoch': 0.62} + 62%|██████▏ | 7610/12188 [16:20:45<9:10:49, 7.22s/it] 62%|██████▏ | 7611/12188 [16:20:52<9:19:59, 7.34s/it] {'loss': 0.3635, 'grad_norm': 0.6544970210342772, 'learning_rate': 3.2656443680651696e-06, 'epoch': 0.62} + 62%|██████▏ | 7611/12188 [16:20:52<9:19:59, 7.34s/it] 62%|██████▏ | 7612/12188 [16:20:59<9:13:53, 7.26s/it] {'loss': 0.3563, 'grad_norm': 0.6656578802042, 'learning_rate': 3.2643982188785457e-06, 'epoch': 0.62} + 62%|██████▏ | 7612/12188 [16:20:59<9:13:53, 7.26s/it] 62%|██████▏ | 7613/12188 [16:21:06<9:09:46, 7.21s/it] {'loss': 0.3003, 'grad_norm': 0.6313341102642193, 'learning_rate': 3.263152192257333e-06, 'epoch': 0.62} + 62%|██████▏ | 7613/12188 [16:21:06<9:09:46, 7.21s/it] 62%|██████▏ | 7614/12188 [16:21:15<9:42:25, 7.64s/it] {'loss': 0.3229, 'grad_norm': 0.7374257615090007, 'learning_rate': 3.26190628828952e-06, 'epoch': 0.62} + 62%|██████▏ | 7614/12188 [16:21:15<9:42:25, 7.64s/it] 62%|██████▏ | 7615/12188 [16:21:23<9:37:50, 7.58s/it] {'loss': 0.2967, 'grad_norm': 0.6675910000584917, 'learning_rate': 3.2606605070630946e-06, 'epoch': 0.62} + 62%|██████▏ | 7615/12188 [16:21:23<9:37:50, 7.58s/it] 62%|██████▏ | 7616/12188 [16:21:30<9:33:09, 7.52s/it] {'loss': 0.3059, 'grad_norm': 0.7093522486851205, 'learning_rate': 3.2594148486660277e-06, 'epoch': 0.62} + 62%|██████▏ | 7616/12188 [16:21:30<9:33:09, 7.52s/it] 62%|██████▏ | 7617/12188 [16:21:38<9:47:44, 7.71s/it] {'loss': 0.2957, 'grad_norm': 0.685210899494753, 'learning_rate': 3.258169313186289e-06, 'epoch': 0.62} + 62%|██████▏ | 7617/12188 [16:21:38<9:47:44, 7.71s/it] 63%|██████▎ | 7618/12188 [16:21:45<9:23:49, 7.40s/it] {'loss': 0.3335, 'grad_norm': 0.6685619717718588, 'learning_rate': 3.2569239007118346e-06, 'epoch': 0.63} + 63%|██████▎ | 7618/12188 [16:21:45<9:23:49, 7.40s/it] 63%|██████▎ | 7619/12188 [16:21:52<9:29:04, 7.47s/it] {'loss': 0.3061, 'grad_norm': 0.699181741198157, 'learning_rate': 3.255678611330614e-06, 'epoch': 0.63} + 63%|██████▎ | 7619/12188 [16:21:52<9:29:04, 7.47s/it] 63%|██████▎ | 7620/12188 [16:22:00<9:24:52, 7.42s/it] {'loss': 0.3031, 'grad_norm': 0.6974272675691382, 'learning_rate': 3.254433445130566e-06, 'epoch': 0.63} + 63%|██████▎ | 7620/12188 [16:22:00<9:24:52, 7.42s/it] 63%|██████▎ | 7621/12188 [16:22:08<9:38:13, 7.60s/it] {'loss': 0.3021, 'grad_norm': 0.6536429926825554, 'learning_rate': 3.2531884021996263e-06, 'epoch': 0.63} + 63%|██████▎ | 7621/12188 [16:22:08<9:38:13, 7.60s/it] 63%|██████▎ | 7622/12188 [16:22:14<9:20:18, 7.36s/it] {'loss': 0.2956, 'grad_norm': 0.6727457332862126, 'learning_rate': 3.2519434826257123e-06, 'epoch': 0.63} + 63%|██████▎ | 7622/12188 [16:22:14<9:20:18, 7.36s/it] 63%|██████▎ | 7623/12188 [16:22:22<9:19:33, 7.35s/it] {'loss': 0.3317, 'grad_norm': 0.743191989580852, 'learning_rate': 3.250698686496744e-06, 'epoch': 0.63} + 63%|██████▎ | 7623/12188 [16:22:22<9:19:33, 7.35s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f4c9b03df30> +[Try #0] Failed to fetch sample 4565549 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f4c9b03df30> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Go'"}, {'from': 'gpt', 'value': '\nclick(x=0.9675, y=0.126)\n'}]} + 63%|██████▎ | 7624/12188 [16:22:29<9:15:00, 7.30s/it] {'loss': 0.2967, 'grad_norm': 0.7527587992987506, 'learning_rate': 3.2494540139006212e-06, 'epoch': 0.63} + 63%|██████▎ | 7624/12188 [16:22:29<9:15:00, 7.30s/it] 63%|██████▎ | 7625/12188 [16:22:36<9:04:48, 7.16s/it] {'loss': 0.2954, 'grad_norm': 0.728557167924075, 'learning_rate': 3.248209464925245e-06, 'epoch': 0.63} + 63%|██████▎ | 7625/12188 [16:22:36<9:04:48, 7.16s/it] 63%|██████▎ | 7626/12188 [16:22:42<8:52:15, 7.00s/it] {'loss': 0.319, 'grad_norm': 0.6967469050914696, 'learning_rate': 3.246965039658503e-06, 'epoch': 0.63} + 63%|██████▎ | 7626/12188 [16:22:42<8:52:15, 7.00s/it] 63%|██████▎ | 7627/12188 [16:22:50<9:11:42, 7.26s/it] {'loss': 0.3555, 'grad_norm': 0.7708228526220536, 'learning_rate': 3.2457207381882714e-06, 'epoch': 0.63} + 63%|██████▎ | 7627/12188 [16:22:50<9:11:42, 7.26s/it] 63%|██████▎ | 7628/12188 [16:22:58<9:23:57, 7.42s/it] {'loss': 0.2876, 'grad_norm': 0.7294127456556637, 'learning_rate': 3.2444765606024244e-06, 'epoch': 0.63} + 63%|██████▎ | 7628/12188 [16:22:58<9:23:57, 7.42s/it] 63%|██████▎ | 7629/12188 [16:23:05<9:17:05, 7.33s/it] {'loss': 0.3045, 'grad_norm': 0.63903714863856, 'learning_rate': 3.2432325069888204e-06, 'epoch': 0.63} + 63%|██████▎ | 7629/12188 [16:23:05<9:17:05, 7.33s/it] 63%|██████▎ | 7630/12188 [16:23:14<9:49:47, 7.76s/it] {'loss': 0.3178, 'grad_norm': 0.8141833580975513, 'learning_rate': 3.2419885774353144e-06, 'epoch': 0.63} + 63%|██████▎ | 7630/12188 [16:23:14<9:49:47, 7.76s/it] 63%|██████▎ | 7631/12188 [16:23:23<10:15:17, 8.10s/it] {'loss': 0.3161, 'grad_norm': 0.6944052889103182, 'learning_rate': 3.2407447720297524e-06, 'epoch': 0.63} + 63%|██████▎ | 7631/12188 [16:23:23<10:15:17, 8.10s/it] 63%|██████▎ | 7632/12188 [16:23:30<9:42:11, 7.67s/it] {'loss': 0.307, 'grad_norm': 0.6207601443192018, 'learning_rate': 3.239501090859968e-06, 'epoch': 0.63} + 63%|██████▎ | 7632/12188 [16:23:30<9:42:11, 7.67s/it] 63%|██████▎ | 7633/12188 [16:23:37<9:34:29, 7.57s/it] {'loss': 0.2881, 'grad_norm': 0.6592349664340437, 'learning_rate': 3.238257534013788e-06, 'epoch': 0.63} + 63%|██████▎ | 7633/12188 [16:23:37<9:34:29, 7.57s/it] 63%|██████▎ | 7634/12188 [16:23:44<9:24:54, 7.44s/it] {'loss': 0.3209, 'grad_norm': 0.6456070353234807, 'learning_rate': 3.237014101579029e-06, 'epoch': 0.63} + 63%|██████▎ | 7634/12188 [16:23:44<9:24:54, 7.44s/it] 63%|██████▎ | 7635/12188 [16:23:53<9:49:39, 7.77s/it] {'loss': 0.2968, 'grad_norm': 0.6182980263764035, 'learning_rate': 3.2357707936435013e-06, 'epoch': 0.63} + 63%|██████▎ | 7635/12188 [16:23:53<9:49:39, 7.77s/it] 63%|██████▎ | 7636/12188 [16:24:00<9:49:50, 7.77s/it] {'loss': 0.322, 'grad_norm': 0.658675778388259, 'learning_rate': 3.2345276102950086e-06, 'epoch': 0.63} + 63%|██████▎ | 7636/12188 [16:24:00<9:49:50, 7.77s/it] 63%|██████▎ | 7637/12188 [16:24:07<9:31:42, 7.54s/it] {'loss': 0.2973, 'grad_norm': 0.6815429627143189, 'learning_rate': 3.2332845516213368e-06, 'epoch': 0.63} + 63%|██████▎ | 7637/12188 [16:24:07<9:31:42, 7.54s/it] 63%|██████▎ | 7638/12188 [16:24:17<10:22:39, 8.21s/it] {'loss': 0.3086, 'grad_norm': 0.7217139583548198, 'learning_rate': 3.2320416177102725e-06, 'epoch': 0.63} + 63%|██████▎ | 7638/12188 [16:24:17<10:22:39, 8.21s/it] 63%|██████▎ | 7639/12188 [16:24:25<10:06:54, 8.01s/it] {'loss': 0.263, 'grad_norm': 0.6669321005894749, 'learning_rate': 3.23079880864959e-06, 'epoch': 0.63} + 63%|██████▎ | 7639/12188 [16:24:25<10:06:54, 8.01s/it] 63%|██████▎ | 7640/12188 [16:24:31<9:37:00, 7.61s/it] {'loss': 0.3444, 'grad_norm': 0.6620074679976954, 'learning_rate': 3.229556124527051e-06, 'epoch': 0.63} + 63%|██████▎ | 7640/12188 [16:24:31<9:37:00, 7.61s/it] 63%|██████▎ | 7641/12188 [16:24:38<9:24:18, 7.45s/it] {'loss': 0.3424, 'grad_norm': 0.723476903374681, 'learning_rate': 3.2283135654304165e-06, 'epoch': 0.63} + 63%|██████▎ | 7641/12188 [16:24:38<9:24:18, 7.45s/it] 63%|██████▎ | 7642/12188 [16:24:47<9:48:11, 7.76s/it] {'loss': 0.2933, 'grad_norm': 0.8254437797499928, 'learning_rate': 3.227071131447429e-06, 'epoch': 0.63} + 63%|██████▎ | 7642/12188 [16:24:47<9:48:11, 7.76s/it] 63%|██████▎ | 7643/12188 [16:24:55<9:49:49, 7.79s/it] {'loss': 0.3034, 'grad_norm': 0.6396991208070041, 'learning_rate': 3.22582882266583e-06, 'epoch': 0.63} + 63%|██████▎ | 7643/12188 [16:24:55<9:49:49, 7.79s/it] 63%|██████▎ | 7644/12188 [16:25:03<9:54:26, 7.85s/it] {'loss': 0.3121, 'grad_norm': 0.6429618311097878, 'learning_rate': 3.2245866391733516e-06, 'epoch': 0.63} + 63%|██████▎ | 7644/12188 [16:25:03<9:54:26, 7.85s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f81086af420> +[Try #0] Failed to fetch sample 4668074 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f81086af420> +Problematic sample: {'image': '20240827_145511_before_screenshot_sub3.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'User Promoted Notification Area'"}, {'from': 'gpt', 'value': '\nclick(x=0.8805, y=0.972)\n'}]} + 63%|██████▎ | 7645/12188 [16:25:10<9:50:00, 7.79s/it] {'loss': 0.3563, 'grad_norm': 0.6878728754207939, 'learning_rate': 3.223344581057711e-06, 'epoch': 0.63} + 63%|██████▎ | 7645/12188 [16:25:10<9:50:00, 7.79s/it] 63%|██████▎ | 7646/12188 [16:25:17<9:23:30, 7.44s/it] {'loss': 0.2872, 'grad_norm': 0.6489634218843509, 'learning_rate': 3.222102648406621e-06, 'epoch': 0.63} + 63%|██████▎ | 7646/12188 [16:25:17<9:23:30, 7.44s/it] 63%|██████▎ | 7647/12188 [16:25:24<9:13:48, 7.32s/it] {'loss': 0.3136, 'grad_norm': 0.6645297569114453, 'learning_rate': 3.2208608413077865e-06, 'epoch': 0.63} + 63%|██████▎ | 7647/12188 [16:25:24<9:13:48, 7.32s/it] 63%|██████▎ | 7648/12188 [16:25:31<9:09:46, 7.27s/it] {'loss': 0.323, 'grad_norm': 0.6827274864113, 'learning_rate': 3.2196191598488996e-06, 'epoch': 0.63} + 63%|██████▎ | 7648/12188 [16:25:31<9:09:46, 7.27s/it] 63%|██████▎ | 7649/12188 [16:25:38<9:04:58, 7.20s/it] {'loss': 0.3134, 'grad_norm': 0.6544370381231942, 'learning_rate': 3.2183776041176495e-06, 'epoch': 0.63} + 63%|██████▎ | 7649/12188 [16:25:38<9:04:58, 7.20s/it] 63%|██████▎ | 7650/12188 [16:25:45<9:03:44, 7.19s/it] {'loss': 0.3184, 'grad_norm': 0.6502903720963082, 'learning_rate': 3.2171361742017084e-06, 'epoch': 0.63} + 63%|██████▎ | 7650/12188 [16:25:45<9:03:44, 7.19s/it] 63%|██████▎ | 7651/12188 [16:25:52<8:54:13, 7.06s/it] {'loss': 0.3025, 'grad_norm': 0.6931191995602433, 'learning_rate': 3.2158948701887484e-06, 'epoch': 0.63} + 63%|██████▎ | 7651/12188 [16:25:52<8:54:13, 7.06s/it] 63%|██████▎ | 7652/12188 [16:25:59<8:42:55, 6.92s/it] {'loss': 0.3185, 'grad_norm': 0.6193038305977315, 'learning_rate': 3.2146536921664234e-06, 'epoch': 0.63} + 63%|██████▎ | 7652/12188 [16:25:59<8:42:55, 6.92s/it] 63%|██████▎ | 7653/12188 [16:26:06<8:52:41, 7.05s/it] {'loss': 0.3355, 'grad_norm': 0.738392871207813, 'learning_rate': 3.213412640222388e-06, 'epoch': 0.63} + 63%|██████▎ | 7653/12188 [16:26:06<8:52:41, 7.05s/it] 63%|██████▎ | 7654/12188 [16:26:13<8:56:06, 7.09s/it] {'loss': 0.2981, 'grad_norm': 0.6890647582385838, 'learning_rate': 3.2121717144442828e-06, 'epoch': 0.63} + 63%|██████▎ | 7654/12188 [16:26:13<8:56:06, 7.09s/it] 63%|██████▎ | 7655/12188 [16:26:21<9:15:23, 7.35s/it] {'loss': 0.3058, 'grad_norm': 0.6616803790921515, 'learning_rate': 3.2109309149197365e-06, 'epoch': 0.63} + 63%|██████▎ | 7655/12188 [16:26:21<9:15:23, 7.35s/it] 63%|██████▎ | 7656/12188 [16:26:28<9:03:27, 7.19s/it] {'loss': 0.3215, 'grad_norm': 0.6973855099040438, 'learning_rate': 3.2096902417363745e-06, 'epoch': 0.63} + 63%|██████▎ | 7656/12188 [16:26:28<9:03:27, 7.19s/it] 63%|██████▎ | 7657/12188 [16:26:37<9:34:47, 7.61s/it] {'loss': 0.3331, 'grad_norm': 0.7461142555205744, 'learning_rate': 3.2084496949818134e-06, 'epoch': 0.63} + 63%|██████▎ | 7657/12188 [16:26:37<9:34:47, 7.61s/it] 63%|██████▎ | 7658/12188 [16:26:44<9:22:27, 7.45s/it] {'loss': 0.3056, 'grad_norm': 0.710797054548336, 'learning_rate': 3.2072092747436546e-06, 'epoch': 0.63} + 63%|██████▎ | 7658/12188 [16:26:44<9:22:27, 7.45s/it] 63%|██████▎ | 7659/12188 [16:26:52<9:42:33, 7.72s/it] {'loss': 0.3216, 'grad_norm': 0.7217819045092094, 'learning_rate': 3.2059689811094984e-06, 'epoch': 0.63} + 63%|██████▎ | 7659/12188 [16:26:52<9:42:33, 7.72s/it] 63%|██████▎ | 7660/12188 [16:26:59<9:21:00, 7.43s/it] {'loss': 0.3264, 'grad_norm': 0.6268763619627743, 'learning_rate': 3.2047288141669296e-06, 'epoch': 0.63} + 63%|██████▎ | 7660/12188 [16:26:59<9:21:00, 7.43s/it] 63%|██████▎ | 7661/12188 [16:27:06<9:09:18, 7.28s/it] {'loss': 0.3371, 'grad_norm': 0.7107719857504035, 'learning_rate': 3.203488774003526e-06, 'epoch': 0.63} + 63%|██████▎ | 7661/12188 [16:27:06<9:09:18, 7.28s/it] 63%|██████▎ | 7662/12188 [16:27:14<9:39:29, 7.68s/it] {'loss': 0.2917, 'grad_norm': 0.6763816441474215, 'learning_rate': 3.2022488607068615e-06, 'epoch': 0.63} + 63%|██████▎ | 7662/12188 [16:27:14<9:39:29, 7.68s/it] 63%|██████▎ | 7663/12188 [16:27:21<9:21:49, 7.45s/it] {'loss': 0.3552, 'grad_norm': 0.7244356410442343, 'learning_rate': 3.201009074364492e-06, 'epoch': 0.63} + 63%|██████▎ | 7663/12188 [16:27:21<9:21:49, 7.45s/it] 63%|██████▎ | 7664/12188 [16:27:30<9:48:41, 7.81s/it] {'loss': 0.3357, 'grad_norm': 0.728125741078538, 'learning_rate': 3.1997694150639734e-06, 'epoch': 0.63} + 63%|██████▎ | 7664/12188 [16:27:30<9:48:41, 7.81s/it] 63%|██████▎ | 7665/12188 [16:27:37<9:20:21, 7.43s/it] {'loss': 0.2853, 'grad_norm': 0.6842723631256924, 'learning_rate': 3.1985298828928444e-06, 'epoch': 0.63} + 63%|██████▎ | 7665/12188 [16:27:37<9:20:21, 7.43s/it] 63%|██████▎ | 7666/12188 [16:27:44<9:11:56, 7.32s/it] {'loss': 0.3366, 'grad_norm': 0.7293547749459033, 'learning_rate': 3.1972904779386416e-06, 'epoch': 0.63} + 63%|██████▎ | 7666/12188 [16:27:44<9:11:56, 7.32s/it] 63%|██████▎ | 7667/12188 [16:27:50<9:01:27, 7.19s/it] {'loss': 0.2904, 'grad_norm': 0.6171349751698371, 'learning_rate': 3.19605120028889e-06, 'epoch': 0.63} + 63%|██████▎ | 7667/12188 [16:27:50<9:01:27, 7.19s/it] 63%|██████▎ | 7668/12188 [16:27:58<9:19:41, 7.43s/it] {'loss': 0.2989, 'grad_norm': 0.6399289102073317, 'learning_rate': 3.1948120500311032e-06, 'epoch': 0.63} + 63%|██████▎ | 7668/12188 [16:27:58<9:19:41, 7.43s/it] 63%|██████▎ | 7669/12188 [16:28:06<9:18:24, 7.41s/it] {'loss': 0.2985, 'grad_norm': 0.7181250489063933, 'learning_rate': 3.1935730272527897e-06, 'epoch': 0.63} + 63%|██████▎ | 7669/12188 [16:28:06<9:18:24, 7.41s/it] 63%|██████▎ | 7670/12188 [16:28:13<9:05:57, 7.25s/it] {'loss': 0.2658, 'grad_norm': 0.598959733121871, 'learning_rate': 3.192334132041446e-06, 'epoch': 0.63} + 63%|██████▎ | 7670/12188 [16:28:13<9:05:57, 7.25s/it] 63%|██████▎ | 7671/12188 [16:28:20<9:05:36, 7.25s/it] {'loss': 0.3406, 'grad_norm': 0.6150304238491153, 'learning_rate': 3.1910953644845612e-06, 'epoch': 0.63} + 63%|██████▎ | 7671/12188 [16:28:20<9:05:36, 7.25s/it] 63%|██████▎ | 7672/12188 [16:28:27<8:50:23, 7.05s/it] {'loss': 0.3034, 'grad_norm': 0.6830495151726997, 'learning_rate': 3.189856724669617e-06, 'epoch': 0.63} + 63%|██████▎ | 7672/12188 [16:28:27<8:50:23, 7.05s/it] 63%|██████▎ | 7673/12188 [16:28:33<8:45:49, 6.99s/it] {'loss': 0.3275, 'grad_norm': 0.6142015012868625, 'learning_rate': 3.1886182126840827e-06, 'epoch': 0.63} + 63%|██████▎ | 7673/12188 [16:28:33<8:45:49, 6.99s/it] 63%|██████▎ | 7674/12188 [16:28:41<8:51:57, 7.07s/it] {'loss': 0.3036, 'grad_norm': 0.7146770626128951, 'learning_rate': 3.187379828615418e-06, 'epoch': 0.63} + 63%|██████▎ | 7674/12188 [16:28:41<8:51:57, 7.07s/it] 63%|██████▎ | 7675/12188 [16:28:48<9:02:00, 7.21s/it] {'loss': 0.3227, 'grad_norm': 0.820344375086415, 'learning_rate': 3.1861415725510804e-06, 'epoch': 0.63} + 63%|██████▎ | 7675/12188 [16:28:48<9:02:00, 7.21s/it] 63%|██████▎ | 7676/12188 [16:28:56<9:14:36, 7.38s/it] {'loss': 0.3004, 'grad_norm': 0.6395680631808929, 'learning_rate': 3.1849034445785087e-06, 'epoch': 0.63} + 63%|██████▎ | 7676/12188 [16:28:56<9:14:36, 7.38s/it] 63%|██████▎ | 7677/12188 [16:29:03<9:12:44, 7.35s/it] {'loss': 0.3521, 'grad_norm': 0.6582495958386495, 'learning_rate': 3.1836654447851412e-06, 'epoch': 0.63} + 63%|██████▎ | 7677/12188 [16:29:03<9:12:44, 7.35s/it] 63%|██████▎ | 7678/12188 [16:29:11<9:13:59, 7.37s/it] {'loss': 0.322, 'grad_norm': 0.664814553175427, 'learning_rate': 3.1824275732584e-06, 'epoch': 0.63} + 63%|██████▎ | 7678/12188 [16:29:11<9:13:59, 7.37s/it] 63%|██████▎ | 7679/12188 [16:29:17<8:58:23, 7.16s/it] {'loss': 0.2988, 'grad_norm': 0.7123673891155939, 'learning_rate': 3.181189830085704e-06, 'epoch': 0.63} + 63%|██████▎ | 7679/12188 [16:29:17<8:58:23, 7.16s/it] 63%|██████▎ | 7680/12188 [16:29:24<8:57:35, 7.16s/it] {'loss': 0.3034, 'grad_norm': 0.6552665439888942, 'learning_rate': 3.1799522153544604e-06, 'epoch': 0.63} + 63%|██████▎ | 7680/12188 [16:29:24<8:57:35, 7.16s/it] 63%|██████▎ | 7681/12188 [16:29:32<8:55:42, 7.13s/it] {'loss': 0.3425, 'grad_norm': 0.7884622893745143, 'learning_rate': 3.1787147291520675e-06, 'epoch': 0.63} + 63%|██████▎ | 7681/12188 [16:29:32<8:55:42, 7.13s/it] 63%|██████▎ | 7682/12188 [16:29:38<8:44:48, 6.99s/it] {'loss': 0.3202, 'grad_norm': 0.6886132983028815, 'learning_rate': 3.1774773715659152e-06, 'epoch': 0.63} + 63%|██████▎ | 7682/12188 [16:29:38<8:44:48, 6.99s/it] 63%|██████▎ | 7683/12188 [16:29:45<8:48:16, 7.04s/it] {'loss': 0.3597, 'grad_norm': 0.8753437990173845, 'learning_rate': 3.1762401426833806e-06, 'epoch': 0.63} + 63%|██████▎ | 7683/12188 [16:29:45<8:48:16, 7.04s/it] 63%|██████▎ | 7684/12188 [16:29:53<9:04:35, 7.25s/it] {'loss': 0.3036, 'grad_norm': 0.7235016535126797, 'learning_rate': 3.175003042591837e-06, 'epoch': 0.63} + 63%|██████▎ | 7684/12188 [16:29:53<9:04:35, 7.25s/it] 63%|██████▎ | 7685/12188 [16:30:01<9:07:52, 7.30s/it] {'loss': 0.3485, 'grad_norm': 0.6932519814010062, 'learning_rate': 3.173766071378649e-06, 'epoch': 0.63} + 63%|██████▎ | 7685/12188 [16:30:01<9:07:52, 7.30s/it] 63%|██████▎ | 7686/12188 [16:30:08<9:17:53, 7.44s/it] {'loss': 0.3177, 'grad_norm': 0.6230443462360661, 'learning_rate': 3.1725292291311645e-06, 'epoch': 0.63} + 63%|██████▎ | 7686/12188 [16:30:08<9:17:53, 7.44s/it] 63%|██████▎ | 7687/12188 [16:30:15<8:59:29, 7.19s/it] {'loss': 0.3016, 'grad_norm': 0.6553346560339011, 'learning_rate': 3.1712925159367313e-06, 'epoch': 0.63} + 63%|██████▎ | 7687/12188 [16:30:15<8:59:29, 7.19s/it] 63%|██████▎ | 7688/12188 [16:30:22<9:01:21, 7.22s/it] {'loss': 0.3042, 'grad_norm': 0.7329492457946207, 'learning_rate': 3.1700559318826823e-06, 'epoch': 0.63} + 63%|██████▎ | 7688/12188 [16:30:22<9:01:21, 7.22s/it] 63%|██████▎ | 7689/12188 [16:30:29<8:47:35, 7.04s/it] {'loss': 0.3289, 'grad_norm': 0.770709513148705, 'learning_rate': 3.1688194770563424e-06, 'epoch': 0.63} + 63%|██████▎ | 7689/12188 [16:30:29<8:47:35, 7.04s/it] 63%|██████▎ | 7690/12188 [16:30:37<9:16:09, 7.42s/it] {'loss': 0.3103, 'grad_norm': 0.647648193195911, 'learning_rate': 3.1675831515450305e-06, 'epoch': 0.63} + 63%|██████▎ | 7690/12188 [16:30:37<9:16:09, 7.42s/it] 63%|██████▎ | 7691/12188 [16:30:45<9:20:06, 7.47s/it] {'loss': 0.3458, 'grad_norm': 0.6848508568075091, 'learning_rate': 3.166346955436051e-06, 'epoch': 0.63} + 63%|██████▎ | 7691/12188 [16:30:45<9:20:06, 7.47s/it] 63%|██████▎ | 7692/12188 [16:30:53<9:34:48, 7.67s/it] {'loss': 0.2987, 'grad_norm': 0.659630068089105, 'learning_rate': 3.1651108888167033e-06, 'epoch': 0.63} + 63%|██████▎ | 7692/12188 [16:30:53<9:34:48, 7.67s/it] 63%|██████▎ | 7693/12188 [16:31:00<9:22:47, 7.51s/it] {'loss': 0.3089, 'grad_norm': 0.6067012017581452, 'learning_rate': 3.1638749517742785e-06, 'epoch': 0.63} + 63%|██████▎ | 7693/12188 [16:31:00<9:22:47, 7.51s/it] 63%|██████▎ | 7694/12188 [16:31:08<9:31:15, 7.63s/it] {'loss': 0.2849, 'grad_norm': 0.6415237323407528, 'learning_rate': 3.1626391443960537e-06, 'epoch': 0.63} + 63%|██████▎ | 7694/12188 [16:31:08<9:31:15, 7.63s/it] 63%|██████▎ | 7695/12188 [16:31:14<9:07:49, 7.32s/it] {'loss': 0.2893, 'grad_norm': 0.6965538761496733, 'learning_rate': 3.1614034667693016e-06, 'epoch': 0.63} + 63%|██████▎ | 7695/12188 [16:31:14<9:07:49, 7.32s/it] 63%|██████▎ | 7696/12188 [16:31:21<8:58:00, 7.19s/it] {'loss': 0.3199, 'grad_norm': 0.7015444175306825, 'learning_rate': 3.1601679189812806e-06, 'epoch': 0.63} + 63%|██████▎ | 7696/12188 [16:31:21<8:58:00, 7.19s/it] 63%|██████▎ | 7697/12188 [16:31:31<9:48:44, 7.87s/it] {'loss': 0.298, 'grad_norm': 0.5915769295369177, 'learning_rate': 3.1589325011192457e-06, 'epoch': 0.63} + 63%|██████▎ | 7697/12188 [16:31:31<9:48:44, 7.87s/it] 63%|██████▎ | 7698/12188 [16:31:38<9:32:56, 7.66s/it] {'loss': 0.3881, 'grad_norm': 0.7857930812724384, 'learning_rate': 3.1576972132704413e-06, 'epoch': 0.63} + 63%|██████▎ | 7698/12188 [16:31:38<9:32:56, 7.66s/it] 63%|██████▎ | 7699/12188 [16:31:45<9:24:21, 7.54s/it] {'loss': 0.3189, 'grad_norm': 0.7164104071056457, 'learning_rate': 3.1564620555220983e-06, 'epoch': 0.63} + 63%|██████▎ | 7699/12188 [16:31:45<9:24:21, 7.54s/it] 63%|██████▎ | 7700/12188 [16:31:52<9:07:04, 7.31s/it] {'loss': 0.3185, 'grad_norm': 0.730883707832101, 'learning_rate': 3.155227027961444e-06, 'epoch': 0.63} + 63%|██████▎ | 7700/12188 [16:31:52<9:07:04, 7.31s/it] 63%|██████▎ | 7701/12188 [16:31:59<9:07:44, 7.32s/it] {'loss': 0.3004, 'grad_norm': 0.6656107309884524, 'learning_rate': 3.153992130675693e-06, 'epoch': 0.63} + 63%|██████▎ | 7701/12188 [16:31:59<9:07:44, 7.32s/it] 63%|██████▎ | 7702/12188 [16:32:06<8:52:03, 7.12s/it] {'loss': 0.321, 'grad_norm': 0.704859230151463, 'learning_rate': 3.15275736375205e-06, 'epoch': 0.63} + 63%|██████▎ | 7702/12188 [16:32:06<8:52:03, 7.12s/it] 63%|██████▎ | 7703/12188 [16:32:13<8:54:50, 7.16s/it] {'loss': 0.3049, 'grad_norm': 0.6766159215327779, 'learning_rate': 3.1515227272777177e-06, 'epoch': 0.63} + 63%|██████▎ | 7703/12188 [16:32:13<8:54:50, 7.16s/it] 63%|██████▎ | 7704/12188 [16:32:21<9:01:41, 7.25s/it] {'loss': 0.2859, 'grad_norm': 0.6955054441538157, 'learning_rate': 3.1502882213398776e-06, 'epoch': 0.63} + 63%|██████▎ | 7704/12188 [16:32:21<9:01:41, 7.25s/it] 63%|██████▎ | 7705/12188 [16:32:28<8:52:59, 7.13s/it] {'loss': 0.3289, 'grad_norm': 0.6607658627429287, 'learning_rate': 3.1490538460257135e-06, 'epoch': 0.63} + 63%|██████▎ | 7705/12188 [16:32:28<8:52:59, 7.13s/it] 63%|██████▎ | 7706/12188 [16:32:35<8:56:55, 7.19s/it] {'loss': 0.2979, 'grad_norm': 0.7047971494036114, 'learning_rate': 3.1478196014223916e-06, 'epoch': 0.63} + 63%|██████▎ | 7706/12188 [16:32:35<8:56:55, 7.19s/it] 63%|██████▎ | 7707/12188 [16:32:42<8:49:44, 7.09s/it] {'loss': 0.3168, 'grad_norm': 0.7931731283742318, 'learning_rate': 3.146585487617074e-06, 'epoch': 0.63} + 63%|██████▎ | 7707/12188 [16:32:42<8:49:44, 7.09s/it] 63%|██████▎ | 7708/12188 [16:32:49<8:52:39, 7.13s/it] {'loss': 0.327, 'grad_norm': 0.6838545553218616, 'learning_rate': 3.145351504696912e-06, 'epoch': 0.63} + 63%|██████▎ | 7708/12188 [16:32:49<8:52:39, 7.13s/it] 63%|██████▎ | 7709/12188 [16:32:56<8:53:51, 7.15s/it] {'loss': 0.305, 'grad_norm': 0.6779178034041179, 'learning_rate': 3.144117652749047e-06, 'epoch': 0.63} + 63%|██████▎ | 7709/12188 [16:32:56<8:53:51, 7.15s/it] 63%|██████▎ | 7710/12188 [16:33:04<9:07:25, 7.33s/it] {'loss': 0.3143, 'grad_norm': 0.6598304439869621, 'learning_rate': 3.14288393186061e-06, 'epoch': 0.63} + 63%|██████▎ | 7710/12188 [16:33:04<9:07:25, 7.33s/it] 63%|██████▎ | 7711/12188 [16:33:11<9:00:54, 7.25s/it] {'loss': 0.2768, 'grad_norm': 0.7357075906900127, 'learning_rate': 3.141650342118728e-06, 'epoch': 0.63} + 63%|█��████▎ | 7711/12188 [16:33:11<9:00:54, 7.25s/it] 63%|██████▎ | 7712/12188 [16:33:18<8:58:39, 7.22s/it] {'loss': 0.2891, 'grad_norm': 0.6646151843771846, 'learning_rate': 3.140416883610512e-06, 'epoch': 0.63} + 63%|██████▎ | 7712/12188 [16:33:18<8:58:39, 7.22s/it] 63%|██████▎ | 7713/12188 [16:33:25<8:49:05, 7.09s/it] {'loss': 0.2797, 'grad_norm': 0.6805452976530105, 'learning_rate': 3.13918355642307e-06, 'epoch': 0.63} + 63%|██████▎ | 7713/12188 [16:33:25<8:49:05, 7.09s/it] 63%|██████▎ | 7714/12188 [16:33:32<8:52:04, 7.14s/it] {'loss': 0.3643, 'grad_norm': 0.6857112013066167, 'learning_rate': 3.1379503606434937e-06, 'epoch': 0.63} + 63%|██████▎ | 7714/12188 [16:33:32<8:52:04, 7.14s/it] 63%|██████▎ | 7715/12188 [16:33:39<8:44:38, 7.04s/it] {'loss': 0.3197, 'grad_norm': 0.738476705711589, 'learning_rate': 3.1367172963588714e-06, 'epoch': 0.63} + 63%|██████▎ | 7715/12188 [16:33:39<8:44:38, 7.04s/it] 63%|██████▎ | 7716/12188 [16:33:47<8:57:48, 7.22s/it] {'loss': 0.3529, 'grad_norm': 0.7177204449897122, 'learning_rate': 3.1354843636562816e-06, 'epoch': 0.63} + 63%|██████▎ | 7716/12188 [16:33:47<8:57:48, 7.22s/it] 63%|██████▎ | 7717/12188 [16:33:54<9:12:50, 7.42s/it] {'loss': 0.28, 'grad_norm': 0.619961436495956, 'learning_rate': 3.1342515626227886e-06, 'epoch': 0.63} + 63%|██████▎ | 7717/12188 [16:33:55<9:12:50, 7.42s/it] 63%|██████▎ | 7718/12188 [16:34:04<9:58:50, 8.04s/it] {'loss': 0.3123, 'grad_norm': 0.6652415108340368, 'learning_rate': 3.133018893345455e-06, 'epoch': 0.63} + 63%|██████▎ | 7718/12188 [16:34:04<9:58:50, 8.04s/it] 63%|██████▎ | 7719/12188 [16:34:11<9:31:14, 7.67s/it] {'loss': 0.3269, 'grad_norm': 0.6395013176946704, 'learning_rate': 3.131786355911325e-06, 'epoch': 0.63} + 63%|██████▎ | 7719/12188 [16:34:11<9:31:14, 7.67s/it] 63%|██████▎ | 7720/12188 [16:34:19<9:39:52, 7.79s/it] {'loss': 0.3483, 'grad_norm': 0.7052746151754038, 'learning_rate': 3.1305539504074412e-06, 'epoch': 0.63} + 63%|██████▎ | 7720/12188 [16:34:19<9:39:52, 7.79s/it] 63%|██████▎ | 7721/12188 [16:34:26<9:30:15, 7.66s/it] {'loss': 0.3009, 'grad_norm': 0.6820688966662933, 'learning_rate': 3.1293216769208355e-06, 'epoch': 0.63} + 63%|██████▎ | 7721/12188 [16:34:26<9:30:15, 7.66s/it] 63%|██████▎ | 7722/12188 [16:34:33<9:15:55, 7.47s/it] {'loss': 0.2909, 'grad_norm': 0.6788033104167813, 'learning_rate': 3.128089535538527e-06, 'epoch': 0.63} + 63%|██████▎ | 7722/12188 [16:34:33<9:15:55, 7.47s/it] 63%|██████▎ | 7723/12188 [16:34:40<8:58:02, 7.23s/it] {'loss': 0.3278, 'grad_norm': 0.7172173955801459, 'learning_rate': 3.126857526347529e-06, 'epoch': 0.63} + 63%|██████▎ | 7723/12188 [16:34:40<8:58:02, 7.23s/it] 63%|██████▎ | 7724/12188 [16:34:47<8:58:19, 7.24s/it] {'loss': 0.3086, 'grad_norm': 0.6525387882591213, 'learning_rate': 3.1256256494348413e-06, 'epoch': 0.63} + 63%|██████▎ | 7724/12188 [16:34:47<8:58:19, 7.24s/it] 63%|██████▎ | 7725/12188 [16:34:54<8:56:40, 7.22s/it] {'loss': 0.3071, 'grad_norm': 0.6472449377875968, 'learning_rate': 3.1243939048874585e-06, 'epoch': 0.63} + 63%|██████▎ | 7725/12188 [16:34:54<8:56:40, 7.22s/it] 63%|██████▎ | 7726/12188 [16:35:02<9:04:59, 7.33s/it] {'loss': 0.3211, 'grad_norm': 0.7278765571012226, 'learning_rate': 3.1231622927923667e-06, 'epoch': 0.63} + 63%|██████▎ | 7726/12188 [16:35:02<9:04:59, 7.33s/it] 63%|██████▎ | 7727/12188 [16:35:09<8:54:17, 7.19s/it] {'loss': 0.3654, 'grad_norm': 0.6914740744667582, 'learning_rate': 3.1219308132365365e-06, 'epoch': 0.63} + 63%|██████▎ | 7727/12188 [16:35:09<8:54:17, 7.19s/it] 63%|██████▎ | 7728/12188 [16:35:16<8:54:29, 7.19s/it] {'loss': 0.3042, 'grad_norm': 0.6941591636512837, 'learning_rate': 3.1206994663069364e-06, 'epoch': 0.63} + 63%|██████▎ | 7728/12188 [16:35:16<8:54:29, 7.19s/it] 63%|██████▎ | 7729/12188 [16:35:25<9:32:05, 7.70s/it] {'loss': 0.3257, 'grad_norm': 0.7532209788762007, 'learning_rate': 3.1194682520905207e-06, 'epoch': 0.63} + 63%|██████▎ | 7729/12188 [16:35:25<9:32:05, 7.70s/it] 63%|██████▎ | 7730/12188 [16:35:32<9:15:18, 7.47s/it] {'loss': 0.3422, 'grad_norm': 0.6448576594540297, 'learning_rate': 3.1182371706742344e-06, 'epoch': 0.63} + 63%|██████▎ | 7730/12188 [16:35:32<9:15:18, 7.47s/it] 63%|██████▎ | 7731/12188 [16:35:39<9:09:35, 7.40s/it] {'loss': 0.332, 'grad_norm': 0.6535292216186267, 'learning_rate': 3.1170062221450183e-06, 'epoch': 0.63} + 63%|██████▎ | 7731/12188 [16:35:39<9:09:35, 7.40s/it] 63%|██████▎ | 7732/12188 [16:35:46<9:05:06, 7.34s/it] {'loss': 0.2966, 'grad_norm': 0.686492618120573, 'learning_rate': 3.1157754065897944e-06, 'epoch': 0.63} + 63%|██████▎ | 7732/12188 [16:35:46<9:05:06, 7.34s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 899, in process_image_unified + visual_processed = processor.preprocess(image, return_tensors="pt") + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 432, in preprocess + patches, image_grid_thw = self._preprocess( + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 252, in _preprocess + resized_height, resized_width = smart_resize( + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 69, in smart_resize + raise ValueError(f"height:{height} and width:{width} must be larger than factor:{factor}") +ValueError: height:21 and width:135 must be larger than factor:28 +[Try #0] Failed to fetch sample 2128867 in VC:s3://gui-agent/jedi/images/figma400k/figma400k_extracted/. Exception: height:21 and width:135 must be larger than factor:28 +Problematic sample: {'image': 'b740dccee641dd995e5ce727ca3882efdf31feffa6d5688fe120c85e9c186e93.png', 'conversations': [{'from': 'human', 'value': "\nLet me describe the visual characteristics of this Text label:\nThe element is a text label with the words 'Your Library' in white, set against a black background. The font is sans-serif, providing a clean and modern look. It is part of a vertical navigation menu on the left side of the interface, which features other similar text labels and icons.\n\nUsage and purpose of this Text label:\nThe primary function of this element is to navigate the user to their personal library within the application. When clicked, it likely displays the user's saved music, playlists, and other personalized content."}, {'from': 'gpt', 'value': '[[0, 0, 1000, 1000]]', 'recipient': 'all', 'end_turn': True}]} + 63%|██████▎ | 7733/12188 [16:35:53<8:53:32, 7.19s/it] {'loss': 0.2853, 'grad_norm': 0.7569979609088898, 'learning_rate': 3.1145447240954848e-06, 'epoch': 0.63} + 63%|██████▎ | 7733/12188 [16:35:53<8:53:32, 7.19s/it] 63%|██████▎ | 7734/12188 [16:36:00<8:51:37, 7.16s/it] {'loss': 0.2925, 'grad_norm': 0.6035540458827029, 'learning_rate': 3.1133141747489993e-06, 'epoch': 0.63} + 63%|██████▎ | 7734/12188 [16:36:00<8:51:37, 7.16s/it] 63%|██████▎ | 7735/12188 [16:36:07<8:49:12, 7.13s/it] {'loss': 0.3177, 'grad_norm': 0.7413393955543822, 'learning_rate': 3.1120837586372343e-06, 'epoch': 0.63} + 63%|██████▎ | 7735/12188 [16:36:07<8:49:12, 7.13s/it] 63%|██████▎ | 7736/12188 [16:36:15<8:56:32, 7.23s/it] {'loss': 0.3307, 'grad_norm': 0.6515873071705026, 'learning_rate': 3.1108534758470817e-06, 'epoch': 0.63} + 63%|██████▎ | 7736/12188 [16:36:15<8:56:32, 7.23s/it] 63%|██████▎ | 7737/12188 [16:36:22<8:49:46, 7.14s/it] {'loss': 0.3069, 'grad_norm': 0.629822621986909, 'learning_rate': 3.109623326465421e-06, 'epoch': 0.63} + 63%|██████▎ | 7737/12188 [16:36:22<8:49:46, 7.14s/it] 63%|██████▎ | 7738/12188 [16:36:28<8:36:34, 6.96s/it] {'loss': 0.3209, 'grad_norm': 0.7625268411358558, 'learning_rate': 3.108393310579123e-06, 'epoch': 0.63} + 63%|██████▎ | 7738/12188 [16:36:28<8:36:34, 6.96s/it] 63%|██████▎ | 7739/12188 [16:36:35<8:39:35, 7.01s/it] {'loss': 0.3164, 'grad_norm': 0.6747100024333176, 'learning_rate': 3.1071634282750523e-06, 'epoch': 0.63} + 63%|██████▎ | 7739/12188 [16:36:35<8:39:35, 7.01s/it] 64%|██████▎ | 7740/12188 [16:36:42<8:39:56, 7.01s/it] {'loss': 0.2987, 'grad_norm': 0.8496436443397873, 'learning_rate': 3.1059336796400576e-06, 'epoch': 0.64} + 64%|██████▎ | 7740/12188 [16:36:42<8:39:56, 7.01s/it]Invalidate trace cache @ step 2: expected module 1, but got module 364 + 64%|██████▎ | 7741/12188 [16:36:49<8:27:18, 6.84s/it] {'loss': 0.6918, 'grad_norm': 0.5937131282410163, 'learning_rate': 3.104704064760985e-06, 'epoch': 0.64} + 64%|██████▎ | 7741/12188 [16:36:49<8:27:18, 6.84s/it] 64%|██████▎ | 7742/12188 [16:36:56<8:34:16, 6.94s/it] {'loss': 0.3256, 'grad_norm': 0.8217064341260074, 'learning_rate': 3.1034745837246636e-06, 'epoch': 0.64} + 64%|██████▎ | 7742/12188 [16:36:56<8:34:16, 6.94s/it] 64%|██████▎ | 7743/12188 [16:37:04<8:53:58, 7.21s/it] {'loss': 0.2879, 'grad_norm': 0.6213375639938397, 'learning_rate': 3.1022452366179223e-06, 'epoch': 0.64} + 64%|██████▎ | 7743/12188 [16:37:04<8:53:58, 7.21s/it] 64%|██████▎ | 7744/12188 [16:37:11<8:50:32, 7.16s/it] {'loss': 0.2929, 'grad_norm': 0.7154280654282967, 'learning_rate': 3.1010160235275743e-06, 'epoch': 0.64} + 64%|██████▎ | 7744/12188 [16:37:11<8:50:32, 7.16s/it] 64%|██████▎ | 7745/12188 [16:37:18<8:43:44, 7.07s/it] {'loss': 0.3335, 'grad_norm': 0.66271222534041, 'learning_rate': 3.0997869445404214e-06, 'epoch': 0.64} + 64%|██████▎ | 7745/12188 [16:37:18<8:43:44, 7.07s/it] 64%|██████▎ | 7746/12188 [16:37:25<8:53:42, 7.21s/it] {'loss': 0.3431, 'grad_norm': 0.7180636539183726, 'learning_rate': 3.0985579997432613e-06, 'epoch': 0.64} + 64%|██████▎ | 7746/12188 [16:37:25<8:53:42, 7.21s/it] 64%|██████▎ | 7747/12188 [16:37:33<9:13:26, 7.48s/it] {'loss': 0.3505, 'grad_norm': 0.7687384237103397, 'learning_rate': 3.097329189222883e-06, 'epoch': 0.64} + 64%|██████▎ | 7747/12188 [16:37:33<9:13:26, 7.48s/it] 64%|██████▎ | 7748/12188 [16:37:41<9:15:32, 7.51s/it] {'loss': 0.3323, 'grad_norm': 0.6185108877599653, 'learning_rate': 3.0961005130660584e-06, 'epoch': 0.64} + 64%|██████▎ | 7748/12188 [16:37:41<9:15:32, 7.51s/it] 64%|██████▎ | 7749/12188 [16:37:50<9:56:59, 8.07s/it] {'loss': 0.2964, 'grad_norm': 0.6843305741287432, 'learning_rate': 3.094871971359558e-06, 'epoch': 0.64} + 64%|██████▎ | 7749/12188 [16:37:50<9:56:59, 8.07s/it] 64%|██████▎ | 7750/12188 [16:37:58<9:51:18, 7.99s/it] {'loss': 0.2877, 'grad_norm': 0.6878783804828956, 'learning_rate': 3.093643564190138e-06, 'epoch': 0.64} + 64%|██████▎ | 7750/12188 [16:37:58<9:51:18, 7.99s/it] 64%|██████▎ | 7751/12188 [16:38:05<9:26:00, 7.65s/it] {'loss': 0.3461, 'grad_norm': 0.6648808518767417, 'learning_rate': 3.092415291644545e-06, 'epoch': 0.64} + 64%|██████▎ | 7751/12188 [16:38:05<9:26:00, 7.65s/it] 64%|██████▎ | 7752/12188 [16:38:13<9:31:16, 7.73s/it] {'loss': 0.3038, 'grad_norm': 0.676705409848407, 'learning_rate': 3.0911871538095217e-06, 'epoch': 0.64} + 64%|██████▎ | 7752/12188 [16:38:13<9:31:16, 7.73s/it] 64%|██████▎ | 7753/12188 [16:38:20<9:29:01, 7.70s/it] {'loss': 0.3079, 'grad_norm': 0.8349058550922067, 'learning_rate': 3.089959150771793e-06, 'epoch': 0.64} + 64%|██████▎ | 7753/12188 [16:38:20<9:29:01, 7.70s/it] 64%|██████▎ | 7754/12188 [16:38:28<9:16:45, 7.53s/it] {'loss': 0.315, 'grad_norm': 0.6958876339775097, 'learning_rate': 3.088731282618083e-06, 'epoch': 0.64} + 64%|██████▎ | 7754/12188 [16:38:28<9:16:45, 7.53s/it] 64%|██████▎ | 7755/12188 [16:38:34<9:01:57, 7.34s/it] {'loss': 0.3498, 'grad_norm': 0.6853061478553536, 'learning_rate': 3.087503549435096e-06, 'epoch': 0.64} + 64%|██████▎ | 7755/12188 [16:38:34<9:01:57, 7.34s/it] 64%|██████▎ | 7756/12188 [16:38:42<8:54:47, 7.24s/it] {'loss': 0.3164, 'grad_norm': 0.7244817580127989, 'learning_rate': 3.0862759513095364e-06, 'epoch': 0.64} + 64%|██████▎ | 7756/12188 [16:38:42<8:54:47, 7.24s/it] 64%|██████▎ | 7757/12188 [16:38:49<9:01:33, 7.33s/it] {'loss': 0.2983, 'grad_norm': 1.24767176170229, 'learning_rate': 3.0850484883280963e-06, 'epoch': 0.64} + 64%|██████▎ | 7757/12188 [16:38:49<9:01:33, 7.33s/it] 64%|██████▎ | 7758/12188 [16:38:57<9:11:47, 7.47s/it] {'loss': 0.309, 'grad_norm': 0.7757573417954177, 'learning_rate': 3.083821160577453e-06, 'epoch': 0.64} + 64%|██████▎ | 7758/12188 [16:38:57<9:11:47, 7.47s/it] 64%|██████▎ | 7759/12188 [16:39:04<9:07:26, 7.42s/it] {'loss': 0.3169, 'grad_norm': 0.6753151552296048, 'learning_rate': 3.082593968144283e-06, 'epoch': 0.64} + 64%|██████▎ | 7759/12188 [16:39:04<9:07:26, 7.42s/it] 64%|██████▎ | 7760/12188 [16:39:11<8:51:17, 7.20s/it] {'loss': 0.3258, 'grad_norm': 0.6986615028157744, 'learning_rate': 3.0813669111152445e-06, 'epoch': 0.64} + 64%|██████▎ | 7760/12188 [16:39:11<8:51:17, 7.20s/it] 64%|██████▎ | 7761/12188 [16:39:18<8:41:29, 7.07s/it] {'loss': 0.3198, 'grad_norm': 0.7586305099647502, 'learning_rate': 3.0801399895769917e-06, 'epoch': 0.64} + 64%|██████▎ | 7761/12188 [16:39:18<8:41:29, 7.07s/it] 64%|██████▎ | 7762/12188 [16:39:25<8:45:30, 7.12s/it] {'loss': 0.3182, 'grad_norm': 0.6816405953987477, 'learning_rate': 3.0789132036161707e-06, 'epoch': 0.64} + 64%|██████▎ | 7762/12188 [16:39:25<8:45:30, 7.12s/it] 64%|██████▎ | 7763/12188 [16:39:33<9:16:26, 7.54s/it] {'loss': 0.3193, 'grad_norm': 0.7083136586293868, 'learning_rate': 3.0776865533194123e-06, 'epoch': 0.64} + 64%|██████▎ | 7763/12188 [16:39:33<9:16:26, 7.54s/it] 64%|██████▎ | 7764/12188 [16:39:41<9:08:40, 7.44s/it] {'loss': 0.3188, 'grad_norm': 0.7942095722553699, 'learning_rate': 3.076460038773339e-06, 'epoch': 0.64} + 64%|██████▎ | 7764/12188 [16:39:41<9:08:40, 7.44s/it] 64%|██████▎ | 7765/12188 [16:39:47<8:49:28, 7.18s/it] {'loss': 0.3151, 'grad_norm': 0.6728994197255231, 'learning_rate': 3.0752336600645705e-06, 'epoch': 0.64} + 64%|██████▎ | 7765/12188 [16:39:47<8:49:28, 7.18s/it] 64%|██████▎ | 7766/12188 [16:39:55<8:58:00, 7.30s/it] {'loss': 0.3092, 'grad_norm': 0.697624331311182, 'learning_rate': 3.0740074172797058e-06, 'epoch': 0.64} + 64%|██████▎ | 7766/12188 [16:39:55<8:58:00, 7.30s/it] 64%|██████▎ | 7767/12188 [16:40:01<8:44:41, 7.12s/it] {'loss': 0.282, 'grad_norm': 0.6604789170946354, 'learning_rate': 3.072781310505345e-06, 'epoch': 0.64} + 64%|██████▎ | 7767/12188 [16:40:01<8:44:41, 7.12s/it] 64%|██████▎ | 7768/12188 [16:40:08<8:34:05, 6.98s/it] {'loss': 0.2862, 'grad_norm': 0.6655589846856483, 'learning_rate': 3.0715553398280694e-06, 'epoch': 0.64} + 64%|██████▎ | 7768/12188 [16:40:08<8:34:05, 6.98s/it] 64%|██████▎ | 7769/12188 [16:40:16<8:51:33, 7.22s/it] {'loss': 0.2953, 'grad_norm': 0.6688355026495787, 'learning_rate': 3.0703295053344597e-06, 'epoch': 0.64} + 64%|██████▎ | 7769/12188 [16:40:16<8:51:33, 7.22s/it] 64%|██████▍ | 7770/12188 [16:40:24<9:01:07, 7.35s/it] {'loss': 0.3093, 'grad_norm': 0.7118784222864066, 'learning_rate': 3.069103807111079e-06, 'epoch': 0.64} + 64%|██████▍ | 7770/12188 [16:40:24<9:01:07, 7.35s/it] 64%|██████▍ | 7771/12188 [16:40:30<8:47:16, 7.16s/it] {'loss': 0.3562, 'grad_norm': 0.6926295731579223, 'learning_rate': 3.0678782452444855e-06, 'epoch': 0.64} + 64%|██████▍ | 7771/12188 [16:40:30<8:47:16, 7.16s/it] 64%|██████▍ | 7772/12188 [16:40:37<8:45:01, 7.13s/it] {'loss': 0.3312, 'grad_norm': 0.6750179132320691, 'learning_rate': 3.066652819821227e-06, 'epoch': 0.64} + 64%|██████▍ | 7772/12188 [16:40:37<8:45:01, 7.13s/it] 64%|██████▍ | 7773/12188 [16:40:44<8:33:05, 6.97s/it] {'loss': 0.2796, 'grad_norm': 0.691101915798543, 'learning_rate': 3.0654275309278382e-06, 'epoch': 0.64} + 64%|██████▍ | 7773/12188 [16:40:44<8:33:05, 6.97s/it] 64%|██████▍ | 7774/12188 [16:40:52<8:47:18, 7.17s/it] {'loss': 0.3119, 'grad_norm': 0.7363899930994864, 'learning_rate': 3.064202378650849e-06, 'epoch': 0.64} + 64%|██████▍ | 7774/12188 [16:40:52<8:47:18, 7.17s/it] 64%|██████▍ | 7775/12188 [16:40:58<8:35:27, 7.01s/it] {'loss': 0.3189, 'grad_norm': 0.6756953799120246, 'learning_rate': 3.06297736307678e-06, 'epoch': 0.64} + 64%|██████▍ | 7775/12188 [16:40:58<8:35:27, 7.01s/it] 64%|██████▍ | 7776/12188 [16:41:05<8:27:56, 6.91s/it] {'loss': 0.296, 'grad_norm': 0.67328994396926, 'learning_rate': 3.0617524842921354e-06, 'epoch': 0.64} + 64%|██████▍ | 7776/12188 [16:41:05<8:27:56, 6.91s/it] 64%|██████▍ | 7777/12188 [16:41:12<8:35:29, 7.01s/it] {'loss': 0.2807, 'grad_norm': 0.6513208351645545, 'learning_rate': 3.060527742383419e-06, 'epoch': 0.64} + 64%|██████▍ | 7777/12188 [16:41:12<8:35:29, 7.01s/it] 64%|██████▍ | 7778/12188 [16:41:20<8:50:39, 7.22s/it] {'loss': 0.3171, 'grad_norm': 0.6876881168599975, 'learning_rate': 3.059303137437115e-06, 'epoch': 0.64} + 64%|██████▍ | 7778/12188 [16:41:20<8:50:39, 7.22s/it] 64%|██████▍ | 7779/12188 [16:41:27<8:43:03, 7.12s/it] {'loss': 0.308, 'grad_norm': 0.699789160308309, 'learning_rate': 3.0580786695397057e-06, 'epoch': 0.64} + 64%|██████▍ | 7779/12188 [16:41:27<8:43:03, 7.12s/it] 64%|██████▍ | 7780/12188 [16:41:35<9:02:25, 7.38s/it] {'loss': 0.3489, 'grad_norm': 0.7496674615646777, 'learning_rate': 3.0568543387776623e-06, 'epoch': 0.64} + 64%|██████▍ | 7780/12188 [16:41:35<9:02:25, 7.38s/it] 64%|██████▍ | 7781/12188 [16:41:41<8:48:09, 7.19s/it] {'loss': 0.2831, 'grad_norm': 0.7099805082503451, 'learning_rate': 3.0556301452374415e-06, 'epoch': 0.64} + 64%|██████▍ | 7781/12188 [16:41:41<8:48:09, 7.19s/it] 64%|██████▍ | 7782/12188 [16:41:48<8:35:46, 7.02s/it] {'loss': 0.3291, 'grad_norm': 0.7109047582319586, 'learning_rate': 3.054406089005496e-06, 'epoch': 0.64} + 64%|██████▍ | 7782/12188 [16:41:48<8:35:46, 7.02s/it] 64%|██████▍ | 7783/12188 [16:41:55<8:35:47, 7.03s/it] {'loss': 0.3232, 'grad_norm': 0.6554576447115273, 'learning_rate': 3.0531821701682696e-06, 'epoch': 0.64} + 64%|██████▍ | 7783/12188 [16:41:55<8:35:47, 7.03s/it] 64%|██████▍ | 7784/12188 [16:42:02<8:30:17, 6.95s/it] {'loss': 0.3047, 'grad_norm': 0.6664733612039719, 'learning_rate': 3.0519583888121885e-06, 'epoch': 0.64} + 64%|██████▍ | 7784/12188 [16:42:02<8:30:17, 6.95s/it] 64%|██████▍ | 7785/12188 [16:42:09<8:27:04, 6.91s/it] {'loss': 0.3297, 'grad_norm': 0.6347806729101962, 'learning_rate': 3.0507347450236775e-06, 'epoch': 0.64} + 64%|██████▍ | 7785/12188 [16:42:09<8:27:04, 6.91s/it] 64%|██████▍ | 7786/12188 [16:42:16<8:31:57, 6.98s/it] {'loss': 0.3029, 'grad_norm': 0.6950434945027444, 'learning_rate': 3.0495112388891446e-06, 'epoch': 0.64} + 64%|██████▍ | 7786/12188 [16:42:16<8:31:57, 6.98s/it] 64%|██████▍ | 7787/12188 [16:42:22<8:24:48, 6.88s/it] {'loss': 0.262, 'grad_norm': 0.5846720657151786, 'learning_rate': 3.0482878704949946e-06, 'epoch': 0.64} + 64%|██████▍ | 7787/12188 [16:42:22<8:24:48, 6.88s/it] 64%|██████▍ | 7788/12188 [16:42:30<8:40:27, 7.10s/it] {'loss': 0.3323, 'grad_norm': 0.6891519841248843, 'learning_rate': 3.047064639927622e-06, 'epoch': 0.64} + 64%|██████▍ | 7788/12188 [16:42:30<8:40:27, 7.10s/it] 64%|██████▍ | 7789/12188 [16:42:37<8:36:36, 7.05s/it] {'loss': 0.3263, 'grad_norm': 0.6781163009345883, 'learning_rate': 3.045841547273405e-06, 'epoch': 0.64} + 64%|██████▍ | 7789/12188 [16:42:37<8:36:36, 7.05s/it] 64%|██████▍ | 7790/12188 [16:42:46<9:29:20, 7.77s/it] {'loss': 0.3153, 'grad_norm': 0.6517064844601173, 'learning_rate': 3.04461859261872e-06, 'epoch': 0.64} + 64%|██████▍ | 7790/12188 [16:42:46<9:29:20, 7.77s/it] 64%|██████▍ | 7791/12188 [16:42:55<9:36:19, 7.86s/it] {'loss': 0.2853, 'grad_norm': 0.618788669369427, 'learning_rate': 3.0433957760499265e-06, 'epoch': 0.64} + 64%|██████▍ | 7791/12188 [16:42:55<9:36:19, 7.86s/it] 64%|██████▍ | 7792/12188 [16:43:02<9:17:11, 7.61s/it] {'loss': 0.3541, 'grad_norm': 0.7313311813652529, 'learning_rate': 3.0421730976533805e-06, 'epoch': 0.64} + 64%|██████▍ | 7792/12188 [16:43:02<9:17:11, 7.61s/it] 64%|██████▍ | 7793/12188 [16:43:09<9:07:15, 7.47s/it] {'loss': 0.3461, 'grad_norm': 0.6790154831189619, 'learning_rate': 3.0409505575154254e-06, 'epoch': 0.64} + 64%|██████▍ | 7793/12188 [16:43:09<9:07:15, 7.47s/it] 64%|██████▍ | 7794/12188 [16:43:17<9:19:36, 7.64s/it] {'loss': 0.3104, 'grad_norm': 0.7438733473960171, 'learning_rate': 3.0397281557223934e-06, 'epoch': 0.64} + 64%|██████▍ | 7794/12188 [16:43:17<9:19:36, 7.64s/it] 64%|██████▍ | 7795/12188 [16:43:24<9:10:10, 7.51s/it] {'loss': 0.3002, 'grad_norm': 0.6348597939359599, 'learning_rate': 3.0385058923606107e-06, 'epoch': 0.64} + 64%|██████▍ | 7795/12188 [16:43:24<9:10:10, 7.51s/it] 64%|██████▍ | 7796/12188 [16:43:31<8:59:40, 7.37s/it] {'loss': 0.323, 'grad_norm': 0.6868727191320944, 'learning_rate': 3.037283767516389e-06, 'epoch': 0.64} + 64%|██████▍ | 7796/12188 [16:43:31<8:59:40, 7.37s/it] 64%|██████▍ | 7797/12188 [16:43:38<8:42:45, 7.14s/it] {'loss': 0.319, 'grad_norm': 0.6779647267327232, 'learning_rate': 3.0360617812760355e-06, 'epoch': 0.64} + 64%|██████▍ | 7797/12188 [16:43:38<8:42:45, 7.14s/it] 64%|██████▍ | 7798/12188 [16:43:44<8:33:11, 7.01s/it] {'loss': 0.3242, 'grad_norm': 0.709800112679197, 'learning_rate': 3.0348399337258438e-06, 'epoch': 0.64} + 64%|██████▍ | 7798/12188 [16:43:44<8:33:11, 7.01s/it] 64%|██████▍ | 7799/12188 [16:43:51<8:36:18, 7.06s/it] {'loss': 0.3224, 'grad_norm': 0.6612482934549028, 'learning_rate': 3.0336182249520986e-06, 'epoch': 0.64} + 64%|██████▍ | 7799/12188 [16:43:51<8:36:18, 7.06s/it] 64%|██████▍ | 7800/12188 [16:43:58<8:29:22, 6.97s/it] {'loss': 0.3117, 'grad_norm': 0.6069927803760065, 'learning_rate': 3.0323966550410737e-06, 'epoch': 0.64} + 64%|██████▍ | 7800/12188 [16:43:58<8:29:22, 6.97s/it] 64%|██████▍ | 7801/12188 [16:44:05<8:22:13, 6.87s/it] {'loss': 0.2906, 'grad_norm': 0.679845061045507, 'learning_rate': 3.031175224079038e-06, 'epoch': 0.64} + 64%|██████▍ | 7801/12188 [16:44:05<8:22:13, 6.87s/it] 64%|██████▍ | 7802/12188 [16:44:12<8:32:55, 7.02s/it] {'loss': 0.2786, 'grad_norm': 0.6665156258387546, 'learning_rate': 3.029953932152243e-06, 'epoch': 0.64} + 64%|██████▍ | 7802/12188 [16:44:12<8:32:55, 7.02s/it] 64%|██████▍ | 7803/12188 [16:44:20<8:40:49, 7.13s/it] {'loss': 0.3308, 'grad_norm': 0.6579561914797685, 'learning_rate': 3.0287327793469382e-06, 'epoch': 0.64} + 64%|██████▍ | 7803/12188 [16:44:20<8:40:49, 7.13s/it] 64%|██████▍ | 7804/12188 [16:44:26<8:32:56, 7.02s/it] {'loss': 0.3221, 'grad_norm': 0.6834986973438799, 'learning_rate': 3.0275117657493557e-06, 'epoch': 0.64} + 64%|██████▍ | 7804/12188 [16:44:26<8:32:56, 7.02s/it] 64%|██████▍ | 7805/12188 [16:44:33<8:30:17, 6.99s/it] {'loss': 0.3041, 'grad_norm': 0.6616660118849739, 'learning_rate': 3.0262908914457234e-06, 'epoch': 0.64} + 64%|██████▍ | 7805/12188 [16:44:33<8:30:17, 6.99s/it] 64%|██████▍ | 7806/12188 [16:44:41<8:49:18, 7.25s/it] {'loss': 0.2915, 'grad_norm': 0.7628630461521478, 'learning_rate': 3.0250701565222596e-06, 'epoch': 0.64} + 64%|██████▍ | 7806/12188 [16:44:41<8:49:18, 7.25s/it] 64%|██████▍ | 7807/12188 [16:44:48<8:42:02, 7.15s/it] {'loss': 0.322, 'grad_norm': 0.6493632150315023, 'learning_rate': 3.023849561065165e-06, 'epoch': 0.64} + 64%|██████▍ | 7807/12188 [16:44:48<8:42:02, 7.15s/it] 64%|██████▍ | 7808/12188 [16:44:55<8:38:50, 7.11s/it] {'loss': 0.3434, 'grad_norm': 0.7488627330299291, 'learning_rate': 3.022629105160643e-06, 'epoch': 0.64} + 64%|██████▍ | 7808/12188 [16:44:55<8:38:50, 7.11s/it] 64%|██████▍ | 7809/12188 [16:45:02<8:26:03, 6.93s/it] {'loss': 0.3328, 'grad_norm': 0.7153161835055938, 'learning_rate': 3.021408788894873e-06, 'epoch': 0.64} + 64%|██████▍ | 7809/12188 [16:45:02<8:26:03, 6.93s/it] 64%|██████▍ | 7810/12188 [16:45:09<8:44:50, 7.19s/it] {'loss': 0.3318, 'grad_norm': 0.7138867980168832, 'learning_rate': 3.0201886123540366e-06, 'epoch': 0.64} + 64%|██████▍ | 7810/12188 [16:45:09<8:44:50, 7.19s/it] 64%|██████▍ | 7811/12188 [16:45:17<8:43:00, 7.17s/it] {'loss': 0.3156, 'grad_norm': 0.7233469574992816, 'learning_rate': 3.0189685756243012e-06, 'epoch': 0.64} + 64%|██████▍ | 7811/12188 [16:45:17<8:43:00, 7.17s/it] 64%|██████▍ | 7812/12188 [16:45:23<8:35:49, 7.07s/it] {'loss': 0.2981, 'grad_norm': 0.6515274295096345, 'learning_rate': 3.017748678791821e-06, 'epoch': 0.64} + 64%|██████▍ | 7812/12188 [16:45:23<8:35:49, 7.07s/it] 64%|██████▍ | 7813/12188 [16:45:31<8:43:31, 7.18s/it] {'loss': 0.3314, 'grad_norm': 0.6917718847818376, 'learning_rate': 3.016528921942744e-06, 'epoch': 0.64} + 64%|██████▍ | 7813/12188 [16:45:31<8:43:31, 7.18s/it] 64%|██████▍ | 7814/12188 [16:45:38<8:48:16, 7.25s/it] {'loss': 0.2963, 'grad_norm': 0.6666062574319265, 'learning_rate': 3.015309305163209e-06, 'epoch': 0.64} + 64%|██████▍ | 7814/12188 [16:45:38<8:48:16, 7.25s/it] 64%|██████▍ | 7815/12188 [16:45:46<8:57:31, 7.38s/it] {'loss': 0.3162, 'grad_norm': 0.8424561173968661, 'learning_rate': 3.014089828539341e-06, 'epoch': 0.64} + 64%|██████▍ | 7815/12188 [16:45:46<8:57:31, 7.38s/it] 64%|██████▍ | 7816/12188 [16:45:53<8:42:47, 7.17s/it] {'loss': 0.3513, 'grad_norm': 0.6944531215458454, 'learning_rate': 3.0128704921572607e-06, 'epoch': 0.64} + 64%|██████▍ | 7816/12188 [16:45:53<8:42:47, 7.17s/it] 64%|██████▍ | 7817/12188 [16:46:00<8:44:42, 7.20s/it] {'loss': 0.2923, 'grad_norm': 0.7274371724564016, 'learning_rate': 3.011651296103071e-06, 'epoch': 0.64} + 64%|██████▍ | 7817/12188 [16:46:00<8:44:42, 7.20s/it] 64%|██████▍ | 7818/12188 [16:46:07<8:53:53, 7.33s/it] {'loss': 0.3127, 'grad_norm': 0.6679301792728677, 'learning_rate': 3.010432240462874e-06, 'epoch': 0.64} + 64%|██��███▍ | 7818/12188 [16:46:07<8:53:53, 7.33s/it] 64%|██████▍ | 7819/12188 [16:46:16<9:22:17, 7.72s/it] {'loss': 0.3174, 'grad_norm': 0.7433672337294885, 'learning_rate': 3.0092133253227563e-06, 'epoch': 0.64} + 64%|██████▍ | 7819/12188 [16:46:16<9:22:17, 7.72s/it] 64%|██████▍ | 7820/12188 [16:46:24<9:17:27, 7.66s/it] {'loss': 0.3346, 'grad_norm': 0.9149632976963986, 'learning_rate': 3.007994550768793e-06, 'epoch': 0.64} + 64%|██████▍ | 7820/12188 [16:46:24<9:17:27, 7.66s/it] 64%|██████▍ | 7821/12188 [16:46:32<9:39:19, 7.96s/it] {'loss': 0.2996, 'grad_norm': 0.7154898192118904, 'learning_rate': 3.0067759168870566e-06, 'epoch': 0.64} + 64%|██████▍ | 7821/12188 [16:46:32<9:39:19, 7.96s/it] 64%|██████▍ | 7822/12188 [16:46:39<9:08:23, 7.54s/it] {'loss': 0.3198, 'grad_norm': 0.7200449344034872, 'learning_rate': 3.0055574237635997e-06, 'epoch': 0.64} + 64%|██████▍ | 7822/12188 [16:46:39<9:08:23, 7.54s/it] 64%|██████▍ | 7823/12188 [16:46:46<9:05:43, 7.50s/it] {'loss': 0.3297, 'grad_norm': 0.7371101845080279, 'learning_rate': 3.0043390714844746e-06, 'epoch': 0.64} + 64%|██████▍ | 7823/12188 [16:46:46<9:05:43, 7.50s/it] 64%|██████▍ | 7824/12188 [16:46:54<9:00:43, 7.43s/it] {'loss': 0.3058, 'grad_norm': 0.6837785193002267, 'learning_rate': 3.0031208601357188e-06, 'epoch': 0.64} + 64%|██████▍ | 7824/12188 [16:46:54<9:00:43, 7.43s/it] 64%|██████▍ | 7825/12188 [16:47:01<9:04:23, 7.49s/it] {'loss': 0.2884, 'grad_norm': 0.6016999241048406, 'learning_rate': 3.0019027898033594e-06, 'epoch': 0.64} + 64%|██████▍ | 7825/12188 [16:47:01<9:04:23, 7.49s/it] 64%|██████▍ | 7826/12188 [16:47:09<9:01:56, 7.45s/it] {'loss': 0.2937, 'grad_norm': 0.611405779326475, 'learning_rate': 3.000684860573414e-06, 'epoch': 0.64} + 64%|██████▍ | 7826/12188 [16:47:09<9:01:56, 7.45s/it] 64%|██████▍ | 7827/12188 [16:47:16<8:54:57, 7.36s/it] {'loss': 0.3005, 'grad_norm': 0.5889521605850816, 'learning_rate': 2.999467072531892e-06, 'epoch': 0.64} + 64%|██████▍ | 7827/12188 [16:47:16<8:54:57, 7.36s/it] 64%|██████▍ | 7828/12188 [16:47:23<8:49:22, 7.28s/it] {'loss': 0.3009, 'grad_norm': 0.6577390017864173, 'learning_rate': 2.9982494257647903e-06, 'epoch': 0.64} + 64%|██████▍ | 7828/12188 [16:47:23<8:49:22, 7.28s/it] 64%|██████▍ | 7829/12188 [16:47:30<8:40:04, 7.16s/it] {'loss': 0.3279, 'grad_norm': 0.7395957496905823, 'learning_rate': 2.9970319203581e-06, 'epoch': 0.64} + 64%|██████▍ | 7829/12188 [16:47:30<8:40:04, 7.16s/it] 64%|██████▍ | 7830/12188 [16:47:38<8:56:08, 7.38s/it] {'loss': 0.3037, 'grad_norm': 0.7035972326524855, 'learning_rate': 2.995814556397796e-06, 'epoch': 0.64} + 64%|██████▍ | 7830/12188 [16:47:38<8:56:08, 7.38s/it] 64%|██████▍ | 7831/12188 [16:47:45<9:01:53, 7.46s/it] {'loss': 0.3048, 'grad_norm': 0.7044619277474561, 'learning_rate': 2.994597333969848e-06, 'epoch': 0.64} + 64%|██████▍ | 7831/12188 [16:47:45<9:01:53, 7.46s/it] 64%|██████▍ | 7832/12188 [16:47:52<8:46:01, 7.25s/it] {'loss': 0.341, 'grad_norm': 0.6342716978508968, 'learning_rate': 2.9933802531602153e-06, 'epoch': 0.64} + 64%|██████▍ | 7832/12188 [16:47:52<8:46:01, 7.25s/it] 64%|██████▍ | 7833/12188 [16:48:00<9:06:21, 7.53s/it] {'loss': 0.2818, 'grad_norm': 0.6157272951290039, 'learning_rate': 2.9921633140548446e-06, 'epoch': 0.64} + 64%|██████▍ | 7833/12188 [16:48:00<9:06:21, 7.53s/it] 64%|██████▍ | 7834/12188 [16:48:07<9:00:16, 7.45s/it] {'loss': 0.3282, 'grad_norm': 0.6977014090429889, 'learning_rate': 2.9909465167396764e-06, 'epoch': 0.64} + 64%|██████▍ | 7834/12188 [16:48:07<9:00:16, 7.45s/it] 64%|██████▍ | 7835/12188 [16:48:14<8:46:47, 7.26s/it] {'loss': 0.2971, 'grad_norm': 0.661562347123577, 'learning_rate': 2.9897298613006346e-06, 'epoch': 0.64} + 64%|██████▍ | 7835/12188 [16:48:14<8:46:47, 7.26s/it] 64%|██████▍ | 7836/12188 [16:48:21<8:32:09, 7.06s/it] {'loss': 0.3206, 'grad_norm': 0.6559564512619949, 'learning_rate': 2.988513347823641e-06, 'epoch': 0.64} + 64%|██████▍ | 7836/12188 [16:48:21<8:32:09, 7.06s/it] 64%|██████▍ | 7837/12188 [16:48:29<8:48:28, 7.29s/it] {'loss': 0.3195, 'grad_norm': 0.6438994911304657, 'learning_rate': 2.9872969763946045e-06, 'epoch': 0.64} + 64%|██████▍ | 7837/12188 [16:48:29<8:48:28, 7.29s/it] 64%|██████▍ | 7838/12188 [16:48:36<8:52:54, 7.35s/it] {'loss': 0.3718, 'grad_norm': 0.7088444982372757, 'learning_rate': 2.9860807470994203e-06, 'epoch': 0.64} + 64%|██████▍ | 7838/12188 [16:48:36<8:52:54, 7.35s/it] 64%|██████▍ | 7839/12188 [16:48:43<8:50:40, 7.32s/it] {'loss': 0.2905, 'grad_norm': 0.6304333538233449, 'learning_rate': 2.9848646600239805e-06, 'epoch': 0.64} + 64%|██████▍ | 7839/12188 [16:48:43<8:50:40, 7.32s/it] 64%|██████▍ | 7840/12188 [16:48:51<8:52:05, 7.34s/it] {'loss': 0.2884, 'grad_norm': 0.6902480685911379, 'learning_rate': 2.9836487152541593e-06, 'epoch': 0.64} + 64%|██████▍ | 7840/12188 [16:48:51<8:52:05, 7.34s/it] 64%|██████▍ | 7841/12188 [16:48:57<8:36:33, 7.13s/it] {'loss': 0.3262, 'grad_norm': 0.6342560081869444, 'learning_rate': 2.9824329128758257e-06, 'epoch': 0.64} + 64%|██████▍ | 7841/12188 [16:48:57<8:36:33, 7.13s/it] 64%|██████▍ | 7842/12188 [16:49:05<8:43:15, 7.22s/it] {'loss': 0.3202, 'grad_norm': 0.7409480168897551, 'learning_rate': 2.9812172529748395e-06, 'epoch': 0.64} + 64%|██████▍ | 7842/12188 [16:49:05<8:43:15, 7.22s/it] 64%|██████▍ | 7843/12188 [16:49:12<8:35:02, 7.11s/it] {'loss': 0.2765, 'grad_norm': 0.6636728576391258, 'learning_rate': 2.9800017356370457e-06, 'epoch': 0.64} + 64%|██████▍ | 7843/12188 [16:49:12<8:35:02, 7.11s/it] 64%|██████▍ | 7844/12188 [16:49:18<8:23:13, 6.95s/it] {'loss': 0.3094, 'grad_norm': 0.6973791975289033, 'learning_rate': 2.9787863609482862e-06, 'epoch': 0.64} + 64%|██████▍ | 7844/12188 [16:49:18<8:23:13, 6.95s/it] 64%|██████▍ | 7845/12188 [16:49:26<8:29:59, 7.05s/it] {'loss': 0.3233, 'grad_norm': 0.6687263276299026, 'learning_rate': 2.977571128994385e-06, 'epoch': 0.64} + 64%|██████▍ | 7845/12188 [16:49:26<8:29:59, 7.05s/it] 64%|██████▍ | 7846/12188 [16:49:34<8:55:21, 7.40s/it] {'loss': 0.331, 'grad_norm': 0.6771592618999389, 'learning_rate': 2.9763560398611618e-06, 'epoch': 0.64} + 64%|██████▍ | 7846/12188 [16:49:34<8:55:21, 7.40s/it] 64%|██████▍ | 7847/12188 [16:49:40<8:41:14, 7.20s/it] {'loss': 0.3536, 'grad_norm': 0.7503503454025147, 'learning_rate': 2.975141093634425e-06, 'epoch': 0.64} + 64%|██████▍ | 7847/12188 [16:49:41<8:41:14, 7.20s/it] 64%|██████▍ | 7848/12188 [16:49:47<8:28:42, 7.03s/it] {'loss': 0.3146, 'grad_norm': 0.677093752647987, 'learning_rate': 2.973926290399969e-06, 'epoch': 0.64} + 64%|██████▍ | 7848/12188 [16:49:47<8:28:42, 7.03s/it] 64%|██████▍ | 7849/12188 [16:49:54<8:18:01, 6.89s/it] {'loss': 0.3506, 'grad_norm': 0.6566253071439456, 'learning_rate': 2.9727116302435834e-06, 'epoch': 0.64} + 64%|██████▍ | 7849/12188 [16:49:54<8:18:01, 6.89s/it] 64%|██████▍ | 7850/12188 [16:50:01<8:21:17, 6.93s/it] {'loss': 0.3011, 'grad_norm': 0.6356163072797812, 'learning_rate': 2.9714971132510473e-06, 'epoch': 0.64} + 64%|██████▍ | 7850/12188 [16:50:01<8:21:17, 6.93s/it] 64%|██████▍ | 7851/12188 [16:50:08<8:26:33, 7.01s/it] {'loss': 0.296, 'grad_norm': 0.658584148918262, 'learning_rate': 2.9702827395081236e-06, 'epoch': 0.64} + 64%|██████▍ | 7851/12188 [16:50:08<8:26:33, 7.01s/it] 64%|██████▍ | 7852/12188 [16:50:16<8:46:41, 7.29s/it] {'loss': 0.3408, 'grad_norm': 0.729503951814414, 'learning_rate': 2.9690685091005742e-06, 'epoch': 0.64} + 64%|██████▍ | 7852/12188 [16:50:16<8:46:41, 7.29s/it] 64%|██████▍ | 7853/12188 [16:50:23<8:37:47, 7.17s/it] {'loss': 0.3345, 'grad_norm': 0.7007203427978271, 'learning_rate': 2.9678544221141432e-06, 'epoch': 0.64} + 64%|██████▍ | 7853/12188 [16:50:23<8:37:47, 7.17s/it] 64%|██████▍ | 7854/12188 [16:50:31<9:02:27, 7.51s/it] {'loss': 0.2903, 'grad_norm': 0.666225500184316, 'learning_rate': 2.9666404786345664e-06, 'epoch': 0.64} + 64%|██████▍ | 7854/12188 [16:50:31<9:02:27, 7.51s/it] 64%|██████▍ | 7855/12188 [16:50:39<9:17:50, 7.72s/it] {'loss': 0.3443, 'grad_norm': 0.6968577743421285, 'learning_rate': 2.9654266787475744e-06, 'epoch': 0.64} + 64%|██████▍ | 7855/12188 [16:50:39<9:17:50, 7.72s/it] 64%|██████▍ | 7856/12188 [16:50:47<9:17:49, 7.73s/it] {'loss': 0.2961, 'grad_norm': 0.6240277656376868, 'learning_rate': 2.9642130225388793e-06, 'epoch': 0.64} + 64%|██████▍ | 7856/12188 [16:50:47<9:17:49, 7.73s/it] 64%|██████▍ | 7857/12188 [16:50:54<9:00:36, 7.49s/it] {'loss': 0.3084, 'grad_norm': 0.7328977962303808, 'learning_rate': 2.962999510094192e-06, 'epoch': 0.64} + 64%|██████▍ | 7857/12188 [16:50:54<9:00:36, 7.49s/it] 64%|██████▍ | 7858/12188 [16:51:01<8:46:01, 7.29s/it] {'loss': 0.3164, 'grad_norm': 0.706366268960322, 'learning_rate': 2.9617861414992046e-06, 'epoch': 0.64} + 64%|██████▍ | 7858/12188 [16:51:01<8:46:01, 7.29s/it] 64%|██████▍ | 7859/12188 [16:51:08<8:38:05, 7.18s/it] {'loss': 0.3407, 'grad_norm': 0.6691378155724207, 'learning_rate': 2.9605729168396063e-06, 'epoch': 0.64} + 64%|██████▍ | 7859/12188 [16:51:08<8:38:05, 7.18s/it] 64%|██████▍ | 7860/12188 [16:51:17<9:22:33, 7.80s/it] {'loss': 0.2963, 'grad_norm': 0.6678505134259365, 'learning_rate': 2.959359836201072e-06, 'epoch': 0.64} + 64%|██████▍ | 7860/12188 [16:51:17<9:22:33, 7.80s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 90, in pil_loader + return img.convert("RGB") + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 993, in convert + self.load() + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/ImageFile.py", line 319, in load + raise _get_oserror(err_code, encoder=False) +OSError: unrecognized data stream contents when reading image file +[Try #0] Failed to fetch sample 6014526 in VC:s3://gui-agent/data_20250623/windows_augment/images. Exception: unrecognized data stream contents when reading image file +Problematic sample: {'image': 'inventor/20250512_140254_1/images/before_screenshot_1_id_153_internvl_appearance_crop_1_grounding_instructions_point_o_paste.png', 'conversations': [{'from': 'human', 'value': "\nDetermine the bounding box from: A small button displaying the word 'Home' in standard UI font. The button has a subtle gray background that blends with the interface color scheme and has a standard rectangular shape with rounded corners typical of Autodesk Inventor interface elements."}, {'from': 'gpt', 'value': "A small button displaying the word 'Home' in standard UI font. The button has a subtle gray background that blends with the interface color scheme and has a standard rectangular shape with rounded corners typical of Autodesk Inventor interface elements.[[116, 235, 125, 246]]"}], 'width': 3024, 'height': 1964} + 64%|██████▍ | 7861/12188 [16:51:24<8:58:03, 7.46s/it] {'loss': 0.3135, 'grad_norm': 0.6345569345666814, 'learning_rate': 2.9581468996692675e-06, 'epoch': 0.64} + 64%|██████▍ | 7861/12188 [16:51:24<8:58:03, 7.46s/it] 65%|██████▍ | 7862/12188 [16:51:31<8:51:49, 7.38s/it] {'loss': 0.3053, 'grad_norm': 0.6565842502155249, 'learning_rate': 2.9569341073298496e-06, 'epoch': 0.65} + 65%|██████▍ | 7862/12188 [16:51:31<8:51:49, 7.38s/it] 65%|██████▍ | 7863/12188 [16:51:38<8:56:55, 7.45s/it] {'loss': 0.3093, 'grad_norm': 0.7232982111333305, 'learning_rate': 2.9557214592684603e-06, 'epoch': 0.65} + 65%|██████▍ | 7863/12188 [16:51:38<8:56:55, 7.45s/it] 65%|██████▍ | 7864/12188 [16:51:46<8:59:51, 7.49s/it] {'loss': 0.2812, 'grad_norm': 0.6796880659720962, 'learning_rate': 2.9545089555707374e-06, 'epoch': 0.65} + 65%|██████▍ | 7864/12188 [16:51:46<8:59:51, 7.49s/it] 65%|██████▍ | 7865/12188 [16:51:53<8:48:54, 7.34s/it] {'loss': 0.3, 'grad_norm': 0.6307854246649054, 'learning_rate': 2.9532965963223076e-06, 'epoch': 0.65} + 65%|██████▍ | 7865/12188 [16:51:53<8:48:54, 7.34s/it] 65%|██████▍ | 7866/12188 [16:52:00<8:34:15, 7.14s/it] {'loss': 0.3121, 'grad_norm': 0.6285542849770379, 'learning_rate': 2.9520843816087817e-06, 'epoch': 0.65} + 65%|██████▍ | 7866/12188 [16:52:00<8:34:15, 7.14s/it] 65%|██████▍ | 7867/12188 [16:52:07<8:37:18, 7.18s/it] {'loss': 0.3797, 'grad_norm': 0.6785440817723829, 'learning_rate': 2.9508723115157677e-06, 'epoch': 0.65} + 65%|██████▍ | 7867/12188 [16:52:07<8:37:18, 7.18s/it] 65%|██████▍ | 7868/12188 [16:52:14<8:36:04, 7.17s/it] {'loss': 0.2778, 'grad_norm': 0.6327937214673227, 'learning_rate': 2.94966038612886e-06, 'epoch': 0.65} + 65%|██████▍ | 7868/12188 [16:52:14<8:36:04, 7.17s/it] 65%|██████▍ | 7869/12188 [16:52:21<8:34:05, 7.14s/it] {'loss': 0.3013, 'grad_norm': 0.6718698155949103, 'learning_rate': 2.9484486055336397e-06, 'epoch': 0.65} + 65%|██████▍ | 7869/12188 [16:52:21<8:34:05, 7.14s/it] 65%|██████▍ | 7870/12188 [16:52:28<8:29:56, 7.09s/it] {'loss': 0.3026, 'grad_norm': 0.6398283718344047, 'learning_rate': 2.9472369698156848e-06, 'epoch': 0.65} + 65%|██████▍ | 7870/12188 [16:52:28<8:29:56, 7.09s/it] 65%|██████▍ | 7871/12188 [16:52:35<8:23:55, 7.00s/it] {'loss': 0.3308, 'grad_norm': 0.6965766624832384, 'learning_rate': 2.946025479060555e-06, 'epoch': 0.65} + 65%|██████▍ | 7871/12188 [16:52:35<8:23:55, 7.00s/it] 65%|██████▍ | 7872/12188 [16:52:45<9:34:34, 7.99s/it] {'loss': 0.3116, 'grad_norm': 0.6433599274318208, 'learning_rate': 2.944814133353806e-06, 'epoch': 0.65} + 65%|██████▍ | 7872/12188 [16:52:45<9:34:34, 7.99s/it] 65%|██████▍ | 7873/12188 [16:52:52<9:09:57, 7.65s/it] {'loss': 0.3231, 'grad_norm': 0.7067240700004009, 'learning_rate': 2.9436029327809827e-06, 'epoch': 0.65} + 65%|██████▍ | 7873/12188 [16:52:52<9:09:57, 7.65s/it] 65%|██████▍ | 7874/12188 [16:53:00<9:17:33, 7.75s/it] {'loss': 0.3172, 'grad_norm': 0.6604995479018346, 'learning_rate': 2.942391877427616e-06, 'epoch': 0.65} + 65%|██████▍ | 7874/12188 [16:53:00<9:17:33, 7.75s/it] 65%|██████▍ | 7875/12188 [16:53:07<8:53:52, 7.43s/it] {'loss': 0.3214, 'grad_norm': 0.7459003909179422, 'learning_rate': 2.94118096737923e-06, 'epoch': 0.65} + 65%|██████▍ | 7875/12188 [16:53:07<8:53:52, 7.43s/it] 65%|██████▍ | 7876/12188 [16:53:13<8:37:38, 7.20s/it] {'loss': 0.3048, 'grad_norm': 0.681660223001046, 'learning_rate': 2.939970202721334e-06, 'epoch': 0.65} + 65%|██████▍ | 7876/12188 [16:53:13<8:37:38, 7.20s/it] 65%|██████▍ | 7877/12188 [16:53:21<8:42:52, 7.28s/it] {'loss': 0.3279, 'grad_norm': 0.6096752889595506, 'learning_rate': 2.9387595835394323e-06, 'epoch': 0.65} + 65%|██████▍ | 7877/12188 [16:53:21<8:42:52, 7.28s/it] 65%|██████▍ | 7878/12188 [16:53:28<8:49:28, 7.37s/it] {'loss': 0.328, 'grad_norm': 0.6597694198613703, 'learning_rate': 2.93754910991902e-06, 'epoch': 0.65} + 65%|██████▍ | 7878/12188 [16:53:28<8:49:28, 7.37s/it] 65%|██████▍ | 7879/12188 [16:53:36<9:01:39, 7.54s/it] {'loss': 0.2535, 'grad_norm': 1.047558522241843, 'learning_rate': 2.9363387819455735e-06, 'epoch': 0.65} + 65%|██████▍ | 7879/12188 [16:53:36<9:01:39, 7.54s/it] 65%|██████▍ | 7880/12188 [16:53:44<9:00:11, 7.52s/it] {'loss': 0.3104, 'grad_norm': 0.7894562138932683, 'learning_rate': 2.9351285997045685e-06, 'epoch': 0.65} + 65%|██████▍ | 7880/12188 [16:53:44<9:00:11, 7.52s/it] 65%|██████▍ | 7881/12188 [16:53:51<8:42:29, 7.28s/it] {'loss': 0.3273, 'grad_norm': 0.735184208502519, 'learning_rate': 2.9339185632814636e-06, 'epoch': 0.65} + 65%|██████▍ | 7881/12188 [16:53:51<8:42:29, 7.28s/it] 65%|██████▍ | 7882/12188 [16:53:57<8:31:32, 7.13s/it] {'loss': 0.2951, 'grad_norm': 0.6554458016933145, 'learning_rate': 2.932708672761709e-06, 'epoch': 0.65} + 65%|██████▍ | 7882/12188 [16:53:57<8:31:32, 7.13s/it] 65%|██████▍ | 7883/12188 [16:54:05<8:40:03, 7.25s/it] {'loss': 0.2818, 'grad_norm': 0.6439430117587528, 'learning_rate': 2.9314989282307493e-06, 'epoch': 0.65} + 65%|██████▍ | 7883/12188 [16:54:05<8:40:03, 7.25s/it] 65%|██████▍ | 7884/12188 [16:54:12<8:37:27, 7.21s/it] {'loss': 0.3076, 'grad_norm': 0.7122216663510784, 'learning_rate': 2.9302893297740097e-06, 'epoch': 0.65} + 65%|██████▍ | 7884/12188 [16:54:12<8:37:27, 7.21s/it] 65%|██████▍ | 7885/12188 [16:54:19<8:27:34, 7.08s/it] {'loss': 0.3004, 'grad_norm': 0.6549923454334292, 'learning_rate': 2.9290798774769124e-06, 'epoch': 0.65} + 65%|██████▍ | 7885/12188 [16:54:19<8:27:34, 7.08s/it] 65%|██████▍ | 7886/12188 [16:54:25<8:20:06, 6.98s/it] {'loss': 0.3283, 'grad_norm': 0.6549517521545923, 'learning_rate': 2.927870571424869e-06, 'epoch': 0.65} + 65%|██████▍ | 7886/12188 [16:54:25<8:20:06, 6.98s/it] 65%|██████▍ | 7887/12188 [16:54:33<8:34:32, 7.18s/it] {'loss': 0.2558, 'grad_norm': 0.6562485794043544, 'learning_rate': 2.9266614117032755e-06, 'epoch': 0.65} + 65%|██████▍ | 7887/12188 [16:54:33<8:34:32, 7.18s/it] 65%|██████▍ | 7888/12188 [16:54:40<8:37:32, 7.22s/it] {'loss': 0.3139, 'grad_norm': 0.7253378547406737, 'learning_rate': 2.9254523983975224e-06, 'epoch': 0.65} + 65%|██████▍ | 7888/12188 [16:54:40<8:37:32, 7.22s/it] 65%|██████▍ | 7889/12188 [16:54:48<8:34:17, 7.18s/it] {'loss': 0.3126, 'grad_norm': 0.7418256418922389, 'learning_rate': 2.9242435315929874e-06, 'epoch': 0.65} + 65%|██████▍ | 7889/12188 [16:54:48<8:34:17, 7.18s/it] 65%|██████▍ | 7890/12188 [16:54:54<8:26:18, 7.07s/it] {'loss': 0.3037, 'grad_norm': 0.7778057691949133, 'learning_rate': 2.923034811375038e-06, 'epoch': 0.65} + 65%|██████▍ | 7890/12188 [16:54:54<8:26:18, 7.07s/it] 65%|██████▍ | 7891/12188 [16:55:02<8:33:15, 7.17s/it] {'loss': 0.3286, 'grad_norm': 0.6774589806639804, 'learning_rate': 2.9218262378290364e-06, 'epoch': 0.65} + 65%|██████▍ | 7891/12188 [16:55:02<8:33:15, 7.17s/it] 65%|██████▍ | 7892/12188 [16:55:10<8:50:44, 7.41s/it] {'loss': 0.2966, 'grad_norm': 0.725690248453789, 'learning_rate': 2.920617811040324e-06, 'epoch': 0.65} + 65%|██████▍ | 7892/12188 [16:55:10<8:50:44, 7.41s/it] 65%|██████▍ | 7893/12188 [16:55:17<8:56:58, 7.50s/it] {'loss': 0.2718, 'grad_norm': 0.6693992701819582, 'learning_rate': 2.9194095310942438e-06, 'epoch': 0.65} + 65%|██████▍ | 7893/12188 [16:55:17<8:56:58, 7.50s/it] 65%|██████▍ | 7894/12188 [16:55:25<9:02:34, 7.58s/it] {'loss': 0.3021, 'grad_norm': 0.6873170391610772, 'learning_rate': 2.9182013980761183e-06, 'epoch': 0.65} + 65%|██████▍ | 7894/12188 [16:55:25<9:02:34, 7.58s/it] 65%|██████▍ | 7895/12188 [16:55:32<8:44:34, 7.33s/it] {'loss': 0.3586, 'grad_norm': 0.7516683444515576, 'learning_rate': 2.9169934120712647e-06, 'epoch': 0.65} + 65%|██████▍ | 7895/12188 [16:55:32<8:44:34, 7.33s/it] 65%|██████▍ | 7896/12188 [16:55:40<8:52:17, 7.44s/it] {'loss': 0.272, 'grad_norm': 0.6757927350022866, 'learning_rate': 2.9157855731649908e-06, 'epoch': 0.65} + 65%|██████▍ | 7896/12188 [16:55:40<8:52:17, 7.44s/it] 65%|██████▍ | 7897/12188 [16:55:47<8:44:53, 7.34s/it] {'loss': 0.3111, 'grad_norm': 0.6537422001065344, 'learning_rate': 2.9145778814425905e-06, 'epoch': 0.65} + 65%|██████▍ | 7897/12188 [16:55:47<8:44:53, 7.34s/it] 65%|██████▍ | 7898/12188 [16:55:55<8:55:16, 7.49s/it] {'loss': 0.3189, 'grad_norm': 0.7044753779089296, 'learning_rate': 2.913370336989351e-06, 'epoch': 0.65} + 65%|██████▍ | 7898/12188 [16:55:55<8:55:16, 7.49s/it] 65%|██████▍ | 7899/12188 [16:56:03<9:18:11, 7.81s/it] {'loss': 0.3178, 'grad_norm': 0.7060317684342294, 'learning_rate': 2.912162939890544e-06, 'epoch': 0.65} + 65%|██████▍ | 7899/12188 [16:56:03<9:18:11, 7.81s/it] 65%|██████▍ | 7900/12188 [16:56:10<9:00:25, 7.56s/it] {'loss': 0.3055, 'grad_norm': 0.7287152758109418, 'learning_rate': 2.9109556902314362e-06, 'epoch': 0.65} + 65%|██████▍ | 7900/12188 [16:56:10<9:00:25, 7.56s/it] 65%|██████▍ | 7901/12188 [16:56:18<9:01:38, 7.58s/it] {'loss': 0.3016, 'grad_norm': 0.6478130774693637, 'learning_rate': 2.9097485880972825e-06, 'epoch': 0.65} + 65%|██████▍ | 7901/12188 [16:56:18<9:01:38, 7.58s/it] 65%|██████▍ | 7902/12188 [16:56:25<8:48:17, 7.40s/it] {'loss': 0.3207, 'grad_norm': 0.7247778474563674, 'learning_rate': 2.908541633573324e-06, 'epoch': 0.65} + 65%|██████▍ | 7902/12188 [16:56:25<8:48:17, 7.40s/it] 65%|██████▍ | 7903/12188 [16:56:32<8:39:09, 7.27s/it] {'loss': 0.3258, 'grad_norm': 0.6485163481098979, 'learning_rate': 2.9073348267447944e-06, 'epoch': 0.65} + 65%|██████▍ | 7903/12188 [16:56:32<8:39:09, 7.27s/it] 65%|██████▍ | 7904/12188 [16:56:40<8:58:19, 7.54s/it] {'loss': 0.2776, 'grad_norm': 0.6307363673488708, 'learning_rate': 2.90612816769692e-06, 'epoch': 0.65} + 65%|██████▍ | 7904/12188 [16:56:40<8:58:19, 7.54s/it] 65%|██████▍ | 7905/12188 [16:56:46<8:37:43, 7.25s/it] {'loss': 0.3375, 'grad_norm': 0.7484924495848486, 'learning_rate': 2.9049216565149098e-06, 'epoch': 0.65} + 65%|██████▍ | 7905/12188 [16:56:46<8:37:43, 7.25s/it] 65%|██████▍ | 7906/12188 [16:56:54<8:44:30, 7.35s/it] {'loss': 0.3316, 'grad_norm': 0.6933302123289165, 'learning_rate': 2.9037152932839662e-06, 'epoch': 0.65} + 65%|██████▍ | 7906/12188 [16:56:54<8:44:30, 7.35s/it] 65%|██████▍ | 7907/12188 [16:57:01<8:40:15, 7.29s/it] {'loss': 0.2963, 'grad_norm': 0.851775352987921, 'learning_rate': 2.902509078089279e-06, 'epoch': 0.65} + 65%|██████▍ | 7907/12188 [16:57:01<8:40:15, 7.29s/it] 65%|██████▍ | 7908/12188 [16:57:08<8:30:36, 7.16s/it] {'loss': 0.3573, 'grad_norm': 0.7364113791189463, 'learning_rate': 2.9013030110160306e-06, 'epoch': 0.65} + 65%|██████▍ | 7908/12188 [16:57:08<8:30:36, 7.16s/it] 65%|██████▍ | 7909/12188 [16:57:15<8:29:52, 7.15s/it] {'loss': 0.3298, 'grad_norm': 0.7421336792863152, 'learning_rate': 2.900097092149394e-06, 'epoch': 0.65} + 65%|██████▍ | 7909/12188 [16:57:15<8:29:52, 7.15s/it] 65%|██████▍ | 7910/12188 [16:57:22<8:26:14, 7.10s/it] {'loss': 0.3246, 'grad_norm': 0.6615825562775031, 'learning_rate': 2.8988913215745247e-06, 'epoch': 0.65} + 65%|██████▍ | 7910/12188 [16:57:22<8:26:14, 7.10s/it] 65%|██████▍ | 7911/12188 [16:57:29<8:31:14, 7.17s/it] {'loss': 0.3229, 'grad_norm': 0.7023137444472821, 'learning_rate': 2.8976856993765766e-06, 'epoch': 0.65} + 65%|██████▍ | 7911/12188 [16:57:29<8:31:14, 7.17s/it] 65%|██████▍ | 7912/12188 [16:57:36<8:18:52, 7.00s/it] {'loss': 0.3264, 'grad_norm': 0.7331880495541492, 'learning_rate': 2.896480225640685e-06, 'epoch': 0.65} + 65%|██████▍ | 7912/12188 [16:57:36<8:18:52, 7.00s/it] 65%|██████▍ | 7913/12188 [16:57:44<8:38:24, 7.28s/it] {'loss': 0.2926, 'grad_norm': 0.678827914168673, 'learning_rate': 2.89527490045198e-06, 'epoch': 0.65} + 65%|██████▍ | 7913/12188 [16:57:44<8:38:24, 7.28s/it] 65%|██████▍ | 7914/12188 [16:57:52<8:53:24, 7.49s/it] {'loss': 0.2762, 'grad_norm': 0.655881840378925, 'learning_rate': 2.8940697238955823e-06, 'epoch': 0.65} + 65%|██████▍ | 7914/12188 [16:57:52<8:53:24, 7.49s/it] 65%|██████▍ | 7915/12188 [16:57:59<8:39:54, 7.30s/it] {'loss': 0.3001, 'grad_norm': 0.6444815807167679, 'learning_rate': 2.8928646960565955e-06, 'epoch': 0.65} + 65%|██████▍ | 7915/12188 [16:57:59<8:39:54, 7.30s/it] 65%|██████▍ | 7916/12188 [16:58:06<8:30:24, 7.17s/it] {'loss': 0.2932, 'grad_norm': 0.6411143770305764, 'learning_rate': 2.89165981702012e-06, 'epoch': 0.65} + 65%|██████▍ | 7916/12188 [16:58:06<8:30:24, 7.17s/it] 65%|██████▍ | 7917/12188 [16:58:13<8:43:21, 7.35s/it] {'loss': 0.2803, 'grad_norm': 0.7127931672417895, 'learning_rate': 2.890455086871239e-06, 'epoch': 0.65} + 65%|██████▍ | 7917/12188 [16:58:13<8:43:21, 7.35s/it] 65%|██████▍ | 7918/12188 [16:58:20<8:30:10, 7.17s/it] {'loss': 0.2894, 'grad_norm': 0.6605409883897024, 'learning_rate': 2.889250505695033e-06, 'epoch': 0.65} + 65%|██████▍ | 7918/12188 [16:58:20<8:30:10, 7.17s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 90, in pil_loader + return img.convert("RGB") + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 993, in convert + self.load() + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/ImageFile.py", line 319, in load + raise _get_oserror(err_code, encoder=False) +OSError: unrecognized data stream contents when reading image file +[Try #0] Failed to fetch sample 6015863 in VC:s3://gui-agent/data_20250623/windows_augment/images. Exception: unrecognized data stream contents when reading image file +Problematic sample: {'image': 'inventor/20250513_095212_1/images/before_screenshot_1_id_68_internvl_position_crop_0_grounding_instructions_point_o_paste.png', 'conversations': [{'from': 'human', 'value': '\nOutput only the bounding box in your response. Located at the top portion of the Autodesk Inventor window, within the main navigation bar. It is positioned near the left side of the interface, under the main application menu and above the modeling workspace that contains a 3D cylinder model.'}, {'from': 'gpt', 'value': 'Located at the top portion of the Autodesk Inventor window, within the main navigation bar. It is positioned near the left side of the interface, under the main application menu and above the modeling workspace that contains a 3D cylinder model.[[583, 500, 605, 508]]'}], 'width': 3600, 'height': 2338} + 65%|██████▍ | 7919/12188 [16:58:28<8:42:26, 7.34s/it] {'loss': 0.3059, 'grad_norm': 0.641909636316265, 'learning_rate': 2.8880460735765625e-06, 'epoch': 0.65} + 65%|██████▍ | 7919/12188 [16:58:28<8:42:26, 7.34s/it] 65%|██████▍ | 7920/12188 [16:58:35<8:28:03, 7.14s/it] {'loss': 0.3103, 'grad_norm': 0.6724373149057047, 'learning_rate': 2.886841790600887e-06, 'epoch': 0.65} + 65%|██████▍ | 7920/12188 [16:58:35<8:28:03, 7.14s/it] 65%|██████▍ | 7921/12188 [16:58:42<8:29:53, 7.17s/it] {'loss': 0.3144, 'grad_norm': 0.6568583675555307, 'learning_rate': 2.8856376568530475e-06, 'epoch': 0.65} + 65%|██████▍ | 7921/12188 [16:58:42<8:29:53, 7.17s/it] 65%|██████▍ | 7922/12188 [16:58:49<8:25:26, 7.11s/it] {'loss': 0.3071, 'grad_norm': 0.6946086189934464, 'learning_rate': 2.884433672418082e-06, 'epoch': 0.65} + 65%|██████▍ | 7922/12188 [16:58:49<8:25:26, 7.11s/it] 65%|██████▌ | 7923/12188 [16:58:56<8:25:05, 7.11s/it] {'loss': 0.2741, 'grad_norm': 0.6914409892436414, 'learning_rate': 2.8832298373810097e-06, 'epoch': 0.65} + 65%|██████▌ | 7923/12188 [16:58:56<8:25:05, 7.11s/it] 65%|██████▌ | 7924/12188 [16:59:03<8:24:57, 7.11s/it] {'loss': 0.2988, 'grad_norm': 0.6285796170141753, 'learning_rate': 2.8820261518268467e-06, 'epoch': 0.65} + 65%|██████▌ | 7924/12188 [16:59:03<8:24:57, 7.11s/it] 65%|██████▌ | 7925/12188 [16:59:12<8:53:53, 7.51s/it] {'loss': 0.3656, 'grad_norm': 0.6772474665238627, 'learning_rate': 2.880822615840593e-06, 'epoch': 0.65} + 65%|██████▌ | 7925/12188 [16:59:12<8:53:53, 7.51s/it] 65%|██████▌ | 7926/12188 [16:59:18<8:32:36, 7.22s/it] {'loss': 0.3081, 'grad_norm': 0.7547696272140891, 'learning_rate': 2.8796192295072407e-06, 'epoch': 0.65} + 65%|██████▌ | 7926/12188 [16:59:18<8:32:36, 7.22s/it] 65%|██████▌ | 7927/12188 [16:59:25<8:25:26, 7.12s/it] {'loss': 0.3194, 'grad_norm': 0.6443260390877436, 'learning_rate': 2.878415992911774e-06, 'epoch': 0.65} + 65%|██████▌ | 7927/12188 [16:59:25<8:25:26, 7.12s/it] 65%|██████▌ | 7928/12188 [16:59:33<8:51:26, 7.49s/it] {'loss': 0.2978, 'grad_norm': 0.6618860837241534, 'learning_rate': 2.8772129061391597e-06, 'epoch': 0.65} + 65%|██████▌ | 7928/12188 [16:59:33<8:51:26, 7.49s/it] 65%|██████▌ | 7929/12188 [16:59:41<8:51:37, 7.49s/it] {'loss': 0.3161, 'grad_norm': 0.6992622902608143, 'learning_rate': 2.8760099692743605e-06, 'epoch': 0.65} + 65%|██████▌ | 7929/12188 [16:59:41<8:51:37, 7.49s/it] 65%|██████▌ | 7930/12188 [16:59:48<8:47:55, 7.44s/it] {'loss': 0.3239, 'grad_norm': 0.7030085896258407, 'learning_rate': 2.8748071824023237e-06, 'epoch': 0.65} + 65%|██████▌ | 7930/12188 [16:59:48<8:47:55, 7.44s/it] 65%|██████▌ | 7931/12188 [16:59:55<8:38:52, 7.31s/it] {'loss': 0.3135, 'grad_norm': 0.6825206759098832, 'learning_rate': 2.8736045456079886e-06, 'epoch': 0.65} + 65%|██████▌ | 7931/12188 [16:59:55<8:38:52, 7.31s/it] 65%|██████▌ | 7932/12188 [17:00:02<8:29:05, 7.18s/it] {'loss': 0.318, 'grad_norm': 0.6858175432462865, 'learning_rate': 2.872402058976287e-06, 'epoch': 0.65} + 65%|██████▌ | 7932/12188 [17:00:02<8:29:05, 7.18s/it] 65%|██████▌ | 7933/12188 [17:00:09<8:25:44, 7.13s/it] {'loss': 0.3307, 'grad_norm': 0.7338919818855838, 'learning_rate': 2.871199722592133e-06, 'epoch': 0.65} + 65%|██████▌ | 7933/12188 [17:00:09<8:25:44, 7.13s/it] 65%|██████▌ | 7934/12188 [17:00:16<8:19:46, 7.05s/it] {'loss': 0.3363, 'grad_norm': 0.6396812443565536, 'learning_rate': 2.869997536540435e-06, 'epoch': 0.65} + 65%|██████▌ | 7934/12188 [17:00:16<8:19:46, 7.05s/it] 65%|██████▌ | 7935/12188 [17:00:24<8:32:55, 7.24s/it] {'loss': 0.3123, 'grad_norm': 0.6121799695825972, 'learning_rate': 2.8687955009060865e-06, 'epoch': 0.65} + 65%|██████▌ | 7935/12188 [17:00:24<8:32:55, 7.24s/it] 65%|██████▌ | 7936/12188 [17:00:32<8:53:37, 7.53s/it] {'loss': 0.2856, 'grad_norm': 0.6414855678259733, 'learning_rate': 2.867593615773976e-06, 'epoch': 0.65} + 65%|██████▌ | 7936/12188 [17:00:32<8:53:37, 7.53s/it] 65%|██████▌ | 7937/12188 [17:00:38<8:36:57, 7.30s/it] {'loss': 0.2706, 'grad_norm': 0.6352504731407603, 'learning_rate': 2.86639188122898e-06, 'epoch': 0.65} + 65%|██████▌ | 7937/12188 [17:00:38<8:36:57, 7.30s/it] 65%|██████▌ | 7938/12188 [17:00:45<8:28:01, 7.17s/it] {'loss': 0.3444, 'grad_norm': 0.6499272350889234, 'learning_rate': 2.8651902973559596e-06, 'epoch': 0.65} + 65%|██████▌ | 7938/12188 [17:00:45<8:28:01, 7.17s/it] 65%|██████▌ | 7939/12188 [17:00:53<8:43:14, 7.39s/it] {'loss': 0.3157, 'grad_norm': 0.6564125904487961, 'learning_rate': 2.8639888642397707e-06, 'epoch': 0.65} + 65%|██████▌ | 7939/12188 [17:00:53<8:43:14, 7.39s/it] 65%|██████▌ | 7940/12188 [17:01:01<8:49:55, 7.48s/it] {'loss': 0.297, 'grad_norm': 0.6814807608950137, 'learning_rate': 2.8627875819652577e-06, 'epoch': 0.65} + 65%|██████▌ | 7940/12188 [17:01:01<8:49:55, 7.48s/it] 65%|██████▌ | 7941/12188 [17:01:08<8:37:35, 7.31s/it] {'loss': 0.3329, 'grad_norm': 0.8397677299349381, 'learning_rate': 2.8615864506172507e-06, 'epoch': 0.65} + 65%|██████▌ | 7941/12188 [17:01:08<8:37:35, 7.31s/it] 65%|██████▌ | 7942/12188 [17:01:14<8:21:40, 7.09s/it] {'loss': 0.3101, 'grad_norm': 0.6801672702668141, 'learning_rate': 2.860385470280575e-06, 'epoch': 0.65} + 65%|██████▌ | 7942/12188 [17:01:14<8:21:40, 7.09s/it] 65%|██████▌ | 7943/12188 [17:01:22<8:20:44, 7.08s/it] {'loss': 0.2953, 'grad_norm': 0.6765277364038924, 'learning_rate': 2.8591846410400377e-06, 'epoch': 0.65} + 65%|██████▌ | 7943/12188 [17:01:22<8:20:44, 7.08s/it] 65%|██████▌ | 7944/12188 [17:01:30<8:50:36, 7.50s/it] {'loss': 0.297, 'grad_norm': 0.7350482531954359, 'learning_rate': 2.857983962980441e-06, 'epoch': 0.65} + 65%|██████▌ | 7944/12188 [17:01:30<8:50:36, 7.50s/it] 65%|██████▌ | 7945/12188 [17:01:38<8:55:06, 7.57s/it] {'loss': 0.3192, 'grad_norm': 0.7049472355009058, 'learning_rate': 2.856783436186578e-06, 'epoch': 0.65} + 65%|██████▌ | 7945/12188 [17:01:38<8:55:06, 7.57s/it] 65%|██████▌ | 7946/12188 [17:01:45<8:43:57, 7.41s/it] {'loss': 0.3155, 'grad_norm': 0.6982644096731664, 'learning_rate': 2.8555830607432254e-06, 'epoch': 0.65} + 65%|██████▌ | 7946/12188 [17:01:45<8:43:57, 7.41s/it] 65%|██████▌ | 7947/12188 [17:01:52<8:46:21, 7.45s/it] {'loss': 0.2905, 'grad_norm': 0.6958384888013761, 'learning_rate': 2.85438283673515e-06, 'epoch': 0.65} + 65%|██████▌ | 7947/12188 [17:01:52<8:46:21, 7.45s/it] 65%|██████▌ | 7948/12188 [17:01:59<8:30:09, 7.22s/it] {'loss': 0.3291, 'grad_norm': 0.7398985371162089, 'learning_rate': 2.853182764247113e-06, 'epoch': 0.65} + 65%|██████▌ | 7948/12188 [17:01:59<8:30:09, 7.22s/it] 65%|██████▌ | 7949/12188 [17:02:06<8:26:54, 7.17s/it] {'loss': 0.2979, 'grad_norm': 0.7179507957991076, 'learning_rate': 2.8519828433638588e-06, 'epoch': 0.65} + 65%|██████▌ | 7949/12188 [17:02:06<8:26:54, 7.17s/it] 65%|██████▌ | 7950/12188 [17:02:14<8:42:20, 7.40s/it] {'loss': 0.3057, 'grad_norm': 0.6863901151014533, 'learning_rate': 2.850783074170126e-06, 'epoch': 0.65} + 65%|██████▌ | 7950/12188 [17:02:14<8:42:20, 7.40s/it] 65%|██████▌ | 7951/12188 [17:02:21<8:37:15, 7.32s/it] {'loss': 0.3358, 'grad_norm': 0.8325304341959844, 'learning_rate': 2.849583456750639e-06, 'epoch': 0.65} + 65%|██████▌ | 7951/12188 [17:02:21<8:37:15, 7.32s/it] 65%|██████▌ | 7952/12188 [17:02:29<8:43:56, 7.42s/it] {'loss': 0.2829, 'grad_norm': 0.6687921145996351, 'learning_rate': 2.8483839911901133e-06, 'epoch': 0.65} + 65%|██████▌ | 7952/12188 [17:02:29<8:43:56, 7.42s/it] 65%|██████▌ | 7953/12188 [17:02:36<8:42:29, 7.40s/it] {'loss': 0.3292, 'grad_norm': 0.6373976290196698, 'learning_rate': 2.8471846775732527e-06, 'epoch': 0.65} + 65%|██████▌ | 7953/12188 [17:02:36<8:42:29, 7.40s/it] 65%|██████▌ | 7954/12188 [17:02:44<8:44:05, 7.43s/it] {'loss': 0.3366, 'grad_norm': 0.7797742406055792, 'learning_rate': 2.8459855159847516e-06, 'epoch': 0.65} + 65%|██████▌ | 7954/12188 [17:02:44<8:44:05, 7.43s/it] 65%|██████▌ | 7955/12188 [17:02:50<8:29:55, 7.23s/it] {'loss': 0.3123, 'grad_norm': 0.8092259947762614, 'learning_rate': 2.844786506509294e-06, 'epoch': 0.65} + 65%|██████▌ | 7955/12188 [17:02:50<8:29:55, 7.23s/it] 65%|██████▌ | 7956/12188 [17:02:58<8:45:08, 7.45s/it] {'loss': 0.2888, 'grad_norm': 0.7036453370856735, 'learning_rate': 2.8435876492315493e-06, 'epoch': 0.65} + 65%|██████▌ | 7956/12188 [17:02:58<8:45:08, 7.45s/it] 65%|██████▌ | 7957/12188 [17:03:05<8:37:34, 7.34s/it] {'loss': 0.2994, 'grad_norm': 0.698869530473606, 'learning_rate': 2.8423889442361797e-06, 'epoch': 0.65} + 65%|██████▌ | 7957/12188 [17:03:05<8:37:34, 7.34s/it] 65%|██████▌ | 7958/12188 [17:03:12<8:30:32, 7.24s/it] {'loss': 0.3229, 'grad_norm': 0.6699415265517371, 'learning_rate': 2.841190391607839e-06, 'epoch': 0.65} + 65%|██████▌ | 7958/12188 [17:03:12<8:30:32, 7.24s/it] 65%|██████▌ | 7959/12188 [17:03:19<8:25:43, 7.18s/it] {'loss': 0.3456, 'grad_norm': 0.6516444464727982, 'learning_rate': 2.839991991431162e-06, 'epoch': 0.65} + 65%|██████▌ | 7959/12188 [17:03:19<8:25:43, 7.18s/it] 65%|██████▌ | 7960/12188 [17:03:27<8:35:27, 7.31s/it] {'loss': 0.3328, 'grad_norm': 0.6406807813078994, 'learning_rate': 2.838793743790782e-06, 'epoch': 0.65} + 65%|██████▌ | 7960/12188 [17:03:27<8:35:27, 7.31s/it] 65%|██████▌ | 7961/12188 [17:03:34<8:32:00, 7.27s/it] {'loss': 0.3361, 'grad_norm': 0.6863490521841865, 'learning_rate': 2.8375956487713163e-06, 'epoch': 0.65} + 65%|██████▌ | 7961/12188 [17:03:34<8:32:00, 7.27s/it] 65%|██████▌ | 7962/12188 [17:03:41<8:18:21, 7.08s/it] {'loss': 0.3028, 'grad_norm': 0.6987511602099447, 'learning_rate': 2.836397706457369e-06, 'epoch': 0.65} + 65%|██████▌ | 7962/12188 [17:03:41<8:18:21, 7.08s/it] 65%|██████▌ | 7963/12188 [17:03:49<8:42:19, 7.42s/it] {'loss': 0.2993, 'grad_norm': 0.6928213109037088, 'learning_rate': 2.8351999169335422e-06, 'epoch': 0.65} + 65%|██████▌ | 7963/12188 [17:03:49<8:42:19, 7.42s/it] 65%|██████▌ | 7964/12188 [17:03:56<8:25:59, 7.19s/it] {'loss': 0.274, 'grad_norm': 0.8466721831532721, 'learning_rate': 2.834002280284417e-06, 'epoch': 0.65} + 65%|██████▌ | 7964/12188 [17:03:56<8:25:59, 7.19s/it] 65%|██████▌ | 7965/12188 [17:04:03<8:26:31, 7.20s/it] {'loss': 0.3053, 'grad_norm': 0.7094650687726095, 'learning_rate': 2.8328047965945725e-06, 'epoch': 0.65} + 65%|██████▌ | 7965/12188 [17:04:03<8:26:31, 7.20s/it] 65%|██████▌ | 7966/12188 [17:04:10<8:16:07, 7.05s/it] {'loss': 0.2822, 'grad_norm': 0.6711204301341248, 'learning_rate': 2.8316074659485693e-06, 'epoch': 0.65} + 65%|██████▌ | 7966/12188 [17:04:10<8:16:07, 7.05s/it] 65%|██████▌ | 7967/12188 [17:04:17<8:21:26, 7.13s/it] {'loss': 0.3133, 'grad_norm': 0.682633691559265, 'learning_rate': 2.8304102884309637e-06, 'epoch': 0.65} + 65%|██████▌ | 7967/12188 [17:04:17<8:21:26, 7.13s/it] 65%|██████▌ | 7968/12188 [17:04:24<8:19:35, 7.10s/it] {'loss': 0.313, 'grad_norm': 0.6958498081477276, 'learning_rate': 2.829213264126299e-06, 'epoch': 0.65} + 65%|██████▌ | 7968/12188 [17:04:24<8:19:35, 7.10s/it] 65%|██████▌ | 7969/12188 [17:04:32<8:34:30, 7.32s/it] {'loss': 0.3232, 'grad_norm': 0.6507135030912758, 'learning_rate': 2.828016393119105e-06, 'epoch': 0.65} + 65%|██████▌ | 7969/12188 [17:04:32<8:34:30, 7.32s/it] 65%|██████▌ | 7970/12188 [17:04:39<8:30:12, 7.26s/it] {'loss': 0.2933, 'grad_norm': 0.6409495066158053, 'learning_rate': 2.8268196754939054e-06, 'epoch': 0.65} + 65%|██████▌ | 7970/12188 [17:04:39<8:30:12, 7.26s/it] 65%|██████▌ | 7971/12188 [17:04:46<8:31:57, 7.28s/it] {'loss': 0.27, 'grad_norm': 0.622404800664217, 'learning_rate': 2.8256231113352067e-06, 'epoch': 0.65} + 65%|██████▌ | 7971/12188 [17:04:46<8:31:57, 7.28s/it] 65%|██████▌ | 7972/12188 [17:04:54<8:29:56, 7.26s/it] {'loss': 0.3337, 'grad_norm': 0.8880670867118448, 'learning_rate': 2.8244267007275106e-06, 'epoch': 0.65} + 65%|██████▌ | 7972/12188 [17:04:54<8:29:56, 7.26s/it] 65%|██████▌ | 7973/12188 [17:05:00<8:20:33, 7.13s/it] {'loss': 0.3173, 'grad_norm': 0.7027143735024193, 'learning_rate': 2.8232304437553072e-06, 'epoch': 0.65} + 65%|██████▌ | 7973/12188 [17:05:00<8:20:33, 7.13s/it] 65%|██████▌ | 7974/12188 [17:05:08<8:27:18, 7.22s/it] {'loss': 0.3077, 'grad_norm': 0.5823489809875999, 'learning_rate': 2.822034340503073e-06, 'epoch': 0.65} + 65%|██████▌ | 7974/12188 [17:05:08<8:27:18, 7.22s/it] 65%|██████▌ | 7975/12188 [17:05:15<8:17:07, 7.08s/it] {'loss': 0.3387, 'grad_norm': 0.6921982287229088, 'learning_rate': 2.820838391055273e-06, 'epoch': 0.65} + 65%|██████▌ | 7975/12188 [17:05:15<8:17:07, 7.08s/it] 65%|██████▌ | 7976/12188 [17:05:22<8:17:59, 7.09s/it] {'loss': 0.3023, 'grad_norm': 0.7136520763691966, 'learning_rate': 2.8196425954963657e-06, 'epoch': 0.65} + 65%|██████▌ | 7976/12188 [17:05:22<8:17:59, 7.09s/it] 65%|██████▌ | 7977/12188 [17:05:29<8:29:58, 7.27s/it] {'loss': 0.2964, 'grad_norm': 0.6629077762227502, 'learning_rate': 2.8184469539107933e-06, 'epoch': 0.65} + 65%|██████▌ | 7977/12188 [17:05:29<8:29:58, 7.27s/it] 65%|██████▌ | 7978/12188 [17:05:36<8:27:19, 7.23s/it] {'loss': 0.3259, 'grad_norm': 0.6983121782900509, 'learning_rate': 2.817251466382995e-06, 'epoch': 0.65} + 65%|██████▌ | 7978/12188 [17:05:36<8:27:19, 7.23s/it] 65%|██████▌ | 7979/12188 [17:05:44<8:33:37, 7.32s/it] {'loss': 0.3194, 'grad_norm': 0.6128416116034732, 'learning_rate': 2.8160561329973886e-06, 'epoch': 0.65} + 65%|██████▌ | 7979/12188 [17:05:44<8:33:37, 7.32s/it] 65%|██████▌ | 7980/12188 [17:05:52<8:46:37, 7.51s/it] {'loss': 0.3319, 'grad_norm': 0.7288143720187644, 'learning_rate': 2.814860953838389e-06, 'epoch': 0.65} + 65%|██████▌ | 7980/12188 [17:05:52<8:46:37, 7.51s/it] 65%|██████▌ | 7981/12188 [17:06:00<8:49:29, 7.55s/it] {'loss': 0.3289, 'grad_norm': 0.6391696619735456, 'learning_rate': 2.8136659289904012e-06, 'epoch': 0.65} + 65%|██████▌ | 7981/12188 [17:06:00<8:49:29, 7.55s/it] 65%|██████▌ | 7982/12188 [17:06:06<8:32:40, 7.31s/it] {'loss': 0.3177, 'grad_norm': 0.7139327133318862, 'learning_rate': 2.81247105853781e-06, 'epoch': 0.65} + 65%|██████▌ | 7982/12188 [17:06:06<8:32:40, 7.31s/it] 65%|██████▌ | 7983/12188 [17:06:13<8:24:36, 7.20s/it] {'loss': 0.3302, 'grad_norm': 0.6587489679516033, 'learning_rate': 2.8112763425650002e-06, 'epoch': 0.65} + 65%|██████▌ | 7983/12188 [17:06:13<8:24:36, 7.20s/it] 66%|██████▌ | 7984/12188 [17:06:20<8:13:34, 7.04s/it] {'loss': 0.3285, 'grad_norm': 0.7120362546668921, 'learning_rate': 2.8100817811563376e-06, 'epoch': 0.66} + 66%|██████▌ | 7984/12188 [17:06:20<8:13:34, 7.04s/it] 66%|██████▌ | 7985/12188 [17:06:28<8:27:54, 7.25s/it] {'loss': 0.2508, 'grad_norm': 0.6389258287537181, 'learning_rate': 2.80888737439618e-06, 'epoch': 0.66} + 66%|██████▌ | 7985/12188 [17:06:28<8:27:54, 7.25s/it] 66%|██████▌ | 7986/12188 [17:06:34<8:16:06, 7.08s/it] {'loss': 0.3056, 'grad_norm': 0.6811103802674385, 'learning_rate': 2.8076931223688787e-06, 'epoch': 0.66} + 66%|██████▌ | 7986/12188 [17:06:34<8:16:06, 7.08s/it] 66%|██████▌ | 7987/12188 [17:06:41<8:14:12, 7.06s/it] {'loss': 0.3078, 'grad_norm': 0.6912444692850849, 'learning_rate': 2.8064990251587644e-06, 'epoch': 0.66} + 66%|██████▌ | 7987/12188 [17:06:41<8:14:12, 7.06s/it] 66%|██████▌ | 7988/12188 [17:06:48<8:06:59, 6.96s/it] {'loss': 0.2717, 'grad_norm': 0.6641887956839314, 'learning_rate': 2.8053050828501666e-06, 'epoch': 0.66} + 66%|██████▌ | 7988/12188 [17:06:48<8:06:59, 6.96s/it] 66%|██████▌ | 7989/12188 [17:06:56<8:22:19, 7.18s/it] {'loss': 0.3161, 'grad_norm': 0.7065666428448144, 'learning_rate': 2.8041112955273974e-06, 'epoch': 0.66} + 66%|██████▌ | 7989/12188 [17:06:56<8:22:19, 7.18s/it] 66%|██████▌ | 7990/12188 [17:07:03<8:29:23, 7.28s/it] {'loss': 0.2766, 'grad_norm': 0.5942516017808813, 'learning_rate': 2.8029176632747594e-06, 'epoch': 0.66} + 66%|██████▌ | 7990/12188 [17:07:03<8:29:23, 7.28s/it] 66%|██████▌ | 7991/12188 [17:07:10<8:23:29, 7.20s/it] {'loss': 0.2786, 'grad_norm': 0.6699655914191199, 'learning_rate': 2.801724186176548e-06, 'epoch': 0.66} + 66%|██████▌ | 7991/12188 [17:07:10<8:23:29, 7.20s/it] 66%|██████▌ | 7992/12188 [17:07:17<8:21:12, 7.17s/it] {'loss': 0.322, 'grad_norm': 0.7360553589527367, 'learning_rate': 2.800530864317041e-06, 'epoch': 0.66} + 66%|██████▌ | 7992/12188 [17:07:17<8:21:12, 7.17s/it] 66%|██████▌ | 7993/12188 [17:07:24<8:14:34, 7.07s/it] {'loss': 0.2811, 'grad_norm': 0.7357586256751101, 'learning_rate': 2.7993376977805102e-06, 'epoch': 0.66} + 66%|██████▌ | 7993/12188 [17:07:24<8:14:34, 7.07s/it] 66%|██████▌ | 7994/12188 [17:07:33<8:41:33, 7.46s/it] {'loss': 0.37, 'grad_norm': 0.8908884198167593, 'learning_rate': 2.7981446866512176e-06, 'epoch': 0.66} + 66%|██████▌ | 7994/12188 [17:07:33<8:41:33, 7.46s/it] 66%|██████▌ | 7995/12188 [17:07:40<8:33:57, 7.35s/it] {'loss': 0.274, 'grad_norm': 0.7362893708896101, 'learning_rate': 2.796951831013408e-06, 'epoch': 0.66} + 66%|██████▌ | 7995/12188 [17:07:40<8:33:57, 7.35s/it] 66%|██████▌ | 7996/12188 [17:07:47<8:22:39, 7.19s/it] {'loss': 0.3401, 'grad_norm': 0.6876768496629768, 'learning_rate': 2.795759130951322e-06, 'epoch': 0.66} + 66%|██████▌ | 7996/12188 [17:07:47<8:22:39, 7.19s/it] 66%|██████▌ | 7997/12188 [17:07:54<8:22:11, 7.19s/it] {'loss': 0.3029, 'grad_norm': 0.7091011055176648, 'learning_rate': 2.794566586549183e-06, 'epoch': 0.66} + 66%|██████▌ | 7997/12188 [17:07:54<8:22:11, 7.19s/it] 66%|██████▌ | 7998/12188 [17:08:01<8:15:18, 7.09s/it] {'loss': 0.3239, 'grad_norm': 0.6829291494477921, 'learning_rate': 2.793374197891209e-06, 'epoch': 0.66} + 66%|██████▌ | 7998/12188 [17:08:01<8:15:18, 7.09s/it] 66%|██████▌ | 7999/12188 [17:08:08<8:17:13, 7.12s/it] {'loss': 0.297, 'grad_norm': 0.6451047204750673, 'learning_rate': 2.7921819650616054e-06, 'epoch': 0.66} + 66%|█████��▌ | 7999/12188 [17:08:08<8:17:13, 7.12s/it] 66%|██████▌ | 8000/12188 [17:08:15<8:11:35, 7.04s/it] {'loss': 0.3156, 'grad_norm': 0.6540013213232526, 'learning_rate': 2.7909898881445625e-06, 'epoch': 0.66} + 66%|██████▌ | 8000/12188 [17:08:15<8:11:35, 7.04s/it]/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/utils/checkpoint.py:87: UserWarning: None of the inputs have requires_grad=True. Gradients will be None + warnings.warn( + 66%|██████▌ | 8001/12188 [17:08:38<14:01:43, 12.06s/it] {'loss': 0.3168, 'grad_norm': 0.6751906687975074, 'learning_rate': 2.7897979672242663e-06, 'epoch': 0.66} + 66%|██████▌ | 8001/12188 [17:08:38<14:01:43, 12.06s/it] 66%|██████▌ | 8002/12188 [17:08:45<12:13:55, 10.52s/it] {'loss': 0.3037, 'grad_norm': 0.7238674827712859, 'learning_rate': 2.788606202384887e-06, 'epoch': 0.66} + 66%|██████▌ | 8002/12188 [17:08:45<12:13:55, 10.52s/it] 66%|██████▌ | 8003/12188 [17:08:53<11:08:45, 9.59s/it] {'loss': 0.3233, 'grad_norm': 0.7093944862321852, 'learning_rate': 2.787414593710583e-06, 'epoch': 0.66} + 66%|██████▌ | 8003/12188 [17:08:53<11:08:45, 9.59s/it] 66%|██████▌ | 8004/12188 [17:09:00<10:17:49, 8.86s/it] {'loss': 0.3176, 'grad_norm': 0.6740494199761177, 'learning_rate': 2.786223141285507e-06, 'epoch': 0.66} + 66%|██████▌ | 8004/12188 [17:09:00<10:17:49, 8.86s/it] 66%|██████▌ | 8005/12188 [17:09:07<9:32:37, 8.21s/it] {'loss': 0.3659, 'grad_norm': 0.6353185441552085, 'learning_rate': 2.785031845193794e-06, 'epoch': 0.66} + 66%|██████▌ | 8005/12188 [17:09:07<9:32:37, 8.21s/it] 66%|██████▌ | 8006/12188 [17:09:13<9:00:35, 7.76s/it] {'loss': 0.3003, 'grad_norm': 0.7453400095871374, 'learning_rate': 2.7838407055195743e-06, 'epoch': 0.66} + 66%|██████▌ | 8006/12188 [17:09:13<9:00:35, 7.76s/it] 66%|██████▌ | 8007/12188 [17:09:20<8:38:02, 7.43s/it] {'loss': 0.2979, 'grad_norm': 0.7113638454707971, 'learning_rate': 2.782649722346965e-06, 'epoch': 0.66} + 66%|██████▌ | 8007/12188 [17:09:20<8:38:02, 7.43s/it] 66%|██████▌ | 8008/12188 [17:09:28<8:42:36, 7.50s/it] {'loss': 0.3404, 'grad_norm': 0.6926472896882117, 'learning_rate': 2.781458895760068e-06, 'epoch': 0.66} + 66%|██████▌ | 8008/12188 [17:09:28<8:42:36, 7.50s/it] 66%|██████▌ | 8009/12188 [17:09:34<8:25:48, 7.26s/it] {'loss': 0.3247, 'grad_norm': 0.6486150653778961, 'learning_rate': 2.7802682258429813e-06, 'epoch': 0.66} + 66%|██████▌ | 8009/12188 [17:09:34<8:25:48, 7.26s/it] 66%|██████▌ | 8010/12188 [17:09:42<8:26:52, 7.28s/it] {'loss': 0.2866, 'grad_norm': 0.6364815364482068, 'learning_rate': 2.779077712679784e-06, 'epoch': 0.66} + 66%|██████▌ | 8010/12188 [17:09:42<8:26:52, 7.28s/it] 66%|██████▌ | 8011/12188 [17:09:49<8:21:41, 7.21s/it] {'loss': 0.3416, 'grad_norm': 0.6713799371571609, 'learning_rate': 2.7778873563545506e-06, 'epoch': 0.66} + 66%|██████▌ | 8011/12188 [17:09:49<8:21:41, 7.21s/it] 66%|██████▌ | 8012/12188 [17:09:57<8:38:47, 7.45s/it] {'loss': 0.3248, 'grad_norm': 0.6786738301281661, 'learning_rate': 2.7766971569513445e-06, 'epoch': 0.66} + 66%|██████▌ | 8012/12188 [17:09:57<8:38:47, 7.45s/it] 66%|██████▌ | 8013/12188 [17:10:04<8:25:56, 7.27s/it] {'loss': 0.308, 'grad_norm': 0.6532163379205073, 'learning_rate': 2.77550711455421e-06, 'epoch': 0.66} + 66%|██████▌ | 8013/12188 [17:10:04<8:25:56, 7.27s/it] 66%|██████▌ | 8014/12188 [17:10:10<8:09:03, 7.03s/it] {'loss': 0.3195, 'grad_norm': 0.6327741146465581, 'learning_rate': 2.774317229247192e-06, 'epoch': 0.66} + 66%|██████▌ | 8014/12188 [17:10:10<8:09:03, 7.03s/it] 66%|██████▌ | 8015/12188 [17:10:18<8:21:03, 7.20s/it] {'loss': 0.3567, 'grad_norm': 0.6610881328573429, 'learning_rate': 2.7731275011143133e-06, 'epoch': 0.66} + 66%|██████▌ | 8015/12188 [17:10:18<8:21:03, 7.20s/it] 66%|██████▌ | 8016/12188 [17:10:25<8:15:27, 7.13s/it] {'loss': 0.3266, 'grad_norm': 0.6643218407675162, 'learning_rate': 2.7719379302395954e-06, 'epoch': 0.66} + 66%|██████▌ | 8016/12188 [17:10:25<8:15:27, 7.13s/it] 66%|██████▌ | 8017/12188 [17:10:33<8:32:54, 7.38s/it] {'loss': 0.3086, 'grad_norm': 0.7222147017967254, 'learning_rate': 2.7707485167070402e-06, 'epoch': 0.66} + 66%|██████▌ | 8017/12188 [17:10:33<8:32:54, 7.38s/it] 66%|██████▌ | 8018/12188 [17:10:41<8:53:02, 7.67s/it] {'loss': 0.2753, 'grad_norm': 0.6528689719801548, 'learning_rate': 2.769559260600642e-06, 'epoch': 0.66} + 66%|██████▌ | 8018/12188 [17:10:41<8:53:02, 7.67s/it] 66%|██████▌ | 8019/12188 [17:10:48<8:41:57, 7.51s/it] {'loss': 0.2904, 'grad_norm': 0.7107512390642204, 'learning_rate': 2.768370162004387e-06, 'epoch': 0.66} + 66%|██████▌ | 8019/12188 [17:10:48<8:41:57, 7.51s/it] 66%|██████▌ | 8020/12188 [17:10:55<8:31:24, 7.36s/it] {'loss': 0.2886, 'grad_norm': 0.6799752773150332, 'learning_rate': 2.7671812210022433e-06, 'epoch': 0.66} + 66%|██████▌ | 8020/12188 [17:10:55<8:31:24, 7.36s/it] 66%|██████▌ | 8021/12188 [17:11:02<8:26:32, 7.29s/it] {'loss': 0.3416, 'grad_norm': 0.7773458414795124, 'learning_rate': 2.7659924376781742e-06, 'epoch': 0.66} + 66%|██████▌ | 8021/12188 [17:11:02<8:26:32, 7.29s/it] 66%|██████▌ | 8022/12188 [17:11:09<8:20:54, 7.21s/it] {'loss': 0.309, 'grad_norm': 0.6843294505511738, 'learning_rate': 2.764803812116133e-06, 'epoch': 0.66} + 66%|██████▌ | 8022/12188 [17:11:09<8:20:54, 7.21s/it] 66%|██████▌ | 8023/12188 [17:11:16<8:13:22, 7.11s/it] {'loss': 0.2935, 'grad_norm': 0.7143500021761804, 'learning_rate': 2.763615344400053e-06, 'epoch': 0.66} + 66%|██████▌ | 8023/12188 [17:11:16<8:13:22, 7.11s/it] 66%|██████▌ | 8024/12188 [17:11:23<8:09:49, 7.06s/it] {'loss': 0.3247, 'grad_norm': 0.7239624082823232, 'learning_rate': 2.7624270346138643e-06, 'epoch': 0.66} + 66%|██████▌ | 8024/12188 [17:11:23<8:09:49, 7.06s/it] 66%|██████▌ | 8025/12188 [17:11:30<8:09:31, 7.06s/it] {'loss': 0.2665, 'grad_norm': 0.6059016395654344, 'learning_rate': 2.761238882841486e-06, 'epoch': 0.66} + 66%|██████▌ | 8025/12188 [17:11:30<8:09:31, 7.06s/it] 66%|██████▌ | 8026/12188 [17:11:37<7:57:45, 6.89s/it] {'loss': 0.3197, 'grad_norm': 0.7159362278412904, 'learning_rate': 2.760050889166818e-06, 'epoch': 0.66} + 66%|██████▌ | 8026/12188 [17:11:37<7:57:45, 6.89s/it] 66%|██████▌ | 8027/12188 [17:11:43<7:53:49, 6.83s/it] {'loss': 0.3167, 'grad_norm': 0.6687780116060105, 'learning_rate': 2.7588630536737606e-06, 'epoch': 0.66} + 66%|██████▌ | 8027/12188 [17:11:43<7:53:49, 6.83s/it] 66%|██████▌ | 8028/12188 [17:11:51<8:08:39, 7.05s/it] {'loss': 0.3274, 'grad_norm': 0.6872441246826941, 'learning_rate': 2.7576753764461906e-06, 'epoch': 0.66} + 66%|██████▌ | 8028/12188 [17:11:51<8:08:39, 7.05s/it] 66%|██████▌ | 8029/12188 [17:11:58<8:15:58, 7.16s/it] {'loss': 0.2803, 'grad_norm': 0.678388006870216, 'learning_rate': 2.7564878575679852e-06, 'epoch': 0.66} + 66%|██████▌ | 8029/12188 [17:11:58<8:15:58, 7.16s/it] 66%|██████▌ | 8030/12188 [17:12:05<8:05:33, 7.01s/it] {'loss': 0.3009, 'grad_norm': 0.6670497135557879, 'learning_rate': 2.7553004971230037e-06, 'epoch': 0.66} + 66%|██████▌ | 8030/12188 [17:12:05<8:05:33, 7.01s/it] 66%|██████▌ | 8031/12188 [17:12:12<8:04:28, 6.99s/it] {'loss': 0.3108, 'grad_norm': 0.6603155764043044, 'learning_rate': 2.7541132951950917e-06, 'epoch': 0.66} + 66%|██████▌ | 8031/12188 [17:12:12<8:04:28, 6.99s/it] 66%|██████▌ | 8032/12188 [17:12:19<8:05:38, 7.01s/it] {'loss': 0.3361, 'grad_norm': 0.7870375908753914, 'learning_rate': 2.7529262518680927e-06, 'epoch': 0.66} + 66%|██████▌ | 8032/12188 [17:12:19<8:05:38, 7.01s/it] 66%|██████▌ | 8033/12188 [17:12:26<8:07:58, 7.05s/it] {'loss': 0.2932, 'grad_norm': 0.6620973308066272, 'learning_rate': 2.7517393672258296e-06, 'epoch': 0.66} + 66%|██████▌ | 8033/12188 [17:12:26<8:07:58, 7.05s/it] 66%|██████▌ | 8034/12188 [17:12:33<8:02:28, 6.97s/it] {'loss': 0.3436, 'grad_norm': 0.7327253398425994, 'learning_rate': 2.7505526413521205e-06, 'epoch': 0.66} + 66%|██████▌ | 8034/12188 [17:12:33<8:02:28, 6.97s/it] 66%|██████▌ | 8035/12188 [17:12:40<8:00:48, 6.95s/it] {'loss': 0.328, 'grad_norm': 0.7331772807342967, 'learning_rate': 2.7493660743307717e-06, 'epoch': 0.66} + 66%|██████▌ | 8035/12188 [17:12:40<8:00:48, 6.95s/it] 66%|██████▌ | 8036/12188 [17:12:47<7:56:38, 6.89s/it] {'loss': 0.2983, 'grad_norm': 0.6653254454261436, 'learning_rate': 2.748179666245573e-06, 'epoch': 0.66} + 66%|██████▌ | 8036/12188 [17:12:47<7:56:38, 6.89s/it] 66%|██████▌ | 8037/12188 [17:12:53<7:53:56, 6.85s/it] {'loss': 0.3031, 'grad_norm': 0.7663685244853932, 'learning_rate': 2.7469934171803103e-06, 'epoch': 0.66} + 66%|██████▌ | 8037/12188 [17:12:53<7:53:56, 6.85s/it] 66%|██████▌ | 8038/12188 [17:13:01<8:17:47, 7.20s/it] {'loss': 0.3748, 'grad_norm': 0.6694959074221706, 'learning_rate': 2.745807327218751e-06, 'epoch': 0.66} + 66%|██████▌ | 8038/12188 [17:13:01<8:17:47, 7.20s/it] 66%|██████▌ | 8039/12188 [17:13:09<8:19:33, 7.22s/it] {'loss': 0.2847, 'grad_norm': 0.6426470712764738, 'learning_rate': 2.744621396444656e-06, 'epoch': 0.66} + 66%|██████▌ | 8039/12188 [17:13:09<8:19:33, 7.22s/it] 66%|██████▌ | 8040/12188 [17:13:16<8:34:14, 7.44s/it] {'loss': 0.2902, 'grad_norm': 0.6915273324327135, 'learning_rate': 2.743435624941777e-06, 'epoch': 0.66} + 66%|██████▌ | 8040/12188 [17:13:17<8:34:14, 7.44s/it] 66%|██████▌ | 8041/12188 [17:13:24<8:36:22, 7.47s/it] {'loss': 0.3233, 'grad_norm': 0.677242015945906, 'learning_rate': 2.742250012793847e-06, 'epoch': 0.66} + 66%|██████▌ | 8041/12188 [17:13:24<8:36:22, 7.47s/it] 66%|██████▌ | 8042/12188 [17:13:31<8:24:09, 7.30s/it] {'loss': 0.2813, 'grad_norm': 0.6561433765768877, 'learning_rate': 2.741064560084595e-06, 'epoch': 0.66} + 66%|██████▌ | 8042/12188 [17:13:31<8:24:09, 7.30s/it] 66%|██████▌ | 8043/12188 [17:13:38<8:14:08, 7.15s/it] {'loss': 0.3167, 'grad_norm': 0.6646038766648574, 'learning_rate': 2.7398792668977327e-06, 'epoch': 0.66} + 66%|██████▌ | 8043/12188 [17:13:38<8:14:08, 7.15s/it] 66%|██████▌ | 8044/12188 [17:13:51<10:25:40, 9.06s/it] {'loss': 0.3367, 'grad_norm': 0.6384142758739458, 'learning_rate': 2.7386941333169677e-06, 'epoch': 0.66} + 66%|██████▌ | 8044/12188 [17:13:51<10:25:40, 9.06s/it] 66%|██████▌ | 8045/12188 [17:13:58<9:37:57, 8.37s/it] {'loss': 0.2859, 'grad_norm': 0.641680118670966, 'learning_rate': 2.7375091594259893e-06, 'epoch': 0.66} + 66%|██████▌ | 8045/12188 [17:13:58<9:37:57, 8.37s/it] 66%|██████▌ | 8046/12188 [17:14:05<9:04:29, 7.89s/it] {'loss': 0.3098, 'grad_norm': 0.6510985085255568, 'learning_rate': 2.7363243453084776e-06, 'epoch': 0.66} + 66%|██████▌ | 8046/12188 [17:14:05<9:04:29, 7.89s/it] 66%|██████▌ | 8047/12188 [17:14:13<9:09:13, 7.96s/it] {'loss': 0.3223, 'grad_norm': 0.7038564196400859, 'learning_rate': 2.735139691048104e-06, 'epoch': 0.66} + 66%|██████▌ | 8047/12188 [17:14:13<9:09:13, 7.96s/it] 66%|██████▌ | 8048/12188 [17:14:20<8:51:14, 7.70s/it] {'loss': 0.3299, 'grad_norm': 0.666672380930649, 'learning_rate': 2.7339551967285285e-06, 'epoch': 0.66} + 66%|██████▌ | 8048/12188 [17:14:20<8:51:14, 7.70s/it] 66%|██████▌ | 8049/12188 [17:14:27<8:30:26, 7.40s/it] {'loss': 0.2857, 'grad_norm': 0.6422938782666655, 'learning_rate': 2.7327708624333936e-06, 'epoch': 0.66} + 66%|██████▌ | 8049/12188 [17:14:27<8:30:26, 7.40s/it] 66%|██████▌ | 8050/12188 [17:14:34<8:24:14, 7.31s/it] {'loss': 0.3167, 'grad_norm': 0.6591156552037318, 'learning_rate': 2.7315866882463403e-06, 'epoch': 0.66} + 66%|██████▌ | 8050/12188 [17:14:34<8:24:14, 7.31s/it] 66%|██████▌ | 8051/12188 [17:14:44<9:20:45, 8.13s/it] {'loss': 0.3038, 'grad_norm': 0.6285949551324369, 'learning_rate': 2.7304026742509893e-06, 'epoch': 0.66} + 66%|██████▌ | 8051/12188 [17:14:44<9:20:45, 8.13s/it] 66%|██████▌ | 8052/12188 [17:14:51<8:57:21, 7.80s/it] {'loss': 0.2915, 'grad_norm': 0.691286203714858, 'learning_rate': 2.7292188205309543e-06, 'epoch': 0.66} + 66%|██████▌ | 8052/12188 [17:14:51<8:57:21, 7.80s/it] 66%|██████▌ | 8053/12188 [17:14:58<8:34:44, 7.47s/it] {'loss': 0.3058, 'grad_norm': 0.7118300678313455, 'learning_rate': 2.7280351271698403e-06, 'epoch': 0.66} + 66%|██████▌ | 8053/12188 [17:14:58<8:34:44, 7.47s/it] 66%|██████▌ | 8054/12188 [17:15:05<8:34:59, 7.47s/it] {'loss': 0.2844, 'grad_norm': 0.6980213872462306, 'learning_rate': 2.726851594251233e-06, 'epoch': 0.66} + 66%|██████▌ | 8054/12188 [17:15:05<8:34:59, 7.47s/it] 66%|██████▌ | 8055/12188 [17:15:12<8:22:43, 7.30s/it] {'loss': 0.325, 'grad_norm': 0.688073249989975, 'learning_rate': 2.725668221858716e-06, 'epoch': 0.66} + 66%|██████▌ | 8055/12188 [17:15:12<8:22:43, 7.30s/it] 66%|██████▌ | 8056/12188 [17:15:19<8:12:23, 7.15s/it] {'loss': 0.3347, 'grad_norm': 0.6672051519276098, 'learning_rate': 2.7244850100758535e-06, 'epoch': 0.66} + 66%|██████▌ | 8056/12188 [17:15:19<8:12:23, 7.15s/it] 66%|██████▌ | 8057/12188 [17:15:26<8:11:00, 7.13s/it] {'loss': 0.2969, 'grad_norm': 0.6684148634700576, 'learning_rate': 2.7233019589862055e-06, 'epoch': 0.66} + 66%|██████▌ | 8057/12188 [17:15:26<8:11:00, 7.13s/it] 66%|██████▌ | 8058/12188 [17:15:34<8:33:47, 7.46s/it] {'loss': 0.2878, 'grad_norm': 0.6890590419296401, 'learning_rate': 2.7221190686733157e-06, 'epoch': 0.66} + 66%|██████▌ | 8058/12188 [17:15:34<8:33:47, 7.46s/it] 66%|██████▌ | 8059/12188 [17:15:41<8:28:34, 7.39s/it] {'loss': 0.328, 'grad_norm': 0.8440488719906014, 'learning_rate': 2.720936339220715e-06, 'epoch': 0.66} + 66%|██████▌ | 8059/12188 [17:15:41<8:28:34, 7.39s/it] 66%|██████▌ | 8060/12188 [17:15:49<8:29:31, 7.41s/it] {'loss': 0.3511, 'grad_norm': 0.6718076373570552, 'learning_rate': 2.7197537707119287e-06, 'epoch': 0.66} + 66%|██████▌ | 8060/12188 [17:15:49<8:29:31, 7.41s/it] 66%|██████▌ | 8061/12188 [17:15:56<8:23:12, 7.32s/it] {'loss': 0.2959, 'grad_norm': 0.6307489280416285, 'learning_rate': 2.71857136323047e-06, 'epoch': 0.66} + 66%|██████▌ | 8061/12188 [17:15:56<8:23:12, 7.32s/it] 66%|██████▌ | 8062/12188 [17:16:03<8:10:44, 7.14s/it] {'loss': 0.2798, 'grad_norm': 0.6632069809781064, 'learning_rate': 2.7173891168598332e-06, 'epoch': 0.66} + 66%|██████▌ | 8062/12188 [17:16:03<8:10:44, 7.14s/it] 66%|██████▌ | 8063/12188 [17:16:11<8:31:23, 7.44s/it] {'loss': 0.3409, 'grad_norm': 0.7118375188753067, 'learning_rate': 2.7162070316835125e-06, 'epoch': 0.66} + 66%|██████▌ | 8063/12188 [17:16:11<8:31:23, 7.44s/it] 66%|██████▌ | 8064/12188 [17:16:17<8:17:32, 7.24s/it] {'loss': 0.3121, 'grad_norm': 0.738878922979594, 'learning_rate': 2.7150251077849797e-06, 'epoch': 0.66} + 66%|██████▌ | 8064/12188 [17:16:17<8:17:32, 7.24s/it] 66%|██████▌ | 8065/12188 [17:16:24<8:01:20, 7.00s/it] {'loss': 0.362, 'grad_norm': 0.7342211310168653, 'learning_rate': 2.713843345247703e-06, 'epoch': 0.66} + 66%|██████▌ | 8065/12188 [17:16:24<8:01:20, 7.00s/it] 66%|██████▌ | 8066/12188 [17:16:32<8:31:05, 7.44s/it] {'loss': 0.2911, 'grad_norm': 0.7126169143099944, 'learning_rate': 2.7126617441551383e-06, 'epoch': 0.66} + 66%|██████▌ | 8066/12188 [17:16:32<8:31:05, 7.44s/it] 66%|██████▌ | 8067/12188 [17:16:40<8:29:59, 7.43s/it] {'loss': 0.3224, 'grad_norm': 0.7788238433970327, 'learning_rate': 2.7114803045907245e-06, 'epoch': 0.66} + 66%|██████▌ | 8067/12188 [17:16:40<8:29:59, 7.43s/it] 66%|██████▌ | 8068/12188 [17:16:47<8:30:16, 7.43s/it] {'loss': 0.3011, 'grad_norm': 0.69345805511402, 'learning_rate': 2.710299026637896e-06, 'epoch': 0.66} + 66%|██████▌ | 8068/12188 [17:16:47<8:30:16, 7.43s/it] 66%|██████▌ | 8069/12188 [17:16:55<8:30:07, 7.43s/it] {'loss': 0.3064, 'grad_norm': 0.993037419205405, 'learning_rate': 2.70911791038007e-06, 'epoch': 0.66} + 66%|██████▌ | 8069/12188 [17:16:55<8:30:07, 7.43s/it] 66%|██████▌ | 8070/12188 [17:17:01<8:17:30, 7.25s/it] {'loss': 0.3159, 'grad_norm': 0.727668006912345, 'learning_rate': 2.707936955900659e-06, 'epoch': 0.66} + 66%|██████▌ | 8070/12188 [17:17:01<8:17:30, 7.25s/it] 66%|██████▌ | 8071/12188 [17:17:08<8:08:00, 7.11s/it] {'loss': 0.291, 'grad_norm': 0.6852722117056528, 'learning_rate': 2.706756163283055e-06, 'epoch': 0.66} + 66%|██████▌ | 8071/12188 [17:17:08<8:08:00, 7.11s/it] 66%|██████▌ | 8072/12188 [17:17:15<8:01:28, 7.02s/it] {'loss': 0.278, 'grad_norm': 0.7162188748850601, 'learning_rate': 2.705575532610649e-06, 'epoch': 0.66} + 66%|██████▌ | 8072/12188 [17:17:15<8:01:28, 7.02s/it] 66%|██████▌ | 8073/12188 [17:17:22<7:56:54, 6.95s/it] {'loss': 0.3506, 'grad_norm': 0.6809288264383753, 'learning_rate': 2.7043950639668126e-06, 'epoch': 0.66} + 66%|██████▌ | 8073/12188 [17:17:22<7:56:54, 6.95s/it] 66%|██████▌ | 8074/12188 [17:17:29<7:55:02, 6.93s/it] {'loss': 0.3179, 'grad_norm': 0.7429844150807912, 'learning_rate': 2.703214757434906e-06, 'epoch': 0.66} + 66%|██████▌ | 8074/12188 [17:17:29<7:55:02, 6.93s/it] 66%|██████▋ | 8075/12188 [17:17:36<7:51:50, 6.88s/it] {'loss': 0.3333, 'grad_norm': 0.6839896720523092, 'learning_rate': 2.702034613098283e-06, 'epoch': 0.66} + 66%|██████▋ | 8075/12188 [17:17:36<7:51:50, 6.88s/it] 66%|██████▋ | 8076/12188 [17:17:43<8:02:22, 7.04s/it] {'loss': 0.2937, 'grad_norm': 0.6741242199754676, 'learning_rate': 2.7008546310402867e-06, 'epoch': 0.66} + 66%|██████▋ | 8076/12188 [17:17:43<8:02:22, 7.04s/it] 66%|██████▋ | 8077/12188 [17:17:50<7:54:27, 6.92s/it] {'loss': 0.2924, 'grad_norm': 0.6544130251368766, 'learning_rate': 2.6996748113442397e-06, 'epoch': 0.66} + 66%|██████▋ | 8077/12188 [17:17:50<7:54:27, 6.92s/it] 66%|██████▋ | 8078/12188 [17:17:57<8:02:41, 7.05s/it] {'loss': 0.2917, 'grad_norm': 0.6803490741008016, 'learning_rate': 2.698495154093462e-06, 'epoch': 0.66} + 66%|██████▋ | 8078/12188 [17:17:57<8:02:41, 7.05s/it] 66%|██████▋ | 8079/12188 [17:18:03<7:52:48, 6.90s/it] {'loss': 0.3186, 'grad_norm': 0.6952327704071696, 'learning_rate': 2.6973156593712615e-06, 'epoch': 0.66} + 66%|██████▋ | 8079/12188 [17:18:03<7:52:48, 6.90s/it] 66%|██████▋ | 8080/12188 [17:18:11<8:07:24, 7.12s/it] {'loss': 0.2919, 'grad_norm': 0.6880336821437825, 'learning_rate': 2.6961363272609265e-06, 'epoch': 0.66} + 66%|██████▋ | 8080/12188 [17:18:11<8:07:24, 7.12s/it] 66%|██████▋ | 8081/12188 [17:18:18<8:11:54, 7.19s/it] {'loss': 0.2745, 'grad_norm': 0.6841302263248249, 'learning_rate': 2.694957157845745e-06, 'epoch': 0.66} + 66%|██████▋ | 8081/12188 [17:18:18<8:11:54, 7.19s/it] 66%|██████▋ | 8082/12188 [17:18:25<8:03:23, 7.06s/it] {'loss': 0.3249, 'grad_norm': 0.690849634849999, 'learning_rate': 2.6937781512089838e-06, 'epoch': 0.66} + 66%|██████▋ | 8082/12188 [17:18:25<8:03:23, 7.06s/it] 66%|██████▋ | 8083/12188 [17:18:33<8:09:54, 7.16s/it] {'loss': 0.2799, 'grad_norm': 0.6929730180761545, 'learning_rate': 2.692599307433904e-06, 'epoch': 0.66} + 66%|██████▋ | 8083/12188 [17:18:33<8:09:54, 7.16s/it] 66%|██████▋ | 8084/12188 [17:18:39<7:56:08, 6.96s/it] {'loss': 0.2873, 'grad_norm': 0.7435123354123031, 'learning_rate': 2.691420626603757e-06, 'epoch': 0.66} + 66%|██████▋ | 8084/12188 [17:18:39<7:56:08, 6.96s/it] 66%|██████▋ | 8085/12188 [17:18:46<7:50:29, 6.88s/it] {'loss': 0.3156, 'grad_norm': 0.6681196669253539, 'learning_rate': 2.690242108801775e-06, 'epoch': 0.66} + 66%|██████▋ | 8085/12188 [17:18:46<7:50:29, 6.88s/it] 66%|██████▋ | 8086/12188 [17:18:54<8:10:02, 7.17s/it] {'loss': 0.2843, 'grad_norm': 0.7421668639915527, 'learning_rate': 2.689063754111185e-06, 'epoch': 0.66} + 66%|██████▋ | 8086/12188 [17:18:54<8:10:02, 7.17s/it] 66%|██████▋ | 8087/12188 [17:19:00<8:02:47, 7.06s/it] {'loss': 0.2767, 'grad_norm': 0.6365789694547688, 'learning_rate': 2.687885562615198e-06, 'epoch': 0.66} + 66%|██████▋ | 8087/12188 [17:19:00<8:02:47, 7.06s/it] 66%|██████▋ | 8088/12188 [17:19:08<8:12:43, 7.21s/it] {'loss': 0.3533, 'grad_norm': 0.6645573967068177, 'learning_rate': 2.686707534397018e-06, 'epoch': 0.66} + 66%|██████▋ | 8088/12188 [17:19:08<8:12:43, 7.21s/it] 66%|██████▋ | 8089/12188 [17:19:15<8:10:50, 7.18s/it] {'loss': 0.3514, 'grad_norm': 0.719401861918022, 'learning_rate': 2.685529669539837e-06, 'epoch': 0.66} + 66%|██████▋ | 8089/12188 [17:19:15<8:10:50, 7.18s/it] 66%|██████▋ | 8090/12188 [17:19:22<7:59:03, 7.01s/it] {'loss': 0.3072, 'grad_norm': 0.6827517104173965, 'learning_rate': 2.6843519681268304e-06, 'epoch': 0.66} + 66%|██████▋ | 8090/12188 [17:19:22<7:59:03, 7.01s/it] 66%|██████▋ | 8091/12188 [17:19:29<7:54:43, 6.95s/it] {'loss': 0.3218, 'grad_norm': 0.65661880335195, 'learning_rate': 2.6831744302411695e-06, 'epoch': 0.66} + 66%|██████▋ | 8091/12188 [17:19:29<7:54:43, 6.95s/it] 66%|██████▋ | 8092/12188 [17:19:36<8:11:45, 7.20s/it] {'loss': 0.2907, 'grad_norm': 0.6856756902771224, 'learning_rate': 2.681997055966006e-06, 'epoch': 0.66} + 66%|██████▋ | 8092/12188 [17:19:36<8:11:45, 7.20s/it] 66%|██████▋ | 8093/12188 [17:19:44<8:15:11, 7.26s/it] {'loss': 0.3129, 'grad_norm': 0.6480347547467532, 'learning_rate': 2.6808198453844857e-06, 'epoch': 0.66} + 66%|██████▋ | 8093/12188 [17:19:44<8:15:11, 7.26s/it] 66%|██████▋ | 8094/12188 [17:19:51<8:05:56, 7.12s/it] {'loss': 0.3535, 'grad_norm': 0.7273533159580978, 'learning_rate': 2.6796427985797447e-06, 'epoch': 0.66} + 66%|██████▋ | 8094/12188 [17:19:51<8:05:56, 7.12s/it] 66%|██████▋ | 8095/12188 [17:19:57<7:54:40, 6.96s/it] {'loss': 0.2896, 'grad_norm': 0.8172638076533322, 'learning_rate': 2.678465915634899e-06, 'epoch': 0.66} + 66%|██████▋ | 8095/12188 [17:19:57<7:54:40, 6.96s/it] 66%|██████▋ | 8096/12188 [17:20:05<8:05:46, 7.12s/it] {'loss': 0.274, 'grad_norm': 0.6362608093654536, 'learning_rate': 2.6772891966330605e-06, 'epoch': 0.66} + 66%|██████▋ | 8096/12188 [17:20:05<8:05:46, 7.12s/it] 66%|██████▋ | 8097/12188 [17:20:13<8:29:35, 7.47s/it] {'loss': 0.3302, 'grad_norm': 0.6555354696046777, 'learning_rate': 2.676112641657329e-06, 'epoch': 0.66} + 66%|██████▋ | 8097/12188 [17:20:13<8:29:35, 7.47s/it] 66%|██████▋ | 8098/12188 [17:20:20<8:15:46, 7.27s/it] {'loss': 0.3054, 'grad_norm': 0.7080644017992007, 'learning_rate': 2.6749362507907895e-06, 'epoch': 0.66} + 66%|██████▋ | 8098/12188 [17:20:20<8:15:46, 7.27s/it] 66%|██████▋ | 8099/12188 [17:20:27<8:06:57, 7.15s/it] {'loss': 0.2839, 'grad_norm': 0.7173359113883363, 'learning_rate': 2.673760024116514e-06, 'epoch': 0.66} + 66%|██████▋ | 8099/12188 [17:20:27<8:06:57, 7.15s/it] 66%|██████▋ | 8100/12188 [17:20:34<8:17:12, 7.30s/it] {'loss': 0.3083, 'grad_norm': 0.7291809372131586, 'learning_rate': 2.672583961717571e-06, 'epoch': 0.66} + 66%|██████▋ | 8100/12188 [17:20:34<8:17:12, 7.30s/it] 66%|██████▋ | 8101/12188 [17:20:41<8:09:36, 7.19s/it] {'loss': 0.3062, 'grad_norm': 0.5962906250061993, 'learning_rate': 2.671408063677007e-06, 'epoch': 0.66} + 66%|██████▋ | 8101/12188 [17:20:41<8:09:36, 7.19s/it] 66%|██████▋ | 8102/12188 [17:20:49<8:19:50, 7.34s/it] {'loss': 0.2805, 'grad_norm': 0.5737824387099492, 'learning_rate': 2.670232330077866e-06, 'epoch': 0.66} + 66%|██████▋ | 8102/12188 [17:20:49<8:19:50, 7.34s/it] 66%|██████▋ | 8103/12188 [17:20:56<8:16:08, 7.29s/it] {'loss': 0.3254, 'grad_norm': 0.7161265067709608, 'learning_rate': 2.669056761003173e-06, 'epoch': 0.66} + 66%|██████▋ | 8103/12188 [17:20:56<8:16:08, 7.29s/it] 66%|██████▋ | 8104/12188 [17:21:03<8:08:25, 7.18s/it] {'loss': 0.3061, 'grad_norm': 0.7163349336592878, 'learning_rate': 2.6678813565359487e-06, 'epoch': 0.66} + 66%|██████▋ | 8104/12188 [17:21:03<8:08:25, 7.18s/it] 66%|██████▋ | 8105/12188 [17:21:10<7:57:02, 7.01s/it] {'loss': 0.3364, 'grad_norm': 0.6891721573356774, 'learning_rate': 2.666706116759194e-06, 'epoch': 0.66} + 66%|██████▋ | 8105/12188 [17:21:10<7:57:02, 7.01s/it] 67%|██████▋ | 8106/12188 [17:21:17<7:59:27, 7.05s/it] {'loss': 0.3168, 'grad_norm': 0.7091457857780976, 'learning_rate': 2.665531041755905e-06, 'epoch': 0.67} + 67%|██████▋ | 8106/12188 [17:21:17<7:59:27, 7.05s/it] 67%|██████▋ | 8107/12188 [17:21:24<8:14:45, 7.27s/it] {'loss': 0.2898, 'grad_norm': 0.7166241820556125, 'learning_rate': 2.664356131609065e-06, 'epoch': 0.67} + 67%|██████▋ | 8107/12188 [17:21:24<8:14:45, 7.27s/it] 67%|██████▋ | 8108/12188 [17:21:32<8:17:23, 7.31s/it] {'loss': 0.285, 'grad_norm': 0.6610305757150101, 'learning_rate': 2.66318138640164e-06, 'epoch': 0.67} + 67%|██████▋ | 8108/12188 [17:21:32<8:17:23, 7.31s/it] 67%|██████▋ | 8109/12188 [17:21:38<8:02:34, 7.10s/it] {'loss': 0.2955, 'grad_norm': 0.7043420100687419, 'learning_rate': 2.662006806216594e-06, 'epoch': 0.67} + 67%|██████▋ | 8109/12188 [17:21:39<8:02:34, 7.10s/it] 67%|██████▋ | 8110/12188 [17:21:46<8:15:37, 7.29s/it] {'loss': 0.3145, 'grad_norm': 0.692499879363533, 'learning_rate': 2.660832391136868e-06, 'epoch': 0.67} + 67%|██████▋ | 8110/12188 [17:21:46<8:15:37, 7.29s/it] 67%|██████▋ | 8111/12188 [17:21:53<8:02:33, 7.10s/it] {'loss': 0.2916, 'grad_norm': 0.6932960613536591, 'learning_rate': 2.6596581412454013e-06, 'epoch': 0.67} + 67%|██████▋ | 8111/12188 [17:21:53<8:02:33, 7.10s/it] 67%|██████▋ | 8112/12188 [17:22:01<8:17:50, 7.33s/it] {'loss': 0.2893, 'grad_norm': 0.6837357807637505, 'learning_rate': 2.658484056625118e-06, 'epoch': 0.67} + 67%|██████▋ | 8112/12188 [17:22:01<8:17:50, 7.33s/it] 67%|██████▋ | 8113/12188 [17:22:07<8:03:30, 7.12s/it] {'loss': 0.3025, 'grad_norm': 0.7119390310635075, 'learning_rate': 2.6573101373589296e-06, 'epoch': 0.67} + 67%|██████▋ | 8113/12188 [17:22:07<8:03:30, 7.12s/it] 67%|██████▋ | 8114/12188 [17:22:14<7:59:20, 7.06s/it] {'loss': 0.3393, 'grad_norm': 0.8727110300646701, 'learning_rate': 2.6561363835297325e-06, 'epoch': 0.67} + 67%|██████▋ | 8114/12188 [17:22:14<7:59:20, 7.06s/it] 67%|██████▋ | 8115/12188 [17:22:22<8:08:19, 7.19s/it] {'loss': 0.3295, 'grad_norm': 0.6731182348400044, 'learning_rate': 2.6549627952204202e-06, 'epoch': 0.67} + 67%|██████▋ | 8115/12188 [17:22:22<8:08:19, 7.19s/it] 67%|██████▋ | 8116/12188 [17:22:29<8:01:17, 7.09s/it] {'loss': 0.298, 'grad_norm': 0.6174508455107487, 'learning_rate': 2.6537893725138664e-06, 'epoch': 0.67} + 67%|██████▋ | 8116/12188 [17:22:29<8:01:17, 7.09s/it] 67%|██████▋ | 8117/12188 [17:22:37<8:30:15, 7.52s/it] {'loss': 0.2744, 'grad_norm': 0.6857399510701339, 'learning_rate': 2.6526161154929396e-06, 'epoch': 0.67} + 67%|██████▋ | 8117/12188 [17:22:37<8:30:15, 7.52s/it] 67%|██████▋ | 8118/12188 [17:22:44<8:23:28, 7.42s/it] {'loss': 0.339, 'grad_norm': 0.6875080184029904, 'learning_rate': 2.651443024240489e-06, 'epoch': 0.67} + 67%|██████▋ | 8118/12188 [17:22:44<8:23:28, 7.42s/it] 67%|██████▋ | 8119/12188 [17:22:52<8:17:38, 7.34s/it] {'loss': 0.2997, 'grad_norm': 0.6508821962647262, 'learning_rate': 2.6502700988393593e-06, 'epoch': 0.67} + 67%|██████▋ | 8119/12188 [17:22:52<8:17:38, 7.34s/it] 67%|██████▋ | 8120/12188 [17:23:01<8:52:03, 7.85s/it] {'loss': 0.3013, 'grad_norm': 0.6884870745907083, 'learning_rate': 2.649097339372382e-06, 'epoch': 0.67} + 67%|██████▋ | 8120/12188 [17:23:01<8:52:03, 7.85s/it] 67%|██████▋ | 8121/12188 [17:23:07<8:33:04, 7.57s/it] {'loss': 0.3206, 'grad_norm': 0.6236580612074822, 'learning_rate': 2.647924745922371e-06, 'epoch': 0.67} + 67%|██████▋ | 8121/12188 [17:23:07<8:33:04, 7.57s/it] 67%|██████▋ | 8122/12188 [17:23:14<8:21:51, 7.41s/it] {'loss': 0.3167, 'grad_norm': 0.7228484502580459, 'learning_rate': 2.6467523185721376e-06, 'epoch': 0.67} + 67%|██████▋ | 8122/12188 [17:23:15<8:21:51, 7.41s/it] 67%|██████▋ | 8123/12188 [17:23:22<8:16:59, 7.34s/it] {'loss': 0.2964, 'grad_norm': 0.639749980162052, 'learning_rate': 2.645580057404473e-06, 'epoch': 0.67} + 67%|██████▋ | 8123/12188 [17:23:22<8:16:59, 7.34s/it] 67%|██████▋ | 8124/12188 [17:23:29<8:13:24, 7.28s/it] {'loss': 0.3123, 'grad_norm': 0.6677747266995528, 'learning_rate': 2.6444079625021625e-06, 'epoch': 0.67} + 67%|██████▋ | 8124/12188 [17:23:29<8:13:24, 7.28s/it] 67%|██████▋ | 8125/12188 [17:23:37<8:22:42, 7.42s/it] {'loss': 0.3718, 'grad_norm': 0.6450231882670014, 'learning_rate': 2.6432360339479786e-06, 'epoch': 0.67} + 67%|██████▋ | 8125/12188 [17:23:37<8:22:42, 7.42s/it] 67%|██████▋ | 8126/12188 [17:23:43<8:11:36, 7.26s/it] {'loss': 0.3112, 'grad_norm': 0.5965623434140845, 'learning_rate': 2.642064271824679e-06, 'epoch': 0.67} + 67%|██████▋ | 8126/12188 [17:23:43<8:11:36, 7.26s/it] 67%|██████▋ | 8127/12188 [17:23:51<8:19:52, 7.39s/it] {'loss': 0.3085, 'grad_norm': 0.7153228657172461, 'learning_rate': 2.640892676215011e-06, 'epoch': 0.67} + 67%|██████▋ | 8127/12188 [17:23:51<8:19:52, 7.39s/it] 67%|██████▋ | 8128/12188 [17:23:58<8:12:31, 7.28s/it] {'loss': 0.3469, 'grad_norm': 0.837820386523976, 'learning_rate': 2.639721247201714e-06, 'epoch': 0.67} + 67%|██████▋ | 8128/12188 [17:23:58<8:12:31, 7.28s/it] 67%|██████▋ | 8129/12188 [17:24:05<8:08:56, 7.23s/it] {'loss': 0.3192, 'grad_norm': 0.6615683063390081, 'learning_rate': 2.6385499848675086e-06, 'epoch': 0.67} + 67%|██████▋ | 8129/12188 [17:24:05<8:08:56, 7.23s/it] 67%|██████▋ | 8130/12188 [17:24:15<9:08:41, 8.11s/it] {'loss': 0.3394, 'grad_norm': 0.6727767448893986, 'learning_rate': 2.637378889295111e-06, 'epoch': 0.67} + 67%|██████▋ | 8130/12188 [17:24:15<9:08:41, 8.11s/it] 67%|██████▋ | 8131/12188 [17:24:22<8:39:17, 7.68s/it] {'loss': 0.2839, 'grad_norm': 0.5941384548812534, 'learning_rate': 2.6362079605672185e-06, 'epoch': 0.67} + 67%|██████▋ | 8131/12188 [17:24:22<8:39:17, 7.68s/it] 67%|██████▋ | 8132/12188 [17:24:29<8:21:27, 7.42s/it] {'loss': 0.3001, 'grad_norm': 0.7181424550604965, 'learning_rate': 2.635037198766522e-06, 'epoch': 0.67} + 67%|██████▋ | 8132/12188 [17:24:29<8:21:27, 7.42s/it] 67%|██████▋ | 8133/12188 [17:24:36<8:08:06, 7.22s/it] {'loss': 0.3265, 'grad_norm': 0.6692103986756982, 'learning_rate': 2.633866603975701e-06, 'epoch': 0.67} + 67%|██████▋ | 8133/12188 [17:24:36<8:08:06, 7.22s/it] 67%|██████▋ | 8134/12188 [17:24:44<8:27:26, 7.51s/it] {'loss': 0.311, 'grad_norm': 0.6806407853006319, 'learning_rate': 2.6326961762774167e-06, 'epoch': 0.67} + 67%|██████▋ | 8134/12188 [17:24:44<8:27:26, 7.51s/it] 67%|██████▋ | 8135/12188 [17:24:51<8:15:33, 7.34s/it] {'loss': 0.363, 'grad_norm': 0.7249538073675089, 'learning_rate': 2.631525915754327e-06, 'epoch': 0.67} + 67%|██████▋ | 8135/12188 [17:24:51<8:15:33, 7.34s/it] 67%|██████▋ | 8136/12188 [17:24:57<8:00:47, 7.12s/it] {'loss': 0.3169, 'grad_norm': 0.6999870269766665, 'learning_rate': 2.6303558224890704e-06, 'epoch': 0.67} + 67%|██████▋ | 8136/12188 [17:24:57<8:00:47, 7.12s/it] 67%|██████▋ | 8137/12188 [17:25:05<8:01:53, 7.14s/it] {'loss': 0.3301, 'grad_norm': 0.7131143692699918, 'learning_rate': 2.629185896564278e-06, 'epoch': 0.67} + 67%|██████▋ | 8137/12188 [17:25:05<8:01:53, 7.14s/it] 67%|██████▋ | 8138/12188 [17:25:11<7:56:02, 7.05s/it] {'loss': 0.3105, 'grad_norm': 0.6797668714849202, 'learning_rate': 2.6280161380625707e-06, 'epoch': 0.67} + 67%|██████▋ | 8138/12188 [17:25:11<7:56:02, 7.05s/it] 67%|██████▋ | 8139/12188 [17:25:19<8:03:44, 7.17s/it] {'loss': 0.2923, 'grad_norm': 0.7323832287144254, 'learning_rate': 2.626846547066551e-06, 'epoch': 0.67} + 67%|██████▋ | 8139/12188 [17:25:19<8:03:44, 7.17s/it] 67%|██████▋ | 8140/12188 [17:25:26<8:05:50, 7.20s/it] {'loss': 0.3439, 'grad_norm': 0.6723776864239038, 'learning_rate': 2.6256771236588174e-06, 'epoch': 0.67} + 67%|██████▋ | 8140/12188 [17:25:26<8:05:50, 7.20s/it] 67%|██████▋ | 8141/12188 [17:25:34<8:15:11, 7.34s/it] {'loss': 0.3353, 'grad_norm': 0.6410461778248537, 'learning_rate': 2.6245078679219503e-06, 'epoch': 0.67} + 67%|██████▋ | 8141/12188 [17:25:34<8:15:11, 7.34s/it] 67%|██████▋ | 8142/12188 [17:25:41<8:04:34, 7.19s/it] {'loss': 0.3173, 'grad_norm': 0.6347722817095229, 'learning_rate': 2.6233387799385194e-06, 'epoch': 0.67} + 67%|██████▋ | 8142/12188 [17:25:41<8:04:34, 7.19s/it] 67%|██████▋ | 8143/12188 [17:25:49<8:20:50, 7.43s/it] {'loss': 0.3042, 'grad_norm': 1.4644522030650402, 'learning_rate': 2.622169859791088e-06, 'epoch': 0.67} + 67%|██████▋ | 8143/12188 [17:25:49<8:20:50, 7.43s/it] 67%|██████▋ | 8144/12188 [17:25:56<8:08:58, 7.25s/it] {'loss': 0.2929, 'grad_norm': 0.6440476632582055, 'learning_rate': 2.6210011075621977e-06, 'epoch': 0.67} + 67%|██████▋ | 8144/12188 [17:25:56<8:08:58, 7.25s/it] 67%|██████▋ | 8145/12188 [17:26:03<8:13:25, 7.32s/it] {'loss': 0.2444, 'grad_norm': 0.6524827453287786, 'learning_rate': 2.61983252333439e-06, 'epoch': 0.67} + 67%|██████▋ | 8145/12188 [17:26:03<8:13:25, 7.32s/it] 67%|██████▋ | 8146/12188 [17:26:10<7:57:45, 7.09s/it] {'loss': 0.3231, 'grad_norm': 0.6379372349024204, 'learning_rate': 2.618664107190183e-06, 'epoch': 0.67} + 67%|██████▋ | 8146/12188 [17:26:10<7:57:45, 7.09s/it] 67%|██████▋ | 8147/12188 [17:26:16<7:49:03, 6.96s/it] {'loss': 0.3257, 'grad_norm': 0.6432079303479796, 'learning_rate': 2.617495859212091e-06, 'epoch': 0.67} + 67%|██████▋ | 8147/12188 [17:26:16<7:49:03, 6.96s/it] 67%|███��██▋ | 8148/12188 [17:26:24<8:03:19, 7.18s/it] {'loss': 0.3313, 'grad_norm': 0.6480653517664382, 'learning_rate': 2.616327779482615e-06, 'epoch': 0.67} + 67%|██████▋ | 8148/12188 [17:26:24<8:03:19, 7.18s/it] 67%|██████▋ | 8149/12188 [17:26:31<7:58:50, 7.11s/it] {'loss': 0.3132, 'grad_norm': 0.6662150036632537, 'learning_rate': 2.6151598680842405e-06, 'epoch': 0.67} + 67%|██████▋ | 8149/12188 [17:26:31<7:58:50, 7.11s/it] 67%|██████▋ | 8150/12188 [17:26:39<8:19:12, 7.42s/it] {'loss': 0.3563, 'grad_norm': 0.6639229354896464, 'learning_rate': 2.6139921250994438e-06, 'epoch': 0.67} + 67%|██████▋ | 8150/12188 [17:26:39<8:19:12, 7.42s/it] 67%|██████▋ | 8151/12188 [17:26:47<8:25:15, 7.51s/it] {'loss': 0.2982, 'grad_norm': 0.6562889436331024, 'learning_rate': 2.6128245506106927e-06, 'epoch': 0.67} + 67%|██████▋ | 8151/12188 [17:26:47<8:25:15, 7.51s/it] 67%|██████▋ | 8152/12188 [17:26:54<8:18:16, 7.41s/it] {'loss': 0.3378, 'grad_norm': 0.74076463421231, 'learning_rate': 2.6116571447004335e-06, 'epoch': 0.67} + 67%|██████▋ | 8152/12188 [17:26:54<8:18:16, 7.41s/it] 67%|██████▋ | 8153/12188 [17:27:02<8:36:33, 7.68s/it] {'loss': 0.2845, 'grad_norm': 0.6782853852071222, 'learning_rate': 2.6104899074511125e-06, 'epoch': 0.67} + 67%|██████▋ | 8153/12188 [17:27:02<8:36:33, 7.68s/it] 67%|██████▋ | 8154/12188 [17:27:10<8:38:57, 7.72s/it] {'loss': 0.3147, 'grad_norm': 0.6513678402579531, 'learning_rate': 2.6093228389451547e-06, 'epoch': 0.67} + 67%|██████▋ | 8154/12188 [17:27:10<8:38:57, 7.72s/it] 67%|██████▋ | 8155/12188 [17:27:17<8:24:46, 7.51s/it] {'loss': 0.2905, 'grad_norm': 0.5925118463421216, 'learning_rate': 2.6081559392649754e-06, 'epoch': 0.67} + 67%|██████▋ | 8155/12188 [17:27:17<8:24:46, 7.51s/it] 67%|██████▋ | 8156/12188 [17:27:25<8:43:33, 7.79s/it] {'loss': 0.2791, 'grad_norm': 0.5985026531199384, 'learning_rate': 2.6069892084929825e-06, 'epoch': 0.67} + 67%|██████▋ | 8156/12188 [17:27:25<8:43:33, 7.79s/it] 67%|██████▋ | 8157/12188 [17:27:32<8:26:48, 7.54s/it] {'loss': 0.2912, 'grad_norm': 0.663936716330973, 'learning_rate': 2.6058226467115647e-06, 'epoch': 0.67} + 67%|██████▋ | 8157/12188 [17:27:32<8:26:48, 7.54s/it] 67%|██████▋ | 8158/12188 [17:27:40<8:25:22, 7.52s/it] {'loss': 0.3382, 'grad_norm': 0.7304583404517128, 'learning_rate': 2.604656254003108e-06, 'epoch': 0.67} + 67%|██████▋ | 8158/12188 [17:27:40<8:25:22, 7.52s/it] 67%|██████▋ | 8159/12188 [17:27:48<8:27:42, 7.56s/it] {'loss': 0.2794, 'grad_norm': 0.9091436404186509, 'learning_rate': 2.6034900304499754e-06, 'epoch': 0.67} + 67%|██████▋ | 8159/12188 [17:27:48<8:27:42, 7.56s/it] 67%|██████▋ | 8160/12188 [17:27:55<8:24:56, 7.52s/it] {'loss': 0.3047, 'grad_norm': 0.7208340150312141, 'learning_rate': 2.6023239761345277e-06, 'epoch': 0.67} + 67%|██████▋ | 8160/12188 [17:27:55<8:24:56, 7.52s/it] 67%|██████▋ | 8161/12188 [17:28:02<8:16:10, 7.39s/it] {'loss': 0.317, 'grad_norm': 0.6796498641753095, 'learning_rate': 2.6011580911391094e-06, 'epoch': 0.67} + 67%|██████▋ | 8161/12188 [17:28:02<8:16:10, 7.39s/it] 67%|██████▋ | 8162/12188 [17:28:10<8:17:33, 7.42s/it] {'loss': 0.3281, 'grad_norm': 0.6915819096663688, 'learning_rate': 2.599992375546051e-06, 'epoch': 0.67} + 67%|██████▋ | 8162/12188 [17:28:10<8:17:33, 7.42s/it] 67%|██████▋ | 8163/12188 [17:28:19<8:49:29, 7.89s/it] {'loss': 0.3392, 'grad_norm': 0.6747360055943659, 'learning_rate': 2.5988268294376774e-06, 'epoch': 0.67} + 67%|██████▋ | 8163/12188 [17:28:19<8:49:29, 7.89s/it] 67%|██████▋ | 8164/12188 [17:28:26<8:34:09, 7.67s/it] {'loss': 0.2997, 'grad_norm': 0.7102160457581349, 'learning_rate': 2.597661452896293e-06, 'epoch': 0.67} + 67%|██████▋ | 8164/12188 [17:28:26<8:34:09, 7.67s/it] 67%|██████▋ | 8165/12188 [17:28:34<8:50:44, 7.92s/it] {'loss': 0.3091, 'grad_norm': 0.6779858898004372, 'learning_rate': 2.5964962460041977e-06, 'epoch': 0.67} + 67%|██████▋ | 8165/12188 [17:28:34<8:50:44, 7.92s/it] 67%|██████▋ | 8166/12188 [17:28:41<8:31:38, 7.63s/it] {'loss': 0.3005, 'grad_norm': 0.6742282714108595, 'learning_rate': 2.595331208843678e-06, 'epoch': 0.67} + 67%|██████▋ | 8166/12188 [17:28:41<8:31:38, 7.63s/it] 67%|██████▋ | 8167/12188 [17:28:48<8:11:42, 7.34s/it] {'loss': 0.3125, 'grad_norm': 0.668907631913854, 'learning_rate': 2.594166341497002e-06, 'epoch': 0.67} + 67%|██████▋ | 8167/12188 [17:28:48<8:11:42, 7.34s/it] 67%|██████▋ | 8168/12188 [17:28:55<7:59:45, 7.16s/it] {'loss': 0.2862, 'grad_norm': 0.6787387641859602, 'learning_rate': 2.593001644046437e-06, 'epoch': 0.67} + 67%|██████▋ | 8168/12188 [17:28:55<7:59:45, 7.16s/it] 67%|██████▋ | 8169/12188 [17:29:02<7:58:46, 7.15s/it] {'loss': 0.288, 'grad_norm': 0.6780458308364831, 'learning_rate': 2.5918371165742272e-06, 'epoch': 0.67} + 67%|██████▋ | 8169/12188 [17:29:02<7:58:46, 7.15s/it] 67%|██████▋ | 8170/12188 [17:29:09<7:58:12, 7.14s/it] {'loss': 0.3268, 'grad_norm': 0.6590150781016358, 'learning_rate': 2.59067275916261e-06, 'epoch': 0.67} + 67%|██████▋ | 8170/12188 [17:29:09<7:58:12, 7.14s/it] 67%|██████▋ | 8171/12188 [17:29:17<8:27:09, 7.58s/it] {'loss': 0.3257, 'grad_norm': 0.6654381309264227, 'learning_rate': 2.589508571893814e-06, 'epoch': 0.67} + 67%|██████▋ | 8171/12188 [17:29:17<8:27:09, 7.58s/it] 67%|██████▋ | 8172/12188 [17:29:25<8:28:33, 7.60s/it] {'loss': 0.3038, 'grad_norm': 0.6708101594709509, 'learning_rate': 2.5883445548500474e-06, 'epoch': 0.67} + 67%|██████▋ | 8172/12188 [17:29:25<8:28:33, 7.60s/it] 67%|██████▋ | 8173/12188 [17:29:32<8:23:18, 7.52s/it] {'loss': 0.2787, 'grad_norm': 0.7123476594689931, 'learning_rate': 2.587180708113514e-06, 'epoch': 0.67} + 67%|██████▋ | 8173/12188 [17:29:32<8:23:18, 7.52s/it] 67%|██████▋ | 8174/12188 [17:29:39<8:09:07, 7.31s/it] {'loss': 0.2833, 'grad_norm': 0.6151728797771059, 'learning_rate': 2.5860170317664047e-06, 'epoch': 0.67} + 67%|██████▋ | 8174/12188 [17:29:39<8:09:07, 7.31s/it] 67%|██████▋ | 8175/12188 [17:29:47<8:15:43, 7.41s/it] {'loss': 0.327, 'grad_norm': 0.7211218917174692, 'learning_rate': 2.5848535258908918e-06, 'epoch': 0.67} + 67%|██████▋ | 8175/12188 [17:29:47<8:15:43, 7.41s/it] 67%|██████▋ | 8176/12188 [17:29:55<8:22:02, 7.51s/it] {'loss': 0.3239, 'grad_norm': 0.7359921239262016, 'learning_rate': 2.583690190569145e-06, 'epoch': 0.67} + 67%|██████▋ | 8176/12188 [17:29:55<8:22:02, 7.51s/it] 67%|██████▋ | 8177/12188 [17:30:01<8:04:35, 7.25s/it] {'loss': 0.3216, 'grad_norm': 0.7271387913716978, 'learning_rate': 2.5825270258833126e-06, 'epoch': 0.67} + 67%|██████▋ | 8177/12188 [17:30:01<8:04:35, 7.25s/it] 67%|██████▋ | 8178/12188 [17:30:08<7:53:53, 7.09s/it] {'loss': 0.2763, 'grad_norm': 0.8147457991253308, 'learning_rate': 2.5813640319155376e-06, 'epoch': 0.67} + 67%|██████▋ | 8178/12188 [17:30:08<7:53:53, 7.09s/it] 67%|██████▋ | 8179/12188 [17:30:16<8:12:41, 7.37s/it] {'loss': 0.2495, 'grad_norm': 0.5741600279950695, 'learning_rate': 2.5802012087479513e-06, 'epoch': 0.67} + 67%|██████▋ | 8179/12188 [17:30:16<8:12:41, 7.37s/it] 67%|██████▋ | 8180/12188 [17:30:23<8:01:07, 7.20s/it] {'loss': 0.3317, 'grad_norm': 0.6848158198473198, 'learning_rate': 2.5790385564626654e-06, 'epoch': 0.67} + 67%|██████▋ | 8180/12188 [17:30:23<8:01:07, 7.20s/it] 67%|██████▋ | 8181/12188 [17:30:32<8:33:42, 7.69s/it] {'loss': 0.3014, 'grad_norm': 0.6551076964876796, 'learning_rate': 2.5778760751417896e-06, 'epoch': 0.67} + 67%|██████▋ | 8181/12188 [17:30:32<8:33:42, 7.69s/it] 67%|██████▋ | 8182/12188 [17:30:39<8:28:30, 7.62s/it] {'loss': 0.303, 'grad_norm': 0.7117376843015984, 'learning_rate': 2.5767137648674136e-06, 'epoch': 0.67} + 67%|██████▋ | 8182/12188 [17:30:39<8:28:30, 7.62s/it] 67%|██████▋ | 8183/12188 [17:30:50<9:30:23, 8.55s/it] {'loss': 0.2972, 'grad_norm': 0.8192909981509287, 'learning_rate': 2.5755516257216163e-06, 'epoch': 0.67} + 67%|██████▋ | 8183/12188 [17:30:50<9:30:23, 8.55s/it] 67%|██████▋ | 8184/12188 [17:30:57<8:59:23, 8.08s/it] {'loss': 0.3085, 'grad_norm': 0.6741981710184765, 'learning_rate': 2.57438965778647e-06, 'epoch': 0.67} + 67%|██████▋ | 8184/12188 [17:30:57<8:59:23, 8.08s/it] 67%|██████▋ | 8185/12188 [17:31:04<8:39:32, 7.79s/it] {'loss': 0.3015, 'grad_norm': 0.8436251740587363, 'learning_rate': 2.5732278611440265e-06, 'epoch': 0.67} + 67%|██████▋ | 8185/12188 [17:31:04<8:39:32, 7.79s/it] 67%|██████▋ | 8186/12188 [17:31:11<8:29:21, 7.64s/it] {'loss': 0.3022, 'grad_norm': 0.635303833837762, 'learning_rate': 2.572066235876334e-06, 'epoch': 0.67} + 67%|██████▋ | 8186/12188 [17:31:11<8:29:21, 7.64s/it] 67%|██████▋ | 8187/12188 [17:31:18<8:16:20, 7.44s/it] {'loss': 0.2989, 'grad_norm': 0.6415965000562331, 'learning_rate': 2.5709047820654236e-06, 'epoch': 0.67} + 67%|██████▋ | 8187/12188 [17:31:18<8:16:20, 7.44s/it] 67%|██████▋ | 8188/12188 [17:31:25<8:02:46, 7.24s/it] {'loss': 0.3178, 'grad_norm': 0.7510122053697051, 'learning_rate': 2.5697434997933133e-06, 'epoch': 0.67} + 67%|██████▋ | 8188/12188 [17:31:25<8:02:46, 7.24s/it] 67%|██████▋ | 8189/12188 [17:31:33<8:18:01, 7.47s/it] {'loss': 0.3213, 'grad_norm': 0.7341908339375776, 'learning_rate': 2.5685823891420146e-06, 'epoch': 0.67} + 67%|██████▋ | 8189/12188 [17:31:33<8:18:01, 7.47s/it] 67%|██████▋ | 8190/12188 [17:31:41<8:39:32, 7.80s/it] {'loss': 0.3004, 'grad_norm': 0.8088667559578648, 'learning_rate': 2.5674214501935198e-06, 'epoch': 0.67} + 67%|██████▋ | 8190/12188 [17:31:41<8:39:32, 7.80s/it] 67%|██████▋ | 8191/12188 [17:31:49<8:40:34, 7.81s/it] {'loss': 0.3028, 'grad_norm': 0.6572969113792508, 'learning_rate': 2.566260683029813e-06, 'epoch': 0.67} + 67%|██████▋ | 8191/12188 [17:31:49<8:40:34, 7.81s/it] 67%|██████▋ | 8192/12188 [17:31:57<8:33:37, 7.71s/it] {'loss': 0.3234, 'grad_norm': 0.6721398141587366, 'learning_rate': 2.5651000877328692e-06, 'epoch': 0.67} + 67%|██████▋ | 8192/12188 [17:31:57<8:33:37, 7.71s/it] 67%|██████▋ | 8193/12188 [17:32:04<8:13:02, 7.40s/it] {'loss': 0.3231, 'grad_norm': 0.705007418271208, 'learning_rate': 2.5639396643846436e-06, 'epoch': 0.67} + 67%|██████▋ | 8193/12188 [17:32:04<8:13:02, 7.40s/it] 67%|██████▋ | 8194/12188 [17:32:10<8:03:24, 7.26s/it] {'loss': 0.2888, 'grad_norm': 0.6736570582587528, 'learning_rate': 2.5627794130670863e-06, 'epoch': 0.67} + 67%|██████▋ | 8194/12188 [17:32:10<8:03:24, 7.26s/it] 67%|██████▋ | 8195/12188 [17:32:18<8:03:32, 7.27s/it] {'loss': 0.3121, 'grad_norm': 0.7109293122804601, 'learning_rate': 2.56161933386213e-06, 'epoch': 0.67} + 67%|██████▋ | 8195/12188 [17:32:18<8:03:32, 7.27s/it] 67%|██████▋ | 8196/12188 [17:32:24<7:47:09, 7.02s/it] {'loss': 0.283, 'grad_norm': 0.6639248152571967, 'learning_rate': 2.5604594268516995e-06, 'epoch': 0.67} + 67%|██████▋ | 8196/12188 [17:32:24<7:47:09, 7.02s/it] 67%|██████▋ | 8197/12188 [17:32:31<7:47:28, 7.03s/it] {'loss': 0.3138, 'grad_norm': 1.785582809651136, 'learning_rate': 2.559299692117706e-06, 'epoch': 0.67} + 67%|██████▋ | 8197/12188 [17:32:31<7:47:28, 7.03s/it] 67%|██████▋ | 8198/12188 [17:32:40<8:13:15, 7.42s/it] {'loss': 0.3103, 'grad_norm': 0.7236312110453947, 'learning_rate': 2.5581401297420444e-06, 'epoch': 0.67} + 67%|██████▋ | 8198/12188 [17:32:40<8:13:15, 7.42s/it] 67%|██████▋ | 8199/12188 [17:32:46<7:59:25, 7.21s/it] {'loss': 0.2955, 'grad_norm': 0.715287777226239, 'learning_rate': 2.5569807398066036e-06, 'epoch': 0.67} + 67%|██████▋ | 8199/12188 [17:32:46<7:59:25, 7.21s/it] 67%|██████▋ | 8200/12188 [17:32:53<7:56:51, 7.17s/it] {'loss': 0.2984, 'grad_norm': 0.6514545843591932, 'learning_rate': 2.55582152239326e-06, 'epoch': 0.67} + 67%|██████▋ | 8200/12188 [17:32:53<7:56:51, 7.17s/it] 67%|██████▋ | 8201/12188 [17:33:01<8:14:30, 7.44s/it] {'loss': 0.2914, 'grad_norm': 0.6473567894613749, 'learning_rate': 2.5546624775838725e-06, 'epoch': 0.67} + 67%|██████▋ | 8201/12188 [17:33:01<8:14:30, 7.44s/it] 67%|██████▋ | 8202/12188 [17:33:10<8:28:10, 7.65s/it] {'loss': 0.3097, 'grad_norm': 0.8032997522720652, 'learning_rate': 2.5535036054602934e-06, 'epoch': 0.67} + 67%|██████▋ | 8202/12188 [17:33:10<8:28:10, 7.65s/it] 67%|██████▋ | 8203/12188 [17:33:18<8:46:20, 7.92s/it] {'loss': 0.2859, 'grad_norm': 0.6235762450323863, 'learning_rate': 2.552344906104357e-06, 'epoch': 0.67} + 67%|██████▋ | 8203/12188 [17:33:18<8:46:20, 7.92s/it] 67%|██████▋ | 8204/12188 [17:33:27<8:57:08, 8.09s/it] {'loss': 0.3, 'grad_norm': 0.6449381710364568, 'learning_rate': 2.551186379597891e-06, 'epoch': 0.67} + 67%|██████▋ | 8204/12188 [17:33:27<8:57:08, 8.09s/it] 67%|██████▋ | 8205/12188 [17:33:35<9:03:46, 8.19s/it] {'loss': 0.3433, 'grad_norm': 0.6697603070893805, 'learning_rate': 2.5500280260227106e-06, 'epoch': 0.67} + 67%|██████▋ | 8205/12188 [17:33:35<9:03:46, 8.19s/it] 67%|██████▋ | 8206/12188 [17:33:42<8:40:02, 7.84s/it] {'loss': 0.3476, 'grad_norm': 0.8431303902489062, 'learning_rate': 2.5488698454606128e-06, 'epoch': 0.67} + 67%|██████▋ | 8206/12188 [17:33:42<8:40:02, 7.84s/it] 67%|██████▋ | 8207/12188 [17:33:49<8:26:26, 7.63s/it] {'loss': 0.3316, 'grad_norm': 0.6174061456902428, 'learning_rate': 2.5477118379933903e-06, 'epoch': 0.67} + 67%|██████▋ | 8207/12188 [17:33:49<8:26:26, 7.63s/it] 67%|██████▋ | 8208/12188 [17:33:56<8:07:36, 7.35s/it] {'loss': 0.307, 'grad_norm': 0.8034581054843295, 'learning_rate': 2.546554003702816e-06, 'epoch': 0.67} + 67%|██████▋ | 8208/12188 [17:33:56<8:07:36, 7.35s/it] 67%|██████▋ | 8209/12188 [17:34:03<7:58:56, 7.22s/it] {'loss': 0.2902, 'grad_norm': 0.6859813351186242, 'learning_rate': 2.5453963426706585e-06, 'epoch': 0.67} + 67%|██████▋ | 8209/12188 [17:34:03<7:58:56, 7.22s/it] 67%|██████▋ | 8210/12188 [17:34:10<7:56:43, 7.19s/it] {'loss': 0.3024, 'grad_norm': 1.0068267857392428, 'learning_rate': 2.5442388549786668e-06, 'epoch': 0.67} + 67%|██████▋ | 8210/12188 [17:34:10<7:56:43, 7.19s/it] 67%|██████▋ | 8211/12188 [17:34:17<8:04:22, 7.31s/it] {'loss': 0.3785, 'grad_norm': 0.6491049922210534, 'learning_rate': 2.5430815407085805e-06, 'epoch': 0.67} + 67%|██████▋ | 8211/12188 [17:34:18<8:04:22, 7.31s/it] 67%|██████▋ | 8212/12188 [17:34:24<7:58:04, 7.21s/it] {'loss': 0.3333, 'grad_norm': 0.6263892646848009, 'learning_rate': 2.5419243999421305e-06, 'epoch': 0.67} + 67%|██████▋ | 8212/12188 [17:34:25<7:58:04, 7.21s/it] 67%|██████▋ | 8213/12188 [17:34:32<7:56:52, 7.20s/it] {'loss': 0.2915, 'grad_norm': 0.6745267031320171, 'learning_rate': 2.5407674327610277e-06, 'epoch': 0.67} + 67%|██████▋ | 8213/12188 [17:34:32<7:56:52, 7.20s/it] 67%|██████▋ | 8214/12188 [17:34:39<7:58:23, 7.22s/it] {'loss': 0.3128, 'grad_norm': 0.6850156417416312, 'learning_rate': 2.539610639246978e-06, 'epoch': 0.67} + 67%|██████▋ | 8214/12188 [17:34:39<7:58:23, 7.22s/it] 67%|██████▋ | 8215/12188 [17:34:46<7:52:13, 7.13s/it] {'loss': 0.2878, 'grad_norm': 0.6136271227679502, 'learning_rate': 2.5384540194816743e-06, 'epoch': 0.67} + 67%|██████▋ | 8215/12188 [17:34:46<7:52:13, 7.13s/it] 67%|██████▋ | 8216/12188 [17:34:54<8:05:01, 7.33s/it] {'loss': 0.3051, 'grad_norm': 0.7313377768000188, 'learning_rate': 2.5372975735467905e-06, 'epoch': 0.67} + 67%|██████▋ | 8216/12188 [17:34:54<8:05:01, 7.33s/it] 67%|██████▋ | 8217/12188 [17:35:00<7:51:08, 7.12s/it] {'loss': 0.3068, 'grad_norm': 0.6609957878579671, 'learning_rate': 2.5361413015239957e-06, 'epoch': 0.67} + 67%|██████▋ | 8217/12188 [17:35:00<7:51:08, 7.12s/it] 67%|██████▋ | 8218/12188 [17:35:07<7:46:32, 7.05s/it] {'loss': 0.3046, 'grad_norm': 0.661754539804971, 'learning_rate': 2.534985203494945e-06, 'epoch': 0.67} + 67%|██████▋ | 8218/12188 [17:35:07<7:46:32, 7.05s/it] 67%|██████▋ | 8219/12188 [17:35:14<7:51:26, 7.13s/it] {'loss': 0.3158, 'grad_norm': 0.6672268619070145, 'learning_rate': 2.533829279541278e-06, 'epoch': 0.67} + 67%|██████▋ | 8219/12188 [17:35:14<7:51:26, 7.13s/it] 67%|██████▋ | 8220/12188 [17:35:21<7:47:44, 7.07s/it] {'loss': 0.3112, 'grad_norm': 0.7162871584891964, 'learning_rate': 2.5326735297446257e-06, 'epoch': 0.67} + 67%|██████▋ | 8220/12188 [17:35:21<7:47:44, 7.07s/it] 67%|██████▋ | 8221/12188 [17:35:29<8:00:55, 7.27s/it] {'loss': 0.3193, 'grad_norm': 0.660995699968139, 'learning_rate': 2.5315179541866043e-06, 'epoch': 0.67} + 67%|██████▋ | 8221/12188 [17:35:29<8:00:55, 7.27s/it] 67%|██████▋ | 8222/12188 [17:35:37<8:03:22, 7.31s/it] {'loss': 0.3027, 'grad_norm': 0.7019934165488235, 'learning_rate': 2.5303625529488197e-06, 'epoch': 0.67} + 67%|██████▋ | 8222/12188 [17:35:37<8:03:22, 7.31s/it] 67%|██████▋ | 8223/12188 [17:35:43<7:54:43, 7.18s/it] {'loss': 0.2886, 'grad_norm': 0.675697052054468, 'learning_rate': 2.5292073261128624e-06, 'epoch': 0.67} + 67%|██████▋ | 8223/12188 [17:35:43<7:54:43, 7.18s/it] 67%|██████▋ | 8224/12188 [17:35:51<7:58:30, 7.24s/it] {'loss': 0.3056, 'grad_norm': 0.7165303628992089, 'learning_rate': 2.5280522737603162e-06, 'epoch': 0.67} + 67%|██████▋ | 8224/12188 [17:35:51<7:58:30, 7.24s/it] 67%|██████▋ | 8225/12188 [17:35:58<8:06:06, 7.36s/it] {'loss': 0.2957, 'grad_norm': 0.6725406526331622, 'learning_rate': 2.5268973959727473e-06, 'epoch': 0.67} + 67%|██████▋ | 8225/12188 [17:35:58<8:06:06, 7.36s/it] 67%|██████▋ | 8226/12188 [17:36:05<7:54:10, 7.18s/it] {'loss': 0.299, 'grad_norm': 0.7227347210732469, 'learning_rate': 2.525742692831708e-06, 'epoch': 0.67} + 67%|██████▋ | 8226/12188 [17:36:05<7:54:10, 7.18s/it] 68%|██████▊ | 8227/12188 [17:36:13<7:58:56, 7.25s/it] {'loss': 0.2889, 'grad_norm': 0.6558634734980996, 'learning_rate': 2.5245881644187453e-06, 'epoch': 0.67} + 68%|██████▊ | 8227/12188 [17:36:13<7:58:56, 7.25s/it] 68%|██████▊ | 8228/12188 [17:36:19<7:47:32, 7.08s/it] {'loss': 0.3245, 'grad_norm': 0.7054439296638866, 'learning_rate': 2.523433810815391e-06, 'epoch': 0.68} + 68%|██████▊ | 8228/12188 [17:36:19<7:47:32, 7.08s/it] 68%|██████▊ | 8229/12188 [17:36:26<7:42:11, 7.00s/it] {'loss': 0.2927, 'grad_norm': 0.66711674945728, 'learning_rate': 2.52227963210316e-06, 'epoch': 0.68} + 68%|██████▊ | 8229/12188 [17:36:26<7:42:11, 7.00s/it] 68%|██████▊ | 8230/12188 [17:36:33<7:44:09, 7.04s/it] {'loss': 0.3437, 'grad_norm': 0.7161721965383536, 'learning_rate': 2.5211256283635627e-06, 'epoch': 0.68} + 68%|██���███▊ | 8230/12188 [17:36:33<7:44:09, 7.04s/it] 68%|██████▊ | 8231/12188 [17:36:40<7:44:46, 7.05s/it] {'loss': 0.32, 'grad_norm': 0.8293361245471647, 'learning_rate': 2.519971799678089e-06, 'epoch': 0.68} + 68%|██████▊ | 8231/12188 [17:36:40<7:44:46, 7.05s/it] 68%|██████▊ | 8232/12188 [17:36:47<7:39:43, 6.97s/it] {'loss': 0.3292, 'grad_norm': 0.7001075490823231, 'learning_rate': 2.5188181461282225e-06, 'epoch': 0.68} + 68%|██████▊ | 8232/12188 [17:36:47<7:39:43, 6.97s/it] 68%|██████▊ | 8233/12188 [17:36:54<7:39:23, 6.97s/it] {'loss': 0.2902, 'grad_norm': 0.6901065922471759, 'learning_rate': 2.517664667795434e-06, 'epoch': 0.68} + 68%|██████▊ | 8233/12188 [17:36:54<7:39:23, 6.97s/it] 68%|██████▊ | 8234/12188 [17:37:01<7:40:22, 6.99s/it] {'loss': 0.2951, 'grad_norm': 0.6896446589808451, 'learning_rate': 2.516511364761176e-06, 'epoch': 0.68} + 68%|██████▊ | 8234/12188 [17:37:01<7:40:22, 6.99s/it] 68%|██████▊ | 8235/12188 [17:37:08<7:43:11, 7.03s/it] {'loss': 0.3208, 'grad_norm': 0.6462385772246437, 'learning_rate': 2.5153582371068965e-06, 'epoch': 0.68} + 68%|██████▊ | 8235/12188 [17:37:08<7:43:11, 7.03s/it] 68%|██████▊ | 8236/12188 [17:37:15<7:42:07, 7.02s/it] {'loss': 0.315, 'grad_norm': 0.638789993251734, 'learning_rate': 2.5142052849140274e-06, 'epoch': 0.68} + 68%|██████▊ | 8236/12188 [17:37:15<7:42:07, 7.02s/it] 68%|██████▊ | 8237/12188 [17:37:24<8:17:28, 7.55s/it] {'loss': 0.287, 'grad_norm': 0.7292259846351746, 'learning_rate': 2.5130525082639878e-06, 'epoch': 0.68} + 68%|██████▊ | 8237/12188 [17:37:24<8:17:28, 7.55s/it] 68%|██████▊ | 8238/12188 [17:37:32<8:19:54, 7.59s/it] {'loss': 0.3028, 'grad_norm': 0.7912604541990902, 'learning_rate': 2.5118999072381846e-06, 'epoch': 0.68} + 68%|██████▊ | 8238/12188 [17:37:32<8:19:54, 7.59s/it] 68%|██████▊ | 8239/12188 [17:37:39<8:20:49, 7.61s/it] {'loss': 0.306, 'grad_norm': 0.6707389489770805, 'learning_rate': 2.5107474819180098e-06, 'epoch': 0.68} + 68%|██████▊ | 8239/12188 [17:37:39<8:20:49, 7.61s/it] 68%|██████▊ | 8240/12188 [17:37:47<8:23:51, 7.66s/it] {'loss': 0.3425, 'grad_norm': 0.7062855747666732, 'learning_rate': 2.5095952323848496e-06, 'epoch': 0.68} + 68%|██████▊ | 8240/12188 [17:37:47<8:23:51, 7.66s/it] 68%|██████▊ | 8241/12188 [17:37:54<8:10:36, 7.46s/it] {'loss': 0.3226, 'grad_norm': 0.7212468414076025, 'learning_rate': 2.508443158720074e-06, 'epoch': 0.68} + 68%|██████▊ | 8241/12188 [17:37:54<8:10:36, 7.46s/it] 68%|██████▊ | 8242/12188 [17:38:01<8:01:51, 7.33s/it] {'loss': 0.3321, 'grad_norm': 0.7197356678351364, 'learning_rate': 2.5072912610050382e-06, 'epoch': 0.68} + 68%|██████▊ | 8242/12188 [17:38:01<8:01:51, 7.33s/it] 68%|██████▊ | 8243/12188 [17:38:09<8:10:15, 7.46s/it] {'loss': 0.3264, 'grad_norm': 0.6512645794465217, 'learning_rate': 2.5061395393210907e-06, 'epoch': 0.68} + 68%|██████▊ | 8243/12188 [17:38:09<8:10:15, 7.46s/it] 68%|██████▊ | 8244/12188 [17:38:16<7:53:01, 7.20s/it] {'loss': 0.3274, 'grad_norm': 0.7122606400667696, 'learning_rate': 2.5049879937495603e-06, 'epoch': 0.68} + 68%|██████▊ | 8244/12188 [17:38:16<7:53:01, 7.20s/it] 68%|██████▊ | 8245/12188 [17:38:23<7:50:01, 7.15s/it] {'loss': 0.31, 'grad_norm': 0.6417451242013785, 'learning_rate': 2.503836624371768e-06, 'epoch': 0.68} + 68%|██████▊ | 8245/12188 [17:38:23<7:50:01, 7.15s/it] 68%|██████▊ | 8246/12188 [17:38:30<7:47:57, 7.12s/it] {'loss': 0.3123, 'grad_norm': 0.6739431558362823, 'learning_rate': 2.502685431269026e-06, 'epoch': 0.68} + 68%|██████▊ | 8246/12188 [17:38:30<7:47:57, 7.12s/it] 68%|██████▊ | 8247/12188 [17:38:36<7:35:35, 6.94s/it] {'loss': 0.3104, 'grad_norm': 0.6415563845534185, 'learning_rate': 2.501534414522625e-06, 'epoch': 0.68} + 68%|██████▊ | 8247/12188 [17:38:36<7:35:35, 6.94s/it] 68%|██████▊ | 8248/12188 [17:38:43<7:36:56, 6.96s/it] {'loss': 0.2937, 'grad_norm': 0.6993839504681887, 'learning_rate': 2.5003835742138506e-06, 'epoch': 0.68} + 68%|██████▊ | 8248/12188 [17:38:43<7:36:56, 6.96s/it] 68%|██████▊ | 8249/12188 [17:38:50<7:37:48, 6.97s/it] {'loss': 0.2786, 'grad_norm': 0.6860711794763299, 'learning_rate': 2.499232910423971e-06, 'epoch': 0.68} + 68%|██████▊ | 8249/12188 [17:38:50<7:37:48, 6.97s/it] 68%|██████▊ | 8250/12188 [17:38:57<7:42:15, 7.04s/it] {'loss': 0.3131, 'grad_norm': 0.7113135738025319, 'learning_rate': 2.498082423234247e-06, 'epoch': 0.68} + 68%|██████▊ | 8250/12188 [17:38:57<7:42:15, 7.04s/it] 68%|██████▊ | 8251/12188 [17:39:04<7:43:08, 7.06s/it] {'loss': 0.341, 'grad_norm': 0.7683167025474629, 'learning_rate': 2.496932112725921e-06, 'epoch': 0.68} + 68%|██████▊ | 8251/12188 [17:39:04<7:43:08, 7.06s/it] 68%|██████▊ | 8252/12188 [17:39:11<7:32:29, 6.90s/it] {'loss': 0.3062, 'grad_norm': 0.6897940571162184, 'learning_rate': 2.495781978980229e-06, 'epoch': 0.68} + 68%|██████▊ | 8252/12188 [17:39:11<7:32:29, 6.90s/it] 68%|██████▊ | 8253/12188 [17:39:18<7:31:20, 6.88s/it] {'loss': 0.3074, 'grad_norm': 0.7448288131618244, 'learning_rate': 2.4946320220783892e-06, 'epoch': 0.68} + 68%|██████▊ | 8253/12188 [17:39:18<7:31:20, 6.88s/it] 68%|██████▊ | 8254/12188 [17:39:25<7:38:09, 6.99s/it] {'loss': 0.329, 'grad_norm': 0.6609809283583811, 'learning_rate': 2.4934822421016123e-06, 'epoch': 0.68} + 68%|██████▊ | 8254/12188 [17:39:25<7:38:09, 6.99s/it] 68%|██████▊ | 8255/12188 [17:39:33<7:52:35, 7.21s/it] {'loss': 0.2834, 'grad_norm': 0.7144552945103181, 'learning_rate': 2.4923326391310904e-06, 'epoch': 0.68} + 68%|██████▊ | 8255/12188 [17:39:33<7:52:35, 7.21s/it] 68%|██████▊ | 8256/12188 [17:39:40<7:49:10, 7.16s/it] {'loss': 0.3153, 'grad_norm': 0.6395652210267223, 'learning_rate': 2.4911832132480108e-06, 'epoch': 0.68} + 68%|██████▊ | 8256/12188 [17:39:40<7:49:10, 7.16s/it] 68%|██████▊ | 8257/12188 [17:39:47<7:57:25, 7.29s/it] {'loss': 0.3203, 'grad_norm': 0.665143188424082, 'learning_rate': 2.4900339645335404e-06, 'epoch': 0.68} + 68%|██████▊ | 8257/12188 [17:39:47<7:57:25, 7.29s/it] 68%|██████▊ | 8258/12188 [17:39:55<7:59:15, 7.32s/it] {'loss': 0.2978, 'grad_norm': 0.7379739600239136, 'learning_rate': 2.488884893068838e-06, 'epoch': 0.68} + 68%|██████▊ | 8258/12188 [17:39:55<7:59:15, 7.32s/it] 68%|██████▊ | 8259/12188 [17:40:02<7:57:40, 7.29s/it] {'loss': 0.2903, 'grad_norm': 0.5987309129474184, 'learning_rate': 2.4877359989350526e-06, 'epoch': 0.68} + 68%|██████▊ | 8259/12188 [17:40:02<7:57:40, 7.29s/it] 68%|██████▊ | 8260/12188 [17:40:09<7:52:38, 7.22s/it] {'loss': 0.3018, 'grad_norm': 0.7430598032867024, 'learning_rate': 2.4865872822133124e-06, 'epoch': 0.68} + 68%|██████▊ | 8260/12188 [17:40:09<7:52:38, 7.22s/it] 68%|██████▊ | 8261/12188 [17:40:20<9:06:50, 8.36s/it] {'loss': 0.3034, 'grad_norm': 0.6865215727581864, 'learning_rate': 2.485438742984743e-06, 'epoch': 0.68} + 68%|██████▊ | 8261/12188 [17:40:20<9:06:50, 8.36s/it] 68%|██████▊ | 8262/12188 [17:40:27<8:35:54, 7.88s/it] {'loss': 0.3559, 'grad_norm': 0.680879291394232, 'learning_rate': 2.4842903813304464e-06, 'epoch': 0.68} + 68%|██████▊ | 8262/12188 [17:40:27<8:35:54, 7.88s/it] 68%|██████▊ | 8263/12188 [17:40:34<8:18:45, 7.62s/it] {'loss': 0.296, 'grad_norm': 0.6773931085708029, 'learning_rate': 2.4831421973315216e-06, 'epoch': 0.68} + 68%|██████▊ | 8263/12188 [17:40:34<8:18:45, 7.62s/it] 68%|██████▊ | 8264/12188 [17:40:41<8:01:50, 7.37s/it] {'loss': 0.366, 'grad_norm': 0.8358115910114993, 'learning_rate': 2.481994191069054e-06, 'epoch': 0.68} + 68%|██████▊ | 8264/12188 [17:40:41<8:01:50, 7.37s/it] 68%|██████▊ | 8265/12188 [17:40:49<8:18:16, 7.62s/it] {'loss': 0.3225, 'grad_norm': 0.6394689583586362, 'learning_rate': 2.4808463626241103e-06, 'epoch': 0.68} + 68%|██████▊ | 8265/12188 [17:40:49<8:18:16, 7.62s/it] 68%|██████▊ | 8266/12188 [17:40:56<8:17:23, 7.61s/it] {'loss': 0.3243, 'grad_norm': 0.6676622303499525, 'learning_rate': 2.4796987120777493e-06, 'epoch': 0.68} + 68%|██████▊ | 8266/12188 [17:40:56<8:17:23, 7.61s/it] 68%|██████▊ | 8267/12188 [17:41:04<8:25:37, 7.74s/it] {'loss': 0.2885, 'grad_norm': 0.649058000072509, 'learning_rate': 2.4785512395110147e-06, 'epoch': 0.68} + 68%|██████▊ | 8267/12188 [17:41:04<8:25:37, 7.74s/it] 68%|██████▊ | 8268/12188 [17:41:11<8:10:43, 7.51s/it] {'loss': 0.2994, 'grad_norm': 0.6662107373386024, 'learning_rate': 2.47740394500494e-06, 'epoch': 0.68} + 68%|██████▊ | 8268/12188 [17:41:11<8:10:43, 7.51s/it] 68%|██████▊ | 8269/12188 [17:41:18<7:52:11, 7.23s/it] {'loss': 0.3244, 'grad_norm': 0.6457700428301126, 'learning_rate': 2.4762568286405475e-06, 'epoch': 0.68} + 68%|██████▊ | 8269/12188 [17:41:18<7:52:11, 7.23s/it] 68%|██████▊ | 8270/12188 [17:41:25<7:46:48, 7.15s/it] {'loss': 0.2841, 'grad_norm': 0.6756173437320291, 'learning_rate': 2.4751098904988413e-06, 'epoch': 0.68} + 68%|██████▊ | 8270/12188 [17:41:25<7:46:48, 7.15s/it] 68%|██████▊ | 8271/12188 [17:41:32<7:44:27, 7.11s/it] {'loss': 0.3177, 'grad_norm': 0.6296142258765764, 'learning_rate': 2.4739631306608185e-06, 'epoch': 0.68} + 68%|██████▊ | 8271/12188 [17:41:32<7:44:27, 7.11s/it] 68%|██████▊ | 8272/12188 [17:41:39<7:36:38, 7.00s/it] {'loss': 0.2949, 'grad_norm': 0.6099434432353263, 'learning_rate': 2.472816549207463e-06, 'epoch': 0.68} + 68%|██████▊ | 8272/12188 [17:41:39<7:36:38, 7.00s/it] 68%|██████▊ | 8273/12188 [17:41:47<7:53:35, 7.26s/it] {'loss': 0.3717, 'grad_norm': 0.8500895683048043, 'learning_rate': 2.47167014621974e-06, 'epoch': 0.68} + 68%|██████▊ | 8273/12188 [17:41:47<7:53:35, 7.26s/it] 68%|██████▊ | 8274/12188 [17:41:54<7:58:21, 7.33s/it] {'loss': 0.2876, 'grad_norm': 0.6416406401933127, 'learning_rate': 2.4705239217786115e-06, 'epoch': 0.68} + 68%|██████▊ | 8274/12188 [17:41:54<7:58:21, 7.33s/it] 68%|██████▊ | 8275/12188 [17:42:02<8:07:23, 7.47s/it] {'loss': 0.2747, 'grad_norm': 0.7369892876207508, 'learning_rate': 2.4693778759650178e-06, 'epoch': 0.68} + 68%|██████▊ | 8275/12188 [17:42:02<8:07:23, 7.47s/it] 68%|██████▊ | 8276/12188 [17:42:09<7:59:29, 7.35s/it] {'loss': 0.2993, 'grad_norm': 0.7712906588034872, 'learning_rate': 2.4682320088598926e-06, 'epoch': 0.68} + 68%|██████▊ | 8276/12188 [17:42:09<7:59:29, 7.35s/it] 68%|██████▊ | 8277/12188 [17:42:16<7:50:50, 7.22s/it] {'loss': 0.2835, 'grad_norm': 0.689659605421786, 'learning_rate': 2.4670863205441577e-06, 'epoch': 0.68} + 68%|██████▊ | 8277/12188 [17:42:16<7:50:50, 7.22s/it] 68%|██████▊ | 8278/12188 [17:42:23<7:40:36, 7.07s/it] {'loss': 0.3425, 'grad_norm': 0.6933565038496964, 'learning_rate': 2.465940811098717e-06, 'epoch': 0.68} + 68%|██████▊ | 8278/12188 [17:42:23<7:40:36, 7.07s/it] 68%|██████▊ | 8279/12188 [17:42:29<7:36:11, 7.00s/it] {'loss': 0.2979, 'grad_norm': 0.7709572247643542, 'learning_rate': 2.4647954806044633e-06, 'epoch': 0.68} + 68%|██████▊ | 8279/12188 [17:42:29<7:36:11, 7.00s/it] 68%|██████▊ | 8280/12188 [17:42:37<7:44:05, 7.13s/it] {'loss': 0.3126, 'grad_norm': 0.6841993816813264, 'learning_rate': 2.4636503291422816e-06, 'epoch': 0.68} + 68%|██████▊ | 8280/12188 [17:42:37<7:44:05, 7.13s/it] 68%|██████▊ | 8281/12188 [17:42:45<8:03:34, 7.43s/it] {'loss': 0.3288, 'grad_norm': 0.6881894555542035, 'learning_rate': 2.4625053567930364e-06, 'epoch': 0.68} + 68%|██████▊ | 8281/12188 [17:42:45<8:03:34, 7.43s/it] 68%|██████▊ | 8282/12188 [17:42:54<8:41:13, 8.01s/it] {'loss': 0.2854, 'grad_norm': 0.7192565503368499, 'learning_rate': 2.461360563637588e-06, 'epoch': 0.68} + 68%|██████▊ | 8282/12188 [17:42:54<8:41:13, 8.01s/it] 68%|██████▊ | 8283/12188 [17:43:01<8:14:39, 7.60s/it] {'loss': 0.2769, 'grad_norm': 0.6658175153647469, 'learning_rate': 2.4602159497567764e-06, 'epoch': 0.68} + 68%|██████▊ | 8283/12188 [17:43:01<8:14:39, 7.60s/it] 68%|██████▊ | 8284/12188 [17:43:08<7:56:48, 7.33s/it] {'loss': 0.2703, 'grad_norm': 0.7703067144280286, 'learning_rate': 2.4590715152314352e-06, 'epoch': 0.68} + 68%|██████▊ | 8284/12188 [17:43:08<7:56:48, 7.33s/it] 68%|██████▊ | 8285/12188 [17:43:14<7:43:23, 7.12s/it] {'loss': 0.3166, 'grad_norm': 0.7249328450059153, 'learning_rate': 2.4579272601423803e-06, 'epoch': 0.68} + 68%|██████▊ | 8285/12188 [17:43:14<7:43:23, 7.12s/it] 68%|██████▊ | 8286/12188 [17:43:23<8:19:04, 7.67s/it] {'loss': 0.2821, 'grad_norm': 0.7724776628245068, 'learning_rate': 2.4567831845704177e-06, 'epoch': 0.68} + 68%|██████▊ | 8286/12188 [17:43:23<8:19:04, 7.67s/it] 68%|██████▊ | 8287/12188 [17:43:30<8:08:17, 7.51s/it] {'loss': 0.3484, 'grad_norm': 0.6687423778271976, 'learning_rate': 2.4556392885963426e-06, 'epoch': 0.68} + 68%|██████▊ | 8287/12188 [17:43:30<8:08:17, 7.51s/it] 68%|██████▊ | 8288/12188 [17:43:38<8:02:34, 7.42s/it] {'loss': 0.2938, 'grad_norm': 0.6896025349966145, 'learning_rate': 2.4544955723009313e-06, 'epoch': 0.68} + 68%|██████▊ | 8288/12188 [17:43:38<8:02:34, 7.42s/it] 68%|██████▊ | 8289/12188 [17:43:46<8:21:32, 7.72s/it] {'loss': 0.3174, 'grad_norm': 0.801192311137692, 'learning_rate': 2.4533520357649528e-06, 'epoch': 0.68} + 68%|██████▊ | 8289/12188 [17:43:46<8:21:32, 7.72s/it] 68%|██████▊ | 8290/12188 [17:43:53<8:09:21, 7.53s/it] {'loss': 0.2843, 'grad_norm': 0.661192701486842, 'learning_rate': 2.452208679069164e-06, 'epoch': 0.68} + 68%|██████▊ | 8290/12188 [17:43:53<8:09:21, 7.53s/it] 68%|██████▊ | 8291/12188 [17:44:01<8:09:48, 7.54s/it] {'loss': 0.301, 'grad_norm': 0.6773581270464557, 'learning_rate': 2.451065502294303e-06, 'epoch': 0.68} + 68%|██████▊ | 8291/12188 [17:44:01<8:09:48, 7.54s/it] 68%|██████▊ | 8292/12188 [17:44:08<7:58:01, 7.36s/it] {'loss': 0.2986, 'grad_norm': 0.7270677730668718, 'learning_rate': 2.449922505521103e-06, 'epoch': 0.68} + 68%|██████▊ | 8292/12188 [17:44:08<7:58:01, 7.36s/it] 68%|██████▊ | 8293/12188 [17:44:15<7:51:06, 7.26s/it] {'loss': 0.3237, 'grad_norm': 0.6585108581971382, 'learning_rate': 2.4487796888302784e-06, 'epoch': 0.68} + 68%|██████▊ | 8293/12188 [17:44:15<7:51:06, 7.26s/it] 68%|██████▊ | 8294/12188 [17:44:22<7:49:05, 7.23s/it] {'loss': 0.2963, 'grad_norm': 0.6715504412531779, 'learning_rate': 2.4476370523025316e-06, 'epoch': 0.68} + 68%|██████▊ | 8294/12188 [17:44:22<7:49:05, 7.23s/it] 68%|██████▊ | 8295/12188 [17:44:28<7:35:23, 7.02s/it] {'loss': 0.2947, 'grad_norm': 0.6940328781968662, 'learning_rate': 2.4464945960185577e-06, 'epoch': 0.68} + 68%|██████▊ | 8295/12188 [17:44:28<7:35:23, 7.02s/it] 68%|██████▊ | 8296/12188 [17:44:36<7:45:45, 7.18s/it] {'loss': 0.3164, 'grad_norm': 0.6813385539949587, 'learning_rate': 2.4453523200590307e-06, 'epoch': 0.68} + 68%|██████▊ | 8296/12188 [17:44:36<7:45:45, 7.18s/it] 68%|██████▊ | 8297/12188 [17:44:43<7:40:49, 7.11s/it] {'loss': 0.2829, 'grad_norm': 0.6987162707073588, 'learning_rate': 2.444210224504621e-06, 'epoch': 0.68} + 68%|██████▊ | 8297/12188 [17:44:43<7:40:49, 7.11s/it] 68%|██████▊ | 8298/12188 [17:44:50<7:37:15, 7.05s/it] {'loss': 0.3186, 'grad_norm': 0.7519715415455768, 'learning_rate': 2.4430683094359764e-06, 'epoch': 0.68} + 68%|██████▊ | 8298/12188 [17:44:50<7:37:15, 7.05s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f39f20b8810> +[Try #0] Failed to fetch sample 1110959 in VC:s3://gui/aguvis/aguvis-stage2/amex/images. Exception: cannot identify image file <_io.BytesIO object at 0x7f39f20b8810> +Problematic sample: {'image': 'bf3459bcaf434803a580fcd36cbe71aestep0.png', 'conversations': [{'from': 'human', 'value': '\nPlease generate the next move according to the UI screenshot, the task and previous operations.\n\nTask:\nOpen AP News. Share the link of the first article in the "Business" category\n\nPrevious operations:\nNone'}, {'from': 'gpt', 'value': "\nThe AP News app is visible on the home screen. The goal is to open the AP News app and find the first article in the 'Business' category. Starting by launching the AP News app is the logical first step.\n\n\nTap on the AP News app to open it.\n\n\nterminate(status='success')\n"}]} + 68%|██████▊ | 8299/12188 [17:44:58<7:59:27, 7.40s/it] {'loss': 0.2832, 'grad_norm': 0.6260598682698914, 'learning_rate': 2.4419265749337394e-06, 'epoch': 0.68} + 68%|██████▊ | 8299/12188 [17:44:58<7:59:27, 7.40s/it] 68%|██████▊ | 8300/12188 [17:45:06<8:12:37, 7.60s/it] {'loss': 0.3361, 'grad_norm': 0.6999118022502496, 'learning_rate': 2.4407850210785405e-06, 'epoch': 0.68} + 68%|██████▊ | 8300/12188 [17:45:06<8:12:37, 7.60s/it] 68%|██████▊ | 8301/12188 [17:45:13<8:04:48, 7.48s/it] {'loss': 0.3165, 'grad_norm': 0.6431132331080818, 'learning_rate': 2.43964364795099e-06, 'epoch': 0.68} + 68%|██████▊ | 8301/12188 [17:45:13<8:04:48, 7.48s/it] 68%|██████▊ | 8302/12188 [17:45:21<7:59:52, 7.41s/it] {'loss': 0.2946, 'grad_norm': 0.6697292638389045, 'learning_rate': 2.4385024556316928e-06, 'epoch': 0.68} + 68%|██████▊ | 8302/12188 [17:45:21<7:59:52, 7.41s/it] 68%|██████▊ | 8303/12188 [17:45:27<7:46:52, 7.21s/it] {'loss': 0.3252, 'grad_norm': 0.7075546407755652, 'learning_rate': 2.4373614442012356e-06, 'epoch': 0.68} + 68%|██████▊ | 8303/12188 [17:45:27<7:46:52, 7.21s/it] 68%|██████▊ | 8304/12188 [17:45:34<7:41:07, 7.12s/it] {'loss': 0.3209, 'grad_norm': 0.6902556591508883, 'learning_rate': 2.4362206137401956e-06, 'epoch': 0.68} + 68%|██████▊ | 8304/12188 [17:45:34<7:41:07, 7.12s/it] 68%|██████▊ | 8305/12188 [17:45:41<7:32:26, 6.99s/it] {'loss': 0.3171, 'grad_norm': 0.6760724596583676, 'learning_rate': 2.4350799643291383e-06, 'epoch': 0.68} + 68%|██████▊ | 8305/12188 [17:45:41<7:32:26, 6.99s/it] 68%|██████▊ | 8306/12188 [17:45:49<8:00:35, 7.43s/it] {'loss': 0.3278, 'grad_norm': 0.7628540191955996, 'learning_rate': 2.4339394960486136e-06, 'epoch': 0.68} + 68%|██████▊ | 8306/12188 [17:45:49<8:00:35, 7.43s/it] 68%|██████▊ | 8307/12188 [17:45:56<7:52:46, 7.31s/it] {'loss': 0.3259, 'grad_norm': 0.6583426595518629, 'learning_rate': 2.432799208979157e-06, 'epoch': 0.68} + 68%|██████▊ | 8307/12188 [17:45:56<7:52:46, 7.31s/it] 68%|██████▊ | 8308/12188 [17:46:04<7:55:48, 7.36s/it] {'loss': 0.2874, 'grad_norm': 0.7027886147142653, 'learning_rate': 2.4316591032012977e-06, 'epoch': 0.68} + 68%|██████▊ | 8308/12188 [17:46:04<7:55:48, 7.36s/it] 68%|██████▊ | 8309/12188 [17:46:11<7:49:11, 7.26s/it] {'loss': 0.3335, 'grad_norm': 0.750736202561974, 'learning_rate': 2.4305191787955445e-06, 'epoch': 0.68} + 68%|██████▊ | 8309/12188 [17:46:11<7:49:11, 7.26s/it] 68%|██████▊ | 8310/12188 [17:46:18<7:38:59, 7.10s/it] {'loss': 0.3215, 'grad_norm': 0.7219383590482706, 'learning_rate': 2.4293794358424e-06, 'epoch': 0.68} + 68%|██████▊ | 8310/12188 [17:46:18<7:38:59, 7.10s/it] 68%|██████▊ | 8311/12188 [17:46:24<7:33:12, 7.01s/it] {'loss': 0.2966, 'grad_norm': 0.8043112464291104, 'learning_rate': 2.4282398744223483e-06, 'epoch': 0.68} + 68%|██████▊ | 8311/12188 [17:46:24<7:33:12, 7.01s/it] 68%|██████▊ | 8312/12188 [17:46:32<7:40:42, 7.13s/it] {'loss': 0.3059, 'grad_norm': 0.6941623164994203, 'learning_rate': 2.427100494615865e-06, 'epoch': 0.68} + 68%|██████▊ | 8312/12188 [17:46:32<7:40:42, 7.13s/it] 68%|██████▊ | 8313/12188 [17:46:40<8:07:43, 7.55s/it] {'loss': 0.2899, 'grad_norm': 0.6199747127897641, 'learning_rate': 2.4259612965034124e-06, 'epoch': 0.68} + 68%|██████▊ | 8313/12188 [17:46:40<8:07:43, 7.55s/it] 68%|██████▊ | 8314/12188 [17:46:47<7:55:28, 7.36s/it] {'loss': 0.3023, 'grad_norm': 0.7325916337038134, 'learning_rate': 2.4248222801654357e-06, 'epoch': 0.68} + 68%|██████▊ | 8314/12188 [17:46:47<7:55:28, 7.36s/it] 68%|██████▊ | 8315/12188 [17:46:55<8:06:20, 7.53s/it] {'loss': 0.3416, 'grad_norm': 0.668263593361036, 'learning_rate': 2.423683445682374e-06, 'epoch': 0.68} + 68%|██████▊ | 8315/12188 [17:46:55<8:06:20, 7.53s/it] 68%|██████▊ | 8316/12188 [17:47:02<7:54:23, 7.35s/it] {'loss': 0.3151, 'grad_norm': 0.609868872710549, 'learning_rate': 2.4225447931346476e-06, 'epoch': 0.68} + 68%|██████▊ | 8316/12188 [17:47:02<7:54:23, 7.35s/it] 68%|██████▊ | 8317/12188 [17:47:09<7:45:23, 7.21s/it] {'loss': 0.2854, 'grad_norm': 0.6834415896438135, 'learning_rate': 2.4214063226026657e-06, 'epoch': 0.68} + 68%|██████▊ | 8317/12188 [17:47:09<7:45:23, 7.21s/it] 68%|██████▊ | 8318/12188 [17:47:16<7:49:49, 7.28s/it] {'loss': 0.2777, 'grad_norm': 0.6928299052283201, 'learning_rate': 2.4202680341668285e-06, 'epoch': 0.68} + 68%|██████▊ | 8318/12188 [17:47:16<7:49:49, 7.28s/it] 68%|██████▊ | 8319/12188 [17:47:23<7:36:09, 7.07s/it] {'loss': 0.3643, 'grad_norm': 0.7521309887366998, 'learning_rate': 2.4191299279075163e-06, 'epoch': 0.68} + 68%|██████▊ | 8319/12188 [17:47:23<7:36:09, 7.07s/it] 68%|██████▊ | 8320/12188 [17:47:30<7:30:36, 6.99s/it] {'loss': 0.2724, 'grad_norm': 0.6764106214986015, 'learning_rate': 2.417992003905104e-06, 'epoch': 0.68} + 68%|██████▊ | 8320/12188 [17:47:30<7:30:36, 6.99s/it] 68%|██████▊ | 8321/12188 [17:47:37<7:41:21, 7.16s/it] {'loss': 0.2776, 'grad_norm': 0.6855029205994069, 'learning_rate': 2.4168542622399483e-06, 'epoch': 0.68} + 68%|██████▊ | 8321/12188 [17:47:37<7:41:21, 7.16s/it] 68%|██████▊ | 8322/12188 [17:47:45<7:44:58, 7.22s/it] {'loss': 0.3122, 'grad_norm': 0.7360611363073749, 'learning_rate': 2.415716702992392e-06, 'epoch': 0.68} + 68%|██████▊ | 8322/12188 [17:47:45<7:44:58, 7.22s/it] 68%|██████▊ | 8323/12188 [17:47:51<7:33:19, 7.04s/it] {'loss': 0.2844, 'grad_norm': 0.8333760979942669, 'learning_rate': 2.4145793262427727e-06, 'epoch': 0.68} + 68%|██████▊ | 8323/12188 [17:47:51<7:33:19, 7.04s/it] 68%|██████▊ | 8324/12188 [17:47:58<7:29:26, 6.98s/it] {'loss': 0.3397, 'grad_norm': 0.7318740927046636, 'learning_rate': 2.4134421320714053e-06, 'epoch': 0.68} + 68%|██████▊ | 8324/12188 [17:47:58<7:29:26, 6.98s/it] 68%|██████▊ | 8325/12188 [17:48:05<7:34:51, 7.06s/it] {'loss': 0.2862, 'grad_norm': 0.7597235428267722, 'learning_rate': 2.412305120558599e-06, 'epoch': 0.68} + 68%|██████▊ | 8325/12188 [17:48:05<7:34:51, 7.06s/it] 68%|██████▊ | 8326/12188 [17:48:13<7:36:17, 7.09s/it] {'loss': 0.3016, 'grad_norm': 0.7132244207181545, 'learning_rate': 2.411168291784649e-06, 'epoch': 0.68} + 68%|██████▊ | 8326/12188 [17:48:13<7:36:17, 7.09s/it] 68%|██████▊ | 8327/12188 [17:48:20<7:35:23, 7.08s/it] {'loss': 0.326, 'grad_norm': 0.7374787822613273, 'learning_rate': 2.410031645829834e-06, 'epoch': 0.68} + 68%|██████▊ | 8327/12188 [17:48:20<7:35:23, 7.08s/it] 68%|██████▊ | 8328/12188 [17:48:26<7:28:30, 6.97s/it] {'loss': 0.3067, 'grad_norm': 0.7120550674775281, 'learning_rate': 2.4088951827744245e-06, 'epoch': 0.68} + 68%|██████▊ | 8328/12188 [17:48:26<7:28:30, 6.97s/it] 68%|██████▊ | 8329/12188 [17:48:33<7:30:17, 7.00s/it] {'loss': 0.2983, 'grad_norm': 0.6674611224736968, 'learning_rate': 2.4077589026986724e-06, 'epoch': 0.68} + 68%|██████▊ | 8329/12188 [17:48:33<7:30:17, 7.00s/it] 68%|██████▊ | 8330/12188 [17:48:40<7:20:58, 6.86s/it] {'loss': 0.3079, 'grad_norm': 0.7619781782714684, 'learning_rate': 2.4066228056828216e-06, 'epoch': 0.68} + 68%|██████▊ | 8330/12188 [17:48:40<7:20:58, 6.86s/it] 68%|██████▊ | 8331/12188 [17:48:48<7:40:46, 7.17s/it] {'loss': 0.3062, 'grad_norm': 0.728214271941948, 'learning_rate': 2.405486891807104e-06, 'epoch': 0.68} + 68%|██████▊ | 8331/12188 [17:48:48<7:40:46, 7.17s/it] 68%|██████▊ | 8332/12188 [17:48:55<7:42:30, 7.20s/it] {'loss': 0.2637, 'grad_norm': 0.6574215077108687, 'learning_rate': 2.4043511611517315e-06, 'epoch': 0.68} + 68%|██████▊ | 8332/12188 [17:48:55<7:42:30, 7.20s/it] 68%|██████▊ | 8333/12188 [17:49:04<8:17:24, 7.74s/it] {'loss': 0.3235, 'grad_norm': 0.6976390764169955, 'learning_rate': 2.4032156137969122e-06, 'epoch': 0.68} + 68%|██████▊ | 8333/12188 [17:49:04<8:17:24, 7.74s/it] 68%|██████▊ | 8334/12188 [17:49:11<8:04:15, 7.54s/it] {'loss': 0.3288, 'grad_norm': 0.6724593117775969, 'learning_rate': 2.4020802498228333e-06, 'epoch': 0.68} + 68%|██████▊ | 8334/12188 [17:49:11<8:04:15, 7.54s/it] 68%|██████▊ | 8335/12188 [17:49:18<7:51:38, 7.34s/it] {'loss': 0.2874, 'grad_norm': 0.7293970126635487, 'learning_rate': 2.400945069309672e-06, 'epoch': 0.68} + 68%|██████▊ | 8335/12188 [17:49:18<7:51:38, 7.34s/it] 68%|██████▊ | 8336/12188 [17:49:25<7:41:26, 7.19s/it] {'loss': 0.3252, 'grad_norm': 0.7539126545064425, 'learning_rate': 2.399810072337596e-06, 'epoch': 0.68} + 68%|██████▊ | 8336/12188 [17:49:25<7:41:26, 7.19s/it] 68%|██████▊ | 8337/12188 [17:49:34<8:13:36, 7.69s/it] {'loss': 0.2769, 'grad_norm': 0.6846004949489112, 'learning_rate': 2.3986752589867534e-06, 'epoch': 0.68} + 68%|██████▊ | 8337/12188 [17:49:34<8:13:36, 7.69s/it] 68%|██████▊ | 8338/12188 [17:49:41<8:02:24, 7.52s/it] {'loss': 0.3026, 'grad_norm': 0.7011799536026369, 'learning_rate': 2.3975406293372866e-06, 'epoch': 0.68} + 68%|██████▊ | 8338/12188 [17:49:41<8:02:24, 7.52s/it] 68%|██████▊ | 8339/12188 [17:49:48<7:50:49, 7.34s/it] {'loss': 0.2741, 'grad_norm': 0.7119134902843873, 'learning_rate': 2.3964061834693174e-06, 'epoch': 0.68} + 68%|██████▊ | 8339/12188 [17:49:48<7:50:49, 7.34s/it] 68%|██████▊ | 8340/12188 [17:49:56<7:59:03, 7.47s/it] {'loss': 0.3138, 'grad_norm': 0.8849286288090471, 'learning_rate': 2.3952719214629605e-06, 'epoch': 0.68} + 68%|██████▊ | 8340/12188 [17:49:56<7:59:03, 7.47s/it] 68%|██████▊ | 8341/12188 [17:50:02<7:45:55, 7.27s/it] {'loss': 0.2903, 'grad_norm': 4.984012995523921, 'learning_rate': 2.3941378433983175e-06, 'epoch': 0.68} + 68%|██████▊ | 8341/12188 [17:50:02<7:45:55, 7.27s/it] 68%|██████▊ | 8342/12188 [17:50:09<7:38:07, 7.15s/it] {'loss': 0.2969, 'grad_norm': 0.628296612426894, 'learning_rate': 2.3930039493554714e-06, 'epoch': 0.68} + 68%|██████▊ | 8342/12188 [17:50:09<7:38:07, 7.15s/it] 68%|██████▊ | 8343/12188 [17:50:17<7:41:54, 7.21s/it] {'loss': 0.2891, 'grad_norm': 0.6539941355604384, 'learning_rate': 2.391870239414498e-06, 'epoch': 0.68} + 68%|██████▊ | 8343/12188 [17:50:17<7:41:54, 7.21s/it] 68%|██████▊ | 8344/12188 [17:50:24<7:38:44, 7.16s/it] {'loss': 0.2819, 'grad_norm': 0.6857007650979102, 'learning_rate': 2.39073671365546e-06, 'epoch': 0.68} + 68%|██████▊ | 8344/12188 [17:50:24<7:38:44, 7.16s/it] 68%|██████▊ | 8345/12188 [17:50:32<7:57:20, 7.45s/it] {'loss': 0.2777, 'grad_norm': 0.7300873636236981, 'learning_rate': 2.389603372158401e-06, 'epoch': 0.68} + 68%|██████▊ | 8345/12188 [17:50:32<7:57:20, 7.45s/it] 68%|██████▊ | 8346/12188 [17:50:39<7:59:26, 7.49s/it] {'loss': 0.3513, 'grad_norm': 0.6492972252259237, 'learning_rate': 2.38847021500336e-06, 'epoch': 0.68} + 68%|██████▊ | 8346/12188 [17:50:39<7:59:26, 7.49s/it] 68%|██████▊ | 8347/12188 [17:50:46<7:48:07, 7.31s/it] {'loss': 0.2978, 'grad_norm': 0.729383544593263, 'learning_rate': 2.387337242270355e-06, 'epoch': 0.68} + 68%|██████▊ | 8347/12188 [17:50:46<7:48:07, 7.31s/it] 68%|██████▊ | 8348/12188 [17:50:54<7:59:43, 7.50s/it] {'loss': 0.3251, 'grad_norm': 0.6444616493094725, 'learning_rate': 2.386204454039398e-06, 'epoch': 0.68} + 68%|██████▊ | 8348/12188 [17:50:54<7:59:43, 7.50s/it] 69%|██████▊ | 8349/12188 [17:51:03<8:16:53, 7.77s/it] {'loss': 0.3178, 'grad_norm': 0.7346164978447326, 'learning_rate': 2.3850718503904835e-06, 'epoch': 0.68} + 69%|██████▊ | 8349/12188 [17:51:03<8:16:53, 7.77s/it] 69%|██████▊ | 8350/12188 [17:51:10<8:00:49, 7.52s/it] {'loss': 0.2803, 'grad_norm': 0.7114100551734897, 'learning_rate': 2.3839394314035915e-06, 'epoch': 0.69} + 69%|██████▊ | 8350/12188 [17:51:10<8:00:49, 7.52s/it] 69%|██████▊ | 8351/12188 [17:51:16<7:46:04, 7.29s/it] {'loss': 0.3296, 'grad_norm': 0.687441984013582, 'learning_rate': 2.382807197158696e-06, 'epoch': 0.69} + 69%|██████▊ | 8351/12188 [17:51:16<7:46:04, 7.29s/it] 69%|██████▊ | 8352/12188 [17:51:23<7:44:31, 7.27s/it] {'loss': 0.3173, 'grad_norm': 0.7256977952435328, 'learning_rate': 2.381675147735749e-06, 'epoch': 0.69} + 69%|██████▊ | 8352/12188 [17:51:23<7:44:31, 7.27s/it] 69%|██████▊ | 8353/12188 [17:51:31<7:40:57, 7.21s/it] {'loss': 0.3507, 'grad_norm': 0.7534423490819239, 'learning_rate': 2.380543283214697e-06, 'epoch': 0.69} + 69%|██████▊ | 8353/12188 [17:51:31<7:40:57, 7.21s/it]Invalidate trace cache @ step 2: expected module 1, but got module 364 + 69%|██████▊ | 8354/12188 [17:51:37<7:22:56, 6.93s/it] {'loss': 0.6187, 'grad_norm': 0.6052849423802877, 'learning_rate': 2.379411603675471e-06, 'epoch': 0.69} + 69%|██████▊ | 8354/12188 [17:51:37<7:22:56, 6.93s/it] 69%|██████▊ | 8355/12188 [17:51:45<7:49:20, 7.35s/it] {'loss': 0.3184, 'grad_norm': 0.6693732846388519, 'learning_rate': 2.378280109197986e-06, 'epoch': 0.69} + 69%|██████▊ | 8355/12188 [17:51:45<7:49:20, 7.35s/it] 69%|██████▊ | 8356/12188 [17:51:52<7:43:00, 7.25s/it] {'loss': 0.3161, 'grad_norm': 0.6991019614950318, 'learning_rate': 2.377148799862149e-06, 'epoch': 0.69} + 69%|██████▊ | 8356/12188 [17:51:52<7:43:00, 7.25s/it] 69%|██████▊ | 8357/12188 [17:52:00<7:46:50, 7.31s/it] {'loss': 0.2946, 'grad_norm': 0.6758131587398727, 'learning_rate': 2.376017675747848e-06, 'epoch': 0.69} + 69%|██████▊ | 8357/12188 [17:52:00<7:46:50, 7.31s/it] 69%|██████▊ | 8358/12188 [17:52:07<7:56:17, 7.46s/it] {'loss': 0.3114, 'grad_norm': 0.7505729893028888, 'learning_rate': 2.374886736934963e-06, 'epoch': 0.69} + 69%|██████▊ | 8358/12188 [17:52:07<7:56:17, 7.46s/it] 69%|██████▊ | 8359/12188 [17:52:15<7:48:13, 7.34s/it] {'loss': 0.2873, 'grad_norm': 0.6154745280762708, 'learning_rate': 2.3737559835033613e-06, 'epoch': 0.69} + 69%|██████▊ | 8359/12188 [17:52:15<7:48:13, 7.34s/it] 69%|██████▊ | 8360/12188 [17:52:22<7:45:10, 7.29s/it] {'loss': 0.2975, 'grad_norm': 0.7488275407516267, 'learning_rate': 2.372625415532891e-06, 'epoch': 0.69} + 69%|██████▊ | 8360/12188 [17:52:22<7:45:10, 7.29s/it] 69%|██████▊ | 8361/12188 [17:52:29<7:39:46, 7.21s/it] {'loss': 0.3004, 'grad_norm': 0.6363393887657729, 'learning_rate': 2.3714950331033937e-06, 'epoch': 0.69} + 69%|██████▊ | 8361/12188 [17:52:29<7:39:46, 7.21s/it] 69%|██████▊ | 8362/12188 [17:52:37<7:52:23, 7.41s/it] {'loss': 0.3199, 'grad_norm': 0.7129044508133185, 'learning_rate': 2.3703648362946948e-06, 'epoch': 0.69} + 69%|██████▊ | 8362/12188 [17:52:37<7:52:23, 7.41s/it] 69%|██████▊ | 8363/12188 [17:52:46<8:36:12, 8.10s/it] {'loss': 0.329, 'grad_norm': 0.7511311743378948, 'learning_rate': 2.3692348251866045e-06, 'epoch': 0.69} + 69%|██████▊ | 8363/12188 [17:52:46<8:36:12, 8.10s/it] 69%|██████▊ | 8364/12188 [17:52:53<8:07:09, 7.64s/it] {'loss': 0.2817, 'grad_norm': 0.755186784984683, 'learning_rate': 2.3681049998589256e-06, 'epoch': 0.69} + 69%|██████▊ | 8364/12188 [17:52:53<8:07:09, 7.64s/it] 69%|██████▊ | 8365/12188 [17:53:00<7:48:32, 7.35s/it] {'loss': 0.2845, 'grad_norm': 0.6646631003339217, 'learning_rate': 2.3669753603914414e-06, 'epoch': 0.69} + 69%|██████▊ | 8365/12188 [17:53:00<7:48:32, 7.35s/it] 69%|██████▊ | 8366/12188 [17:53:07<7:48:08, 7.35s/it] {'loss': 0.2842, 'grad_norm': 0.7699812523270269, 'learning_rate': 2.365845906863927e-06, 'epoch': 0.69} + 69%|██████▊ | 8366/12188 [17:53:07<7:48:08, 7.35s/it] 69%|██████▊ | 8367/12188 [17:53:13<7:32:36, 7.11s/it] {'loss': 0.2977, 'grad_norm': 0.7009009421981954, 'learning_rate': 2.3647166393561442e-06, 'epoch': 0.69} + 69%|██████▊ | 8367/12188 [17:53:13<7:32:36, 7.11s/it] 69%|██████▊ | 8368/12188 [17:53:21<7:43:50, 7.29s/it] {'loss': 0.2798, 'grad_norm': 0.6868733216544639, 'learning_rate': 2.3635875579478373e-06, 'epoch': 0.69} + 69%|██████▊ | 8368/12188 [17:53:21<7:43:50, 7.29s/it] 69%|██████▊ | 8369/12188 [17:53:31<8:26:01, 7.95s/it] {'loss': 0.2611, 'grad_norm': 0.6179613650778729, 'learning_rate': 2.362458662718743e-06, 'epoch': 0.69} + 69%|██████▊ | 8369/12188 [17:53:31<8:26:01, 7.95s/it] 69%|██████▊ | 8370/12188 [17:53:38<8:08:54, 7.68s/it] {'loss': 0.3048, 'grad_norm': 0.6522440106612896, 'learning_rate': 2.361329953748578e-06, 'epoch': 0.69} + 69%|██████▊ | 8370/12188 [17:53:38<8:08:54, 7.68s/it] 69%|██████▊ | 8371/12188 [17:53:46<8:12:55, 7.75s/it] {'loss': 0.3088, 'grad_norm': 0.6273403478604974, 'learning_rate': 2.3602014311170524e-06, 'epoch': 0.69} + 69%|██████▊ | 8371/12188 [17:53:46<8:12:55, 7.75s/it] 69%|██████▊ | 8372/12188 [17:53:52<7:56:03, 7.49s/it] {'loss': 0.3126, 'grad_norm': 0.6395300013210247, 'learning_rate': 2.3590730949038625e-06, 'epoch': 0.69} + 69%|██████▊ | 8372/12188 [17:53:52<7:56:03, 7.49s/it] 69%|██████▊ | 8373/12188 [17:53:59<7:37:05, 7.19s/it] {'loss': 0.3115, 'grad_norm': 0.6721268394649139, 'learning_rate': 2.357944945188686e-06, 'epoch': 0.69} + 69%|██████▊ | 8373/12188 [17:53:59<7:37:05, 7.19s/it] 69%|██████▊ | 8374/12188 [17:54:06<7:34:38, 7.15s/it] {'loss': 0.3129, 'grad_norm': 0.6369462091427557, 'learning_rate': 2.356816982051195e-06, 'epoch': 0.69} + 69%|██████▊ | 8374/12188 [17:54:06<7:34:38, 7.15s/it] 69%|██████▊ | 8375/12188 [17:54:14<7:48:29, 7.37s/it] {'loss': 0.3119, 'grad_norm': 0.6774249195154234, 'learning_rate': 2.3556892055710394e-06, 'epoch': 0.69} + 69%|██████▊ | 8375/12188 [17:54:14<7:48:29, 7.37s/it]Invalidate trace cache @ step 2: expected module 1, but got module 364 + 69%|██████▊ | 8376/12188 [17:54:19<7:14:15, 6.84s/it] {'loss': 0.6369, 'grad_norm': 0.5859881242659029, 'learning_rate': 2.354561615827866e-06, 'epoch': 0.69} + 69%|██████▊ | 8376/12188 [17:54:20<7:14:15, 6.84s/it] 69%|██████▊ | 8377/12188 [17:54:27<7:31:10, 7.10s/it] {'loss': 0.3126, 'grad_norm': 0.6629160921274511, 'learning_rate': 2.353434212901301e-06, 'epoch': 0.69} + 69%|██████▊ | 8377/12188 [17:54:27<7:31:10, 7.10s/it] 69%|██████▊ | 8378/12188 [17:54:36<8:02:52, 7.60s/it] {'loss': 0.3234, 'grad_norm': 0.7177084658276051, 'learning_rate': 2.3523069968709577e-06, 'epoch': 0.69} + 69%|██████▊ | 8378/12188 [17:54:36<8:02:52, 7.60s/it] 69%|██████▊ | 8379/12188 [17:54:43<7:58:28, 7.54s/it] {'loss': 0.2955, 'grad_norm': 0.6750213548446535, 'learning_rate': 2.351179967816441e-06, 'epoch': 0.69} + 69%|██████▊ | 8379/12188 [17:54:43<7:58:28, 7.54s/it] 69%|██████▉ | 8380/12188 [17:54:53<8:37:55, 8.16s/it] {'loss': 0.2809, 'grad_norm': 0.6712082416814205, 'learning_rate': 2.350053125817341e-06, 'epoch': 0.69} + 69%|██████▉ | 8380/12188 [17:54:53<8:37:55, 8.16s/it] 69%|██████▉ | 8381/12188 [17:55:00<8:19:17, 7.87s/it] {'loss': 0.3291, 'grad_norm': 0.7075057205270392, 'learning_rate': 2.3489264709532294e-06, 'epoch': 0.69} + 69%|██████▉ | 8381/12188 [17:55:00<8:19:17, 7.87s/it] 69%|██████▉ | 8382/12188 [17:55:08<8:13:30, 7.78s/it] {'loss': 0.2874, 'grad_norm': 0.6456892834411921, 'learning_rate': 2.3478000033036735e-06, 'epoch': 0.69} + 69%|██████▉ | 8382/12188 [17:55:08<8:13:30, 7.78s/it] 69%|██████▉ | 8383/12188 [17:55:15<8:02:12, 7.60s/it] {'loss': 0.2762, 'grad_norm': 0.6280025663581629, 'learning_rate': 2.3466737229482183e-06, 'epoch': 0.69} + 69%|██████▉ | 8383/12188 [17:55:15<8:02:12, 7.60s/it] 69%|██████▉ | 8384/12188 [17:55:24<8:24:45, 7.96s/it] {'loss': 0.2637, 'grad_norm': 0.6841597653661252, 'learning_rate': 2.345547629966402e-06, 'epoch': 0.69} + 69%|██████▉ | 8384/12188 [17:55:24<8:24:45, 7.96s/it] 69%|██████▉ | 8385/12188 [17:55:31<8:11:43, 7.76s/it] {'loss': 0.284, 'grad_norm': 0.6836937858614035, 'learning_rate': 2.344421724437749e-06, 'epoch': 0.69} + 69%|██████▉ | 8385/12188 [17:55:31<8:11:43, 7.76s/it] 69%|██████▉ | 8386/12188 [17:55:38<7:56:43, 7.52s/it] {'loss': 0.3222, 'grad_norm': 0.6717997290561433, 'learning_rate': 2.343296006441766e-06, 'epoch': 0.69} + 69%|██████▉ | 8386/12188 [17:55:38<7:56:43, 7.52s/it] 69%|██████▉ | 8387/12188 [17:55:45<7:43:27, 7.32s/it] {'loss': 0.3077, 'grad_norm': 0.7721185283749203, 'learning_rate': 2.342170476057952e-06, 'epoch': 0.69} + 69%|██████▉ | 8387/12188 [17:55:45<7:43:27, 7.32s/it] 69%|██████��� | 8388/12188 [17:55:53<7:53:37, 7.48s/it] {'loss': 0.331, 'grad_norm': 0.655219148359471, 'learning_rate': 2.3410451333657876e-06, 'epoch': 0.69} + 69%|██████▉ | 8388/12188 [17:55:53<7:53:37, 7.48s/it] 69%|██████▉ | 8389/12188 [17:56:00<7:58:00, 7.55s/it] {'loss': 0.3083, 'grad_norm': 0.6939437390465069, 'learning_rate': 2.3399199784447453e-06, 'epoch': 0.69} + 69%|██████▉ | 8389/12188 [17:56:00<7:58:00, 7.55s/it] 69%|██████▉ | 8390/12188 [17:56:07<7:43:29, 7.32s/it] {'loss': 0.3202, 'grad_norm': 0.7185900073546676, 'learning_rate': 2.3387950113742803e-06, 'epoch': 0.69} + 69%|██████▉ | 8390/12188 [17:56:07<7:43:29, 7.32s/it] 69%|██████▉ | 8391/12188 [17:56:15<8:00:17, 7.59s/it] {'loss': 0.2883, 'grad_norm': 0.8192364709046628, 'learning_rate': 2.3376702322338346e-06, 'epoch': 0.69} + 69%|██████▉ | 8391/12188 [17:56:15<8:00:17, 7.59s/it] 69%|██████▉ | 8392/12188 [17:56:23<7:56:56, 7.54s/it] {'loss': 0.2902, 'grad_norm': 0.6060552913251196, 'learning_rate': 2.3365456411028394e-06, 'epoch': 0.69} + 69%|██████▉ | 8392/12188 [17:56:23<7:56:56, 7.54s/it] 69%|██████▉ | 8393/12188 [17:56:29<7:39:35, 7.27s/it] {'loss': 0.3075, 'grad_norm': 0.7096432116413979, 'learning_rate': 2.335421238060714e-06, 'epoch': 0.69} + 69%|██████▉ | 8393/12188 [17:56:29<7:39:35, 7.27s/it] 69%|██████▉ | 8394/12188 [17:56:36<7:33:41, 7.17s/it] {'loss': 0.2883, 'grad_norm': 0.6702899098792274, 'learning_rate': 2.3342970231868573e-06, 'epoch': 0.69} + 69%|██████▉ | 8394/12188 [17:56:36<7:33:41, 7.17s/it] 69%|██████▉ | 8395/12188 [17:56:43<7:28:51, 7.10s/it] {'loss': 0.3203, 'grad_norm': 0.7406051329852473, 'learning_rate': 2.333172996560664e-06, 'epoch': 0.69} + 69%|██████▉ | 8395/12188 [17:56:43<7:28:51, 7.10s/it] 69%|██████▉ | 8396/12188 [17:56:51<7:31:52, 7.15s/it] {'loss': 0.3061, 'grad_norm': 0.6734142752386051, 'learning_rate': 2.332049158261508e-06, 'epoch': 0.69} + 69%|██████▉ | 8396/12188 [17:56:51<7:31:52, 7.15s/it] 69%|██████▉ | 8397/12188 [17:56:58<7:30:34, 7.13s/it] {'loss': 0.2941, 'grad_norm': 0.6591955959097712, 'learning_rate': 2.330925508368754e-06, 'epoch': 0.69} + 69%|██████▉ | 8397/12188 [17:56:58<7:30:34, 7.13s/it] 69%|██████▉ | 8398/12188 [17:57:06<7:44:28, 7.35s/it] {'loss': 0.2739, 'grad_norm': 0.6907204613312792, 'learning_rate': 2.329802046961753e-06, 'epoch': 0.69} + 69%|██████▉ | 8398/12188 [17:57:06<7:44:28, 7.35s/it] 69%|██████▉ | 8399/12188 [17:57:12<7:31:57, 7.16s/it] {'loss': 0.2744, 'grad_norm': 0.6543796618349068, 'learning_rate': 2.328678774119841e-06, 'epoch': 0.69} + 69%|██████▉ | 8399/12188 [17:57:12<7:31:57, 7.16s/it] 69%|██████▉ | 8400/12188 [17:57:21<7:54:18, 7.51s/it] {'loss': 0.3246, 'grad_norm': 0.6984399513185185, 'learning_rate': 2.3275556899223435e-06, 'epoch': 0.69} + 69%|██████▉ | 8400/12188 [17:57:21<7:54:18, 7.51s/it] 69%|██████▉ | 8401/12188 [17:57:29<8:14:02, 7.83s/it] {'loss': 0.3134, 'grad_norm': 0.6654122757956571, 'learning_rate': 2.3264327944485675e-06, 'epoch': 0.69} + 69%|██████▉ | 8401/12188 [17:57:29<8:14:02, 7.83s/it] 69%|██████▉ | 8402/12188 [17:57:36<7:54:37, 7.52s/it] {'loss': 0.3486, 'grad_norm': 0.6521645585444783, 'learning_rate': 2.3253100877778145e-06, 'epoch': 0.69} + 69%|██████▉ | 8402/12188 [17:57:36<7:54:37, 7.52s/it] 69%|██████▉ | 8403/12188 [17:57:43<7:50:30, 7.46s/it] {'loss': 0.3125, 'grad_norm': 0.6964565028404341, 'learning_rate': 2.3241875699893635e-06, 'epoch': 0.69} + 69%|██████▉ | 8403/12188 [17:57:43<7:50:30, 7.46s/it] 69%|██████▉ | 8404/12188 [17:57:51<7:46:26, 7.40s/it] {'loss': 0.2954, 'grad_norm': 0.7321331046639769, 'learning_rate': 2.323065241162489e-06, 'epoch': 0.69} + 69%|██████▉ | 8404/12188 [17:57:51<7:46:26, 7.40s/it] 69%|██████▉ | 8405/12188 [17:57:57<7:32:34, 7.18s/it] {'loss': 0.3058, 'grad_norm': 0.6860738215173128, 'learning_rate': 2.321943101376447e-06, 'epoch': 0.69} + 69%|██████▉ | 8405/12188 [17:57:57<7:32:34, 7.18s/it] 69%|██████▉ | 8406/12188 [17:58:05<7:35:18, 7.22s/it] {'loss': 0.2894, 'grad_norm': 0.652300870071461, 'learning_rate': 2.3208211507104787e-06, 'epoch': 0.69} + 69%|██████▉ | 8406/12188 [17:58:05<7:35:18, 7.22s/it] 69%|██████▉ | 8407/12188 [17:58:12<7:48:31, 7.43s/it] {'loss': 0.311, 'grad_norm': 0.733998015983069, 'learning_rate': 2.3196993892438165e-06, 'epoch': 0.69} + 69%|██████▉ | 8407/12188 [17:58:12<7:48:31, 7.43s/it] 69%|██████▉ | 8408/12188 [17:58:22<8:35:28, 8.18s/it] {'loss': 0.3284, 'grad_norm': 0.6914687688186681, 'learning_rate': 2.3185778170556785e-06, 'epoch': 0.69} + 69%|██████▉ | 8408/12188 [17:58:22<8:35:28, 8.18s/it] 69%|██████▉ | 8409/12188 [17:58:30<8:19:46, 7.94s/it] {'loss': 0.2931, 'grad_norm': 0.7245804164525625, 'learning_rate': 2.3174564342252646e-06, 'epoch': 0.69} + 69%|██████▉ | 8409/12188 [17:58:30<8:19:46, 7.94s/it] 69%|██████▉ | 8410/12188 [17:58:38<8:28:10, 8.07s/it] {'loss': 0.3069, 'grad_norm': 0.7101541334730583, 'learning_rate': 2.3163352408317687e-06, 'epoch': 0.69} + 69%|██████▉ | 8410/12188 [17:58:38<8:28:10, 8.07s/it] 69%|██████▉ | 8411/12188 [17:58:45<8:00:32, 7.63s/it] {'loss': 0.2983, 'grad_norm': 0.6322689495445846, 'learning_rate': 2.315214236954367e-06, 'epoch': 0.69} + 69%|██████▉ | 8411/12188 [17:58:45<8:00:32, 7.63s/it] 69%|██████▉ | 8412/12188 [17:58:54<8:25:13, 8.03s/it] {'loss': 0.3223, 'grad_norm': 0.6784758959593733, 'learning_rate': 2.3140934226722215e-06, 'epoch': 0.69} + 69%|██████▉ | 8412/12188 [17:58:54<8:25:13, 8.03s/it] 69%|██████▉ | 8413/12188 [17:59:02<8:23:33, 8.00s/it] {'loss': 0.2881, 'grad_norm': 0.7165070797637035, 'learning_rate': 2.312972798064485e-06, 'epoch': 0.69} + 69%|██████▉ | 8413/12188 [17:59:02<8:23:33, 8.00s/it] 69%|██████▉ | 8414/12188 [17:59:09<8:12:01, 7.82s/it] {'loss': 0.3018, 'grad_norm': 0.9795618506814592, 'learning_rate': 2.3118523632102907e-06, 'epoch': 0.69} + 69%|██████▉ | 8414/12188 [17:59:09<8:12:01, 7.82s/it] 69%|██████▉ | 8415/12188 [17:59:16<7:58:26, 7.61s/it] {'loss': 0.281, 'grad_norm': 0.6737161617842534, 'learning_rate': 2.310732118188764e-06, 'epoch': 0.69} + 69%|██████▉ | 8415/12188 [17:59:16<7:58:26, 7.61s/it] 69%|██████▉ | 8416/12188 [17:59:23<7:51:17, 7.50s/it] {'loss': 0.3409, 'grad_norm': 0.7252962922188236, 'learning_rate': 2.309612063079015e-06, 'epoch': 0.69} + 69%|██████▉ | 8416/12188 [17:59:23<7:51:17, 7.50s/it] 69%|██████▉ | 8417/12188 [17:59:30<7:39:12, 7.31s/it] {'loss': 0.3023, 'grad_norm': 0.7405399947997914, 'learning_rate': 2.308492197960141e-06, 'epoch': 0.69} + 69%|██████▉ | 8417/12188 [17:59:30<7:39:12, 7.31s/it] 69%|██████▉ | 8418/12188 [17:59:38<7:42:14, 7.36s/it] {'loss': 0.3002, 'grad_norm': 0.6482852553494243, 'learning_rate': 2.307372522911223e-06, 'epoch': 0.69} + 69%|██████▉ | 8418/12188 [17:59:38<7:42:14, 7.36s/it] 69%|██████▉ | 8419/12188 [17:59:45<7:38:31, 7.30s/it] {'loss': 0.2766, 'grad_norm': 0.7761340383932952, 'learning_rate': 2.3062530380113303e-06, 'epoch': 0.69} + 69%|██████▉ | 8419/12188 [17:59:45<7:38:31, 7.30s/it] 69%|██████▉ | 8420/12188 [17:59:52<7:32:14, 7.20s/it] {'loss': 0.335, 'grad_norm': 0.6888861654444001, 'learning_rate': 2.30513374333952e-06, 'epoch': 0.69} + 69%|██████▉ | 8420/12188 [17:59:52<7:32:14, 7.20s/it] 69%|██████▉ | 8421/12188 [17:59:59<7:34:42, 7.24s/it] {'loss': 0.296, 'grad_norm': 0.6917846413032996, 'learning_rate': 2.3040146389748365e-06, 'epoch': 0.69} + 69%|██████▉ | 8421/12188 [17:59:59<7:34:42, 7.24s/it] 69%|██████▉ | 8422/12188 [18:00:07<7:45:51, 7.42s/it] {'loss': 0.2984, 'grad_norm': 0.6051934748856868, 'learning_rate': 2.3028957249963057e-06, 'epoch': 0.69} + 69%|██████▉ | 8422/12188 [18:00:07<7:45:51, 7.42s/it] 69%|██████▉ | 8423/12188 [18:00:14<7:38:42, 7.31s/it] {'loss': 0.3285, 'grad_norm': 0.6999567361788783, 'learning_rate': 2.301777001482948e-06, 'epoch': 0.69} + 69%|██████▉ | 8423/12188 [18:00:14<7:38:42, 7.31s/it] 69%|██████▉ | 8424/12188 [18:00:22<7:40:16, 7.34s/it] {'loss': 0.3186, 'grad_norm': 0.774632852509146, 'learning_rate': 2.300658468513762e-06, 'epoch': 0.69} + 69%|██████▉ | 8424/12188 [18:00:22<7:40:16, 7.34s/it] 69%|██████▉ | 8425/12188 [18:00:29<7:37:10, 7.29s/it] {'loss': 0.3571, 'grad_norm': 0.7590620454978356, 'learning_rate': 2.299540126167737e-06, 'epoch': 0.69} + 69%|██████▉ | 8425/12188 [18:00:29<7:37:10, 7.29s/it] 69%|██████▉ | 8426/12188 [18:00:38<8:09:33, 7.81s/it] {'loss': 0.3173, 'grad_norm': 0.6274022148554714, 'learning_rate': 2.2984219745238524e-06, 'epoch': 0.69} + 69%|██████▉ | 8426/12188 [18:00:38<8:09:33, 7.81s/it] 69%|██████▉ | 8427/12188 [18:00:45<8:01:03, 7.67s/it] {'loss': 0.2884, 'grad_norm': 0.7065588446296904, 'learning_rate': 2.2973040136610652e-06, 'epoch': 0.69} + 69%|██████▉ | 8427/12188 [18:00:45<8:01:03, 7.67s/it] 69%|██████▉ | 8428/12188 [18:00:52<7:41:57, 7.37s/it] {'loss': 0.2797, 'grad_norm': 0.7137615471729695, 'learning_rate': 2.2961862436583266e-06, 'epoch': 0.69} + 69%|██████▉ | 8428/12188 [18:00:52<7:41:57, 7.37s/it] 69%|██████▉ | 8429/12188 [18:01:11<11:23:24, 10.91s/it] {'loss': 0.3216, 'grad_norm': 0.6907114399849358, 'learning_rate': 2.295068664594573e-06, 'epoch': 0.69} + 69%|██████▉ | 8429/12188 [18:01:11<11:23:24, 10.91s/it] 69%|██████▉ | 8430/12188 [18:01:18<10:07:00, 9.69s/it] {'loss': 0.3036, 'grad_norm': 0.7028024497611337, 'learning_rate': 2.2939512765487247e-06, 'epoch': 0.69} + 69%|██████▉ | 8430/12188 [18:01:18<10:07:00, 9.69s/it] 69%|██████▉ | 8431/12188 [18:01:25<9:11:49, 8.81s/it] {'loss': 0.2868, 'grad_norm': 0.7238378974480834, 'learning_rate': 2.292834079599688e-06, 'epoch': 0.69} + 69%|██████▉ | 8431/12188 [18:01:25<9:11:49, 8.81s/it] 69%|██████▉ | 8432/12188 [18:01:32<8:38:53, 8.29s/it] {'loss': 0.2673, 'grad_norm': 0.6343720710879647, 'learning_rate': 2.2917170738263607e-06, 'epoch': 0.69} + 69%|██████▉ | 8432/12188 [18:01:32<8:38:53, 8.29s/it] 69%|██████▉ | 8433/12188 [18:01:38<8:09:31, 7.82s/it] {'loss': 0.2961, 'grad_norm': 0.6703199126717517, 'learning_rate': 2.290600259307621e-06, 'epoch': 0.69} + 69%|██████▉ | 8433/12188 [18:01:38<8:09:31, 7.82s/it] 69%|██████▉ | 8434/12188 [18:01:46<8:12:24, 7.87s/it] {'loss': 0.3334, 'grad_norm': 0.7031926368665418, 'learning_rate': 2.2894836361223393e-06, 'epoch': 0.69} + 69%|██████▉ | 8434/12188 [18:01:46<8:12:24, 7.87s/it] 69%|██████▉ | 8435/12188 [18:01:54<8:04:50, 7.75s/it] {'loss': 0.3132, 'grad_norm': 0.6585703058593593, 'learning_rate': 2.2883672043493672e-06, 'epoch': 0.69} + 69%|██████▉ | 8435/12188 [18:01:54<8:04:50, 7.75s/it] 69%|██████▉ | 8436/12188 [18:02:01<8:00:12, 7.68s/it] {'loss': 0.3253, 'grad_norm': 0.6557232623371865, 'learning_rate': 2.2872509640675476e-06, 'epoch': 0.69} + 69%|██████▉ | 8436/12188 [18:02:01<8:00:12, 7.68s/it] 69%|██████▉ | 8437/12188 [18:02:08<7:37:34, 7.32s/it] {'loss': 0.3443, 'grad_norm': 0.6947883515319264, 'learning_rate': 2.2861349153557057e-06, 'epoch': 0.69} + 69%|██████▉ | 8437/12188 [18:02:08<7:37:34, 7.32s/it] 69%|██████▉ | 8438/12188 [18:02:16<7:53:11, 7.57s/it] {'loss': 0.3291, 'grad_norm': 0.7435085581347918, 'learning_rate': 2.285019058292655e-06, 'epoch': 0.69} + 69%|██████▉ | 8438/12188 [18:02:16<7:53:11, 7.57s/it] 69%|██████▉ | 8439/12188 [18:02:37<12:14:01, 11.75s/it] {'loss': 0.3012, 'grad_norm': 0.7536326158277764, 'learning_rate': 2.283903392957199e-06, 'epoch': 0.69} + 69%|██████▉ | 8439/12188 [18:02:37<12:14:01, 11.75s/it] 69%|██████▉ | 8440/12188 [18:02:46<11:07:09, 10.68s/it] {'loss': 0.29, 'grad_norm': 0.7515719463836891, 'learning_rate': 2.2827879194281196e-06, 'epoch': 0.69} + 69%|██████▉ | 8440/12188 [18:02:46<11:07:09, 10.68s/it] 69%|██████▉ | 8441/12188 [18:02:53<9:58:05, 9.58s/it] {'loss': 0.3321, 'grad_norm': 0.6797721049335307, 'learning_rate': 2.2816726377841935e-06, 'epoch': 0.69} + 69%|██████▉ | 8441/12188 [18:02:53<9:58:05, 9.58s/it] 69%|██████▉ | 8442/12188 [18:03:00<9:16:10, 8.91s/it] {'loss': 0.3128, 'grad_norm': 0.7365241092248542, 'learning_rate': 2.280557548104177e-06, 'epoch': 0.69} + 69%|██████▉ | 8442/12188 [18:03:00<9:16:10, 8.91s/it] 69%|██████▉ | 8443/12188 [18:03:08<8:58:46, 8.63s/it] {'loss': 0.3186, 'grad_norm': 0.6538619279453959, 'learning_rate': 2.2794426504668164e-06, 'epoch': 0.69} + 69%|██████▉ | 8443/12188 [18:03:08<8:58:46, 8.63s/it] 69%|██████▉ | 8444/12188 [18:03:15<8:36:54, 8.28s/it] {'loss': 0.2943, 'grad_norm': 0.6663924923873927, 'learning_rate': 2.278327944950848e-06, 'epoch': 0.69} + 69%|██████▉ | 8444/12188 [18:03:15<8:36:54, 8.28s/it] 69%|██████▉ | 8445/12188 [18:03:23<8:20:12, 8.02s/it] {'loss': 0.3265, 'grad_norm': 0.6800300791901293, 'learning_rate': 2.2772134316349864e-06, 'epoch': 0.69} + 69%|██████▉ | 8445/12188 [18:03:23<8:20:12, 8.02s/it] 69%|██████▉ | 8446/12188 [18:03:30<8:01:11, 7.72s/it] {'loss': 0.3363, 'grad_norm': 0.6473853602246318, 'learning_rate': 2.2760991105979363e-06, 'epoch': 0.69} + 69%|██████▉ | 8446/12188 [18:03:30<8:01:11, 7.72s/it] 69%|██████▉ | 8447/12188 [18:03:37<7:43:11, 7.43s/it] {'loss': 0.3162, 'grad_norm': 0.6513515515546079, 'learning_rate': 2.274984981918393e-06, 'epoch': 0.69} + 69%|██████▉ | 8447/12188 [18:03:37<7:43:11, 7.43s/it] 69%|██████▉ | 8448/12188 [18:03:43<7:26:49, 7.17s/it] {'loss': 0.2829, 'grad_norm': 0.6736381327219939, 'learning_rate': 2.2738710456750296e-06, 'epoch': 0.69} + 69%|██████▉ | 8448/12188 [18:03:43<7:26:49, 7.17s/it] 69%|██████▉ | 8449/12188 [18:03:50<7:19:03, 7.05s/it] {'loss': 0.2897, 'grad_norm': 0.6504726319282755, 'learning_rate': 2.2727573019465154e-06, 'epoch': 0.69} + 69%|██████▉ | 8449/12188 [18:03:50<7:19:03, 7.05s/it] 69%|██████▉ | 8450/12188 [18:03:57<7:12:04, 6.94s/it] {'loss': 0.3368, 'grad_norm': 0.8195302209823659, 'learning_rate': 2.271643750811497e-06, 'epoch': 0.69} + 69%|██████▉ | 8450/12188 [18:03:57<7:12:04, 6.94s/it] 69%|██████▉ | 8451/12188 [18:04:04<7:25:26, 7.15s/it] {'loss': 0.3076, 'grad_norm': 0.7639111801703793, 'learning_rate': 2.270530392348613e-06, 'epoch': 0.69} + 69%|██████▉ | 8451/12188 [18:04:04<7:25:26, 7.15s/it] 69%|██████▉ | 8452/12188 [18:04:11<7:16:18, 7.01s/it] {'loss': 0.3032, 'grad_norm': 0.6496517855364619, 'learning_rate': 2.26941722663649e-06, 'epoch': 0.69} + 69%|██████▉ | 8452/12188 [18:04:11<7:16:18, 7.01s/it] 69%|██████▉ | 8453/12188 [18:04:18<7:22:13, 7.10s/it] {'loss': 0.3076, 'grad_norm': 0.6814060996710656, 'learning_rate': 2.268304253753733e-06, 'epoch': 0.69} + 69%|██████▉ | 8453/12188 [18:04:18<7:22:13, 7.10s/it] 69%|██████▉ | 8454/12188 [18:04:26<7:34:29, 7.30s/it] {'loss': 0.2723, 'grad_norm': 0.5991610833811132, 'learning_rate': 2.267191473778943e-06, 'epoch': 0.69} + 69%|██████▉ | 8454/12188 [18:04:26<7:34:29, 7.30s/it] 69%|██████▉ | 8455/12188 [18:04:33<7:24:45, 7.15s/it] {'loss': 0.2835, 'grad_norm': 0.6676611007981537, 'learning_rate': 2.2660788867906985e-06, 'epoch': 0.69} + 69%|██████▉ | 8455/12188 [18:04:33<7:24:45, 7.15s/it] 69%|██████▉ | 8456/12188 [18:04:40<7:27:27, 7.19s/it] {'loss': 0.3461, 'grad_norm': 0.7134101157471192, 'learning_rate': 2.264966492867571e-06, 'epoch': 0.69} + 69%|██████▉ | 8456/12188 [18:04:40<7:27:27, 7.19s/it] 69%|██████▉ | 8457/12188 [18:04:47<7:14:45, 6.99s/it] {'loss': 0.2799, 'grad_norm': 0.7130902454564787, 'learning_rate': 2.2638542920881175e-06, 'epoch': 0.69} + 69%|██████▉ | 8457/12188 [18:04:47<7:14:45, 6.99s/it] 69%|██████▉ | 8458/12188 [18:04:54<7:26:39, 7.18s/it] {'loss': 0.2928, 'grad_norm': 0.6772103767263459, 'learning_rate': 2.262742284530877e-06, 'epoch': 0.69} + 69%|██████▉ | 8458/12188 [18:04:54<7:26:39, 7.18s/it] 69%|██████▉ | 8459/12188 [18:05:02<7:43:29, 7.46s/it] {'loss': 0.3334, 'grad_norm': 0.6487930454348104, 'learning_rate': 2.261630470274378e-06, 'epoch': 0.69} + 69%|██████▉ | 8459/12188 [18:05:02<7:43:29, 7.46s/it] 69%|██████▉ | 8460/12188 [18:05:10<7:39:57, 7.40s/it] {'loss': 0.3043, 'grad_norm': 0.6493656966667773, 'learning_rate': 2.2605188493971363e-06, 'epoch': 0.69} + 69%|██████▉ | 8460/12188 [18:05:10<7:39:57, 7.40s/it] 69%|██████▉ | 8461/12188 [18:05:31<12:03:27, 11.65s/it] {'loss': 0.3085, 'grad_norm': 0.742167988955624, 'learning_rate': 2.2594074219776514e-06, 'epoch': 0.69} + 69%|██████▉ | 8461/12188 [18:05:31<12:03:27, 11.65s/it] 69%|██████▉ | 8462/12188 [18:05:39<10:52:03, 10.50s/it] {'loss': 0.3264, 'grad_norm': 0.6371695928703363, 'learning_rate': 2.2582961880944126e-06, 'epoch': 0.69} + 69%|██████▉ | 8462/12188 [18:05:39<10:52:03, 10.50s/it] 69%|██████▉ | 8463/12188 [18:05:45<9:36:39, 9.29s/it] {'loss': 0.2927, 'grad_norm': 0.6403792179120508, 'learning_rate': 2.2571851478258903e-06, 'epoch': 0.69} + 69%|██████▉ | 8463/12188 [18:05:45<9:36:39, 9.29s/it] 69%|██████▉ | 8464/12188 [18:06:06<13:03:22, 12.62s/it] {'loss': 0.2462, 'grad_norm': 0.5979113380143684, 'learning_rate': 2.256074301250546e-06, 'epoch': 0.69} + 69%|██████▉ | 8464/12188 [18:06:06<13:03:22, 12.62s/it] 69%|██████▉ | 8465/12188 [18:06:13<11:16:09, 10.90s/it] {'loss': 0.3035, 'grad_norm': 0.6881220846485586, 'learning_rate': 2.2549636484468284e-06, 'epoch': 0.69} + 69%|██████▉ | 8465/12188 [18:06:13<11:16:09, 10.90s/it] 69%|██████▉ | 8466/12188 [18:06:20<10:11:38, 9.86s/it] {'loss': 0.321, 'grad_norm': 0.6805016274598356, 'learning_rate': 2.2538531894931655e-06, 'epoch': 0.69} + 69%|██████▉ | 8466/12188 [18:06:20<10:11:38, 9.86s/it] 69%|██████▉ | 8467/12188 [18:06:40<13:24:12, 12.97s/it] {'loss': 0.3034, 'grad_norm': 0.6710819876545808, 'learning_rate': 2.2527429244679805e-06, 'epoch': 0.69} + 69%|██████▉ | 8467/12188 [18:06:40<13:24:12, 12.97s/it] 69%|██████▉ | 8468/12188 [18:06:47<11:33:53, 11.19s/it] {'loss': 0.2992, 'grad_norm': 0.7554629913269382, 'learning_rate': 2.251632853449674e-06, 'epoch': 0.69} + 69%|██████▉ | 8468/12188 [18:06:47<11:33:53, 11.19s/it] 69%|██████▉ | 8469/12188 [18:07:07<14:08:19, 13.69s/it] {'loss': 0.3071, 'grad_norm': 0.7146282863423231, 'learning_rate': 2.2505229765166403e-06, 'epoch': 0.69} + 69%|██████▉ | 8469/12188 [18:07:07<14:08:19, 13.69s/it] 69%|██████▉ | 8470/12188 [18:07:14<12:07:47, 11.74s/it] {'loss': 0.2962, 'grad_norm': 0.7353264341498161, 'learning_rate': 2.249413293747258e-06, 'epoch': 0.69} + 69%|██████▉ | 8470/12188 [18:07:14<12:07:47, 11.74s/it] 70%|██████▉ | 8471/12188 [18:07:35<14:47:52, 14.33s/it] {'loss': 0.2817, 'grad_norm': 0.6393813188790107, 'learning_rate': 2.2483038052198888e-06, 'epoch': 0.69} + 70%|██████▉ | 8471/12188 [18:07:35<14:47:52, 14.33s/it] 70%|██████▉ | 8472/12188 [18:07:41<12:29:42, 12.11s/it] {'loss': 0.2963, 'grad_norm': 0.7686730147652518, 'learning_rate': 2.2471945110128846e-06, 'epoch': 0.7} + 70%|██████▉ | 8472/12188 [18:07:41<12:29:42, 12.11s/it] 70%|██████▉ | 8473/12188 [18:07:49<11:03:38, 10.72s/it] {'loss': 0.3127, 'grad_norm': 0.6804731470868303, 'learning_rate': 2.2460854112045823e-06, 'epoch': 0.7} + 70%|██████▉ | 8473/12188 [18:07:49<11:03:38, 10.72s/it] 70%|██████▉ | 8474/12188 [18:08:11<14:27:48, 14.02s/it] {'loss': 0.2909, 'grad_norm': 0.6693717412334199, 'learning_rate': 2.244976505873301e-06, 'epoch': 0.7} + 70%|██████▉ | 8474/12188 [18:08:11<14:27:48, 14.02s/it] 70%|██████▉ | 8475/12188 [18:08:18<12:25:35, 12.05s/it] {'loss': 0.2962, 'grad_norm': 0.6333412071259485, 'learning_rate': 2.2438677950973546e-06, 'epoch': 0.7} + 70%|██████▉ | 8475/12188 [18:08:18<12:25:35, 12.05s/it] 70%|██████▉ | 8476/12188 [18:08:25<10:49:08, 10.49s/it] {'loss': 0.2844, 'grad_norm': 0.6845210240954418, 'learning_rate': 2.242759278955035e-06, 'epoch': 0.7} + 70%|██████▉ | 8476/12188 [18:08:25<10:49:08, 10.49s/it] 70%|██████▉ | 8477/12188 [18:08:50<15:13:08, 14.76s/it] {'loss': 0.2745, 'grad_norm': 0.6632446044707546, 'learning_rate': 2.2416509575246276e-06, 'epoch': 0.7} + 70%|██████▉ | 8477/12188 [18:08:50<15:13:08, 14.76s/it] 70%|██████▉ | 8478/12188 [18:08:56<12:45:36, 12.38s/it] {'loss': 0.3352, 'grad_norm': 0.6412999220113647, 'learning_rate': 2.2405428308843953e-06, 'epoch': 0.7} + 70%|██████▉ | 8478/12188 [18:08:57<12:45:36, 12.38s/it] 70%|██████▉ | 8479/12188 [18:09:04<11:05:55, 10.77s/it] {'loss': 0.3118, 'grad_norm': 0.6412125454679405, 'learning_rate': 2.2394348991125954e-06, 'epoch': 0.7} + 70%|██████▉ | 8479/12188 [18:09:04<11:05:55, 10.77s/it] 70%|██████▉ | 8480/12188 [18:09:25<14:27:34, 14.04s/it] {'loss': 0.298, 'grad_norm': 0.6568649426558497, 'learning_rate': 2.2383271622874693e-06, 'epoch': 0.7} + 70%|██████▉ | 8480/12188 [18:09:25<14:27:34, 14.04s/it] 70%|██████▉ | 8481/12188 [18:09:33<12:24:13, 12.05s/it] {'loss': 0.3034, 'grad_norm': 0.6955852841164623, 'learning_rate': 2.23721962048724e-06, 'epoch': 0.7} + 70%|██████▉ | 8481/12188 [18:09:33<12:24:13, 12.05s/it] 70%|██████▉ | 8482/12188 [18:09:57<16:08:51, 15.69s/it] {'loss': 0.3451, 'grad_norm': 0.936172494490829, 'learning_rate': 2.2361122737901225e-06, 'epoch': 0.7} + 70%|██████▉ | 8482/12188 [18:09:57<16:08:51, 15.69s/it] 70%|██████▉ | 8483/12188 [18:10:18<17:52:38, 17.37s/it] {'loss': 0.2968, 'grad_norm': 0.6543080035420153, 'learning_rate': 2.2350051222743175e-06, 'epoch': 0.7} + 70%|██████▉ | 8483/12188 [18:10:18<17:52:38, 17.37s/it] 70%|██████▉ | 8484/12188 [18:10:38<18:48:43, 18.28s/it] {'loss': 0.3095, 'grad_norm': 0.9893957896938688, 'learning_rate': 2.2338981660180065e-06, 'epoch': 0.7} + 70%|██████▉ | 8484/12188 [18:10:38<18:48:43, 18.28s/it] 70%|██████▉ | 8485/12188 [18:11:04<20:58:29, 20.39s/it] {'loss': 0.2894, 'grad_norm': 0.6410898296729268, 'learning_rate': 2.2327914050993647e-06, 'epoch': 0.7} + 70%|██████▉ | 8485/12188 [18:11:04<20:58:29, 20.39s/it] 70%|██████▉ | 8486/12188 [18:11:12<17:16:37, 16.80s/it] {'loss': 0.2924, 'grad_norm': 0.6008335056655068, 'learning_rate': 2.2316848395965483e-06, 'epoch': 0.7} + 70%|██████▉ | 8486/12188 [18:11:12<17:16:37, 16.80s/it] 70%|██████▉ | 8487/12188 [18:11:19<14:10:13, 13.78s/it] {'loss': 0.3126, 'grad_norm': 0.6943207379181008, 'learning_rate': 2.230578469587699e-06, 'epoch': 0.7} + 70%|██████▉ | 8487/12188 [18:11:19<14:10:13, 13.78s/it] 70%|██████▉ | 8488/12188 [18:11:51<19:43:39, 19.19s/it] {'loss': 0.2937, 'grad_norm': 0.6685655355411761, 'learning_rate': 2.2294722951509514e-06, 'epoch': 0.7} + 70%|██████▉ | 8488/12188 [18:11:51<19:43:39, 19.19s/it] 70%|██████▉ | 8489/12188 [18:11:58<16:07:19, 15.69s/it] {'loss': 0.3042, 'grad_norm': 0.6608278371609908, 'learning_rate': 2.228366316364416e-06, 'epoch': 0.7} + 70%|██████▉ | 8489/12188 [18:11:58<16:07:19, 15.69s/it] 70%|██████▉ | 8490/12188 [18:12:06<13:45:31, 13.39s/it] {'loss': 0.2967, 'grad_norm': 0.6772698193010623, 'learning_rate': 2.227260533306201e-06, 'epoch': 0.7} + 70%|██████▉ | 8490/12188 [18:12:06<13:45:31, 13.39s/it] 70%|██████▉ | 8491/12188 [18:12:29<16:33:13, 16.12s/it] {'loss': 0.2964, 'grad_norm': 0.7581986358014706, 'learning_rate': 2.2261549460543903e-06, 'epoch': 0.7} + 70%|██████▉ | 8491/12188 [18:12:29<16:33:13, 16.12s/it] 70%|██████▉ | 8492/12188 [18:12:35<13:37:29, 13.27s/it] {'loss': 0.3177, 'grad_norm': 0.7106067005898171, 'learning_rate': 2.2250495546870605e-06, 'epoch': 0.7} + 70%|██████▉ | 8492/12188 [18:12:35<13:37:29, 13.27s/it] 70%|██████▉ | 8493/12188 [18:12:54<15:17:51, 14.90s/it] {'loss': 0.315, 'grad_norm': 0.7422588591987173, 'learning_rate': 2.223944359282275e-06, 'epoch': 0.7} + 70%|██████▉ | 8493/12188 [18:12:54<15:17:51, 14.90s/it] 70%|██████▉ | 8494/12188 [18:13:17<17:43:05, 17.27s/it] {'loss': 0.3072, 'grad_norm': 0.714669480799354, 'learning_rate': 2.222839359918077e-06, 'epoch': 0.7} + 70%|██████▉ | 8494/12188 [18:13:17<17:43:05, 17.27s/it] 70%|██████▉ | 8495/12188 [18:13:24<14:34:10, 14.20s/it] {'loss': 0.2943, 'grad_norm': 0.6523179932380464, 'learning_rate': 2.2217345566725033e-06, 'epoch': 0.7} + 70%|██████▉ | 8495/12188 [18:13:24<14:34:10, 14.20s/it] 70%|██████▉ | 8496/12188 [18:13:42<15:52:33, 15.48s/it] {'loss': 0.3108, 'grad_norm': 0.719719935610785, 'learning_rate': 2.2206299496235696e-06, 'epoch': 0.7} + 70%|██████▉ | 8496/12188 [18:13:42<15:52:33, 15.48s/it] 70%|██████▉ | 8497/12188 [18:13:49<13:11:02, 12.86s/it] {'loss': 0.2686, 'grad_norm': 0.6682463017930667, 'learning_rate': 2.2195255388492837e-06, 'epoch': 0.7} + 70%|██████▉ | 8497/12188 [18:13:49<13:11:02, 12.86s/it] 70%|██████▉ | 8498/12188 [18:13:56<11:26:57, 11.17s/it] {'loss': 0.2927, 'grad_norm': 0.6776076471543394, 'learning_rate': 2.2184213244276386e-06, 'epoch': 0.7} + 70%|██████▉ | 8498/12188 [18:13:56<11:26:57, 11.17s/it] 70%|██████▉ | 8499/12188 [18:14:03<10:07:30, 9.88s/it] {'loss': 0.3096, 'grad_norm': 0.6837747519286048, 'learning_rate': 2.2173173064366093e-06, 'epoch': 0.7} + 70%|██████▉ | 8499/12188 [18:14:03<10:07:30, 9.88s/it] 70%|██████▉ | 8500/12188 [18:14:22<12:48:46, 12.51s/it] {'loss': 0.2963, 'grad_norm': 0.6753766162029626, 'learning_rate': 2.216213484954162e-06, 'epoch': 0.7} + 70%|██████▉ | 8500/12188 [18:14:22<12:48:46, 12.51s/it] 70%|██████▉ | 8501/12188 [18:14:29<11:09:44, 10.90s/it] {'loss': 0.2913, 'grad_norm': 0.7072782753978214, 'learning_rate': 2.215109860058247e-06, 'epoch': 0.7} + 70%|██████▉ | 8501/12188 [18:14:29<11:09:44, 10.90s/it] 70%|██████▉ | 8502/12188 [18:14:49<13:59:33, 13.67s/it] {'loss': 0.2769, 'grad_norm': 0.6762272123048779, 'learning_rate': 2.2140064318267974e-06, 'epoch': 0.7} + 70%|██████▉ | 8502/12188 [18:14:49<13:59:33, 13.67s/it] 70%|██████▉ | 8503/12188 [18:15:11<16:30:26, 16.13s/it] {'loss': 0.3082, 'grad_norm': 0.7134288471854526, 'learning_rate': 2.212903200337739e-06, 'epoch': 0.7} + 70%|██████▉ | 8503/12188 [18:15:11<16:30:26, 16.13s/it] 70%|██████▉ | 8504/12188 [18:15:52<24:10:45, 23.63s/it] {'loss': 0.3, 'grad_norm': 0.7065478549039748, 'learning_rate': 2.211800165668977e-06, 'epoch': 0.7} + 70%|██████▉ | 8504/12188 [18:15:52<24:10:45, 23.63s/it] 70%|██████▉ | 8505/12188 [18:16:14<23:42:07, 23.17s/it] {'loss': 0.304, 'grad_norm': 0.6534283602810218, 'learning_rate': 2.210697327898407e-06, 'epoch': 0.7} + 70%|██████▉ | 8505/12188 [18:16:14<23:42:07, 23.17s/it] 70%|██████▉ | 8506/12188 [18:16:22<18:50:49, 18.43s/it] {'loss': 0.3041, 'grad_norm': 0.6397686726802237, 'learning_rate': 2.209594687103914e-06, 'epoch': 0.7} + 70%|██████▉ | 8506/12188 [18:16:22<18:50:49, 18.43s/it] 70%|██████▉ | 8507/12188 [18:16:29<15:30:41, 15.17s/it] {'loss': 0.3063, 'grad_norm': 0.6954571643098861, 'learning_rate': 2.2084922433633577e-06, 'epoch': 0.7} + 70%|██████▉ | 8507/12188 [18:16:29<15:30:41, 15.17s/it] 70%|██████▉ | 8508/12188 [18:16:52<17:48:43, 17.42s/it] {'loss': 0.3022, 'grad_norm': 0.688651714856069, 'learning_rate': 2.2073899967545966e-06, 'epoch': 0.7} + 70%|██████▉ | 8508/12188 [18:16:52<17:48:43, 17.42s/it] 70%|██████▉ | 8509/12188 [18:16:59<14:31:08, 14.21s/it] {'loss': 0.3123, 'grad_norm': 0.7298141847918963, 'learning_rate': 2.2062879473554654e-06, 'epoch': 0.7} + 70%|██████▉ | 8509/12188 [18:16:59<14:31:08, 14.21s/it] 70%|██████▉ | 8510/12188 [18:17:17<15:55:39, 15.59s/it] {'loss': 0.3072, 'grad_norm': 0.7462393783912802, 'learning_rate': 2.205186095243791e-06, 'epoch': 0.7} + 70%|██████▉ | 8510/12188 [18:17:17<15:55:39, 15.59s/it] 70%|██████▉ | 8511/12188 [18:17:42<18:47:36, 18.40s/it] {'loss': 0.3285, 'grad_norm': 0.7829643131397663, 'learning_rate': 2.204084440497386e-06, 'epoch': 0.7} + 70%|██████▉ | 8511/12188 [18:17:42<18:47:36, 18.40s/it] 70%|██████▉ | 8512/12188 [18:17:50<15:26:02, 15.12s/it] {'loss': 0.284, 'grad_norm': 0.6600021830884221, 'learning_rate': 2.202982983194044e-06, 'epoch': 0.7} + 70%|██████▉ | 8512/12188 [18:17:50<15:26:02, 15.12s/it] 70%|██████▉ | 8513/12188 [18:18:09<16:32:27, 16.20s/it] {'loss': 0.3056, 'grad_norm': 0.6595529175556403, 'learning_rate': 2.201881723411552e-06, 'epoch': 0.7} + 70%|██████▉ | 8513/12188 [18:18:09<16:32:27, 16.20s/it] 70%|██████▉ | 8514/12188 [18:18:32<18:39:51, 18.29s/it] {'loss': 0.2816, 'grad_norm': 0.6382586815899334, 'learning_rate': 2.2007806612276767e-06, 'epoch': 0.7} + 70%|██████▉ | 8514/12188 [18:18:32<18:39:51, 18.29s/it] 70%|██████▉ | 8515/12188 [18:18:51<18:56:51, 18.57s/it] {'loss': 0.2863, 'grad_norm': 0.6837743251660042, 'learning_rate': 2.1996797967201727e-06, 'epoch': 0.7} + 70%|██████▉ | 8515/12188 [18:18:51<18:56:51, 18.57s/it] 70%|██████▉ | 8516/12188 [18:18:58<15:19:25, 15.02s/it] {'loss': 0.2798, 'grad_norm': 0.6650046922017776, 'learning_rate': 2.1985791299667835e-06, 'epoch': 0.7} + 70%|██████▉ | 8516/12188 [18:18:58<15:19:25, 15.02s/it] 70%|██████▉ | 8517/12188 [18:19:19<17:10:51, 16.85s/it] {'loss': 0.3267, 'grad_norm': 0.694503330612488, 'learning_rate': 2.1974786610452342e-06, 'epoch': 0.7} + 70%|██████▉ | 8517/12188 [18:19:19<17:10:51, 16.85s/it] 70%|██████▉ | 8518/12188 [18:19:26<14:06:57, 13.85s/it] {'loss': 0.3338, 'grad_norm': 0.6449413886258419, 'learning_rate': 2.1963783900332384e-06, 'epoch': 0.7} + 70%|██████▉ | 8518/12188 [18:19:26<14:06:57, 13.85s/it] 70%|██████▉ | 8519/12188 [18:19:32<11:51:01, 11.63s/it] {'loss': 0.2931, 'grad_norm': 0.6837157264866787, 'learning_rate': 2.195278317008499e-06, 'epoch': 0.7} + 70%|██████▉ | 8519/12188 [18:19:32<11:51:01, 11.63s/it] 70%|��█████▉ | 8520/12188 [18:19:52<14:28:31, 14.21s/it] {'loss': 0.2965, 'grad_norm': 0.6671018391937721, 'learning_rate': 2.194178442048697e-06, 'epoch': 0.7} + 70%|██████▉ | 8520/12188 [18:19:52<14:28:31, 14.21s/it] 70%|██████▉ | 8521/12188 [18:20:29<21:13:55, 20.84s/it] {'loss': 0.3071, 'grad_norm': 0.7151557565524083, 'learning_rate': 2.1930787652315066e-06, 'epoch': 0.7} + 70%|██████▉ | 8521/12188 [18:20:29<21:13:55, 20.84s/it] 70%|██████▉ | 8522/12188 [18:20:48<20:50:06, 20.46s/it] {'loss': 0.3096, 'grad_norm': 0.6695839138099848, 'learning_rate': 2.1919792866345822e-06, 'epoch': 0.7} + 70%|██████▉ | 8522/12188 [18:20:48<20:50:06, 20.46s/it] 70%|██████▉ | 8523/12188 [18:21:08<20:31:37, 20.16s/it] {'loss': 0.2999, 'grad_norm': 0.7455062299650645, 'learning_rate': 2.19088000633557e-06, 'epoch': 0.7} + 70%|██████▉ | 8523/12188 [18:21:08<20:31:37, 20.16s/it] 70%|██████▉ | 8524/12188 [18:21:15<16:28:03, 16.18s/it] {'loss': 0.3169, 'grad_norm': 0.873165421170161, 'learning_rate': 2.1897809244120998e-06, 'epoch': 0.7} + 70%|██████▉ | 8524/12188 [18:21:15<16:28:03, 16.18s/it] 70%|██████▉ | 8525/12188 [18:21:23<14:04:59, 13.84s/it] {'loss': 0.2908, 'grad_norm': 0.6198386032555964, 'learning_rate': 2.188682040941784e-06, 'epoch': 0.7} + 70%|██████▉ | 8525/12188 [18:21:23<14:04:59, 13.84s/it] 70%|██████▉ | 8526/12188 [18:21:31<12:12:20, 12.00s/it] {'loss': 0.3248, 'grad_norm': 0.7065028096416244, 'learning_rate': 2.187583356002228e-06, 'epoch': 0.7} + 70%|██████▉ | 8526/12188 [18:21:31<12:12:20, 12.00s/it] 70%|██████▉ | 8527/12188 [18:21:37<10:30:13, 10.33s/it] {'loss': 0.3166, 'grad_norm': 0.6640600448042938, 'learning_rate': 2.1864848696710146e-06, 'epoch': 0.7} + 70%|██████▉ | 8527/12188 [18:21:37<10:30:13, 10.33s/it] 70%|██████▉ | 8528/12188 [18:21:44<9:29:33, 9.34s/it] {'loss': 0.3687, 'grad_norm': 0.7561087207564882, 'learning_rate': 2.1853865820257215e-06, 'epoch': 0.7} + 70%|██████▉ | 8528/12188 [18:21:44<9:29:33, 9.34s/it] 70%|██████▉ | 8529/12188 [18:21:52<8:56:07, 8.79s/it] {'loss': 0.2809, 'grad_norm': 0.6366687769359334, 'learning_rate': 2.1842884931439055e-06, 'epoch': 0.7} + 70%|██████▉ | 8529/12188 [18:21:52<8:56:07, 8.79s/it] 70%|██████▉ | 8530/12188 [18:22:11<12:17:05, 12.09s/it] {'loss': 0.2979, 'grad_norm': 0.6452675215451865, 'learning_rate': 2.1831906031031115e-06, 'epoch': 0.7} + 70%|██████▉ | 8530/12188 [18:22:11<12:17:05, 12.09s/it] 70%|██████▉ | 8531/12188 [18:22:18<10:40:59, 10.52s/it] {'loss': 0.3022, 'grad_norm': 0.6693408975177367, 'learning_rate': 2.1820929119808727e-06, 'epoch': 0.7} + 70%|██████▉ | 8531/12188 [18:22:18<10:40:59, 10.52s/it] 70%|███████ | 8532/12188 [18:22:25<9:34:23, 9.43s/it] {'loss': 0.3174, 'grad_norm': 0.6955174710510156, 'learning_rate': 2.180995419854703e-06, 'epoch': 0.7} + 70%|███████ | 8532/12188 [18:22:25<9:34:23, 9.43s/it] 70%|███████ | 8533/12188 [18:22:32<8:46:32, 8.64s/it] {'loss': 0.2856, 'grad_norm': 0.7109368201493697, 'learning_rate': 2.179898126802108e-06, 'epoch': 0.7} + 70%|███████ | 8533/12188 [18:22:32<8:46:32, 8.64s/it] 70%|███████ | 8534/12188 [18:22:39<8:14:15, 8.12s/it] {'loss': 0.2984, 'grad_norm': 0.7141792313164439, 'learning_rate': 2.1788010329005783e-06, 'epoch': 0.7} + 70%|███████ | 8534/12188 [18:22:39<8:14:15, 8.12s/it] 70%|███████ | 8535/12188 [18:22:46<7:50:42, 7.73s/it] {'loss': 0.3253, 'grad_norm': 0.6545095203426954, 'learning_rate': 2.1777041382275854e-06, 'epoch': 0.7} + 70%|███████ | 8535/12188 [18:22:46<7:50:42, 7.73s/it] 70%|███████ | 8536/12188 [18:22:53<7:52:31, 7.76s/it] {'loss': 0.3164, 'grad_norm': 0.6443531288297351, 'learning_rate': 2.1766074428605916e-06, 'epoch': 0.7} + 70%|███████ | 8536/12188 [18:22:53<7:52:31, 7.76s/it] 70%|███████ | 8537/12188 [18:23:01<7:50:27, 7.73s/it] {'loss': 0.303, 'grad_norm': 0.6668816268413645, 'learning_rate': 2.175510946877046e-06, 'epoch': 0.7} + 70%|███████ | 8537/12188 [18:23:01<7:50:27, 7.73s/it] 70%|███████ | 8538/12188 [18:23:08<7:39:28, 7.55s/it] {'loss': 0.3294, 'grad_norm': 0.7366697625616684, 'learning_rate': 2.1744146503543777e-06, 'epoch': 0.7} + 70%|███████ | 8538/12188 [18:23:08<7:39:28, 7.55s/it] 70%|███████ | 8539/12188 [18:23:15<7:23:42, 7.30s/it] {'loss': 0.324, 'grad_norm': 0.8364004962291854, 'learning_rate': 2.1733185533700093e-06, 'epoch': 0.7} + 70%|███████ | 8539/12188 [18:23:15<7:23:42, 7.30s/it] 70%|███████ | 8540/12188 [18:23:22<7:15:34, 7.16s/it] {'loss': 0.3335, 'grad_norm': 0.6884578951158553, 'learning_rate': 2.172222656001341e-06, 'epoch': 0.7} + 70%|███████ | 8540/12188 [18:23:22<7:15:34, 7.16s/it] 70%|███████ | 8541/12188 [18:23:29<7:11:01, 7.09s/it] {'loss': 0.2811, 'grad_norm': 0.6753598187263437, 'learning_rate': 2.171126958325767e-06, 'epoch': 0.7} + 70%|███████ | 8541/12188 [18:23:29<7:11:01, 7.09s/it] 70%|███████ | 8542/12188 [18:23:35<7:04:13, 6.98s/it] {'loss': 0.3325, 'grad_norm': 0.7736914807118115, 'learning_rate': 2.170031460420663e-06, 'epoch': 0.7} + 70%|███████ | 8542/12188 [18:23:35<7:04:13, 6.98s/it] 70%|███████ | 8543/12188 [18:23:43<7:12:44, 7.12s/it] {'loss': 0.3052, 'grad_norm': 0.6886556350711053, 'learning_rate': 2.168936162363388e-06, 'epoch': 0.7} + 70%|███████ | 8543/12188 [18:23:43<7:12:44, 7.12s/it] 70%|███████ | 8544/12188 [18:23:51<7:27:36, 7.37s/it] {'loss': 0.2616, 'grad_norm': 0.7372744653935149, 'learning_rate': 2.167841064231295e-06, 'epoch': 0.7} + 70%|███████ | 8544/12188 [18:23:51<7:27:36, 7.37s/it] 70%|███████ | 8545/12188 [18:23:58<7:23:26, 7.30s/it] {'loss': 0.3188, 'grad_norm': 0.6259341958917373, 'learning_rate': 2.166746166101714e-06, 'epoch': 0.7} + 70%|███████ | 8545/12188 [18:23:58<7:23:26, 7.30s/it] 70%|███████ | 8546/12188 [18:24:05<7:23:51, 7.31s/it] {'loss': 0.309, 'grad_norm': 0.6589217726028992, 'learning_rate': 2.1656514680519676e-06, 'epoch': 0.7} + 70%|███████ | 8546/12188 [18:24:05<7:23:51, 7.31s/it] 70%|███████ | 8547/12188 [18:24:13<7:25:27, 7.34s/it] {'loss': 0.3005, 'grad_norm': 0.8973042663716344, 'learning_rate': 2.1645569701593616e-06, 'epoch': 0.7} + 70%|███████ | 8547/12188 [18:24:13<7:25:27, 7.34s/it] 70%|███████ | 8548/12188 [18:24:20<7:14:15, 7.16s/it] {'loss': 0.3012, 'grad_norm': 0.7434541644038816, 'learning_rate': 2.1634626725011854e-06, 'epoch': 0.7} + 70%|███████ | 8548/12188 [18:24:20<7:14:15, 7.16s/it] 70%|███████ | 8549/12188 [18:24:26<7:07:18, 7.05s/it] {'loss': 0.3128, 'grad_norm': 0.7137574172371192, 'learning_rate': 2.1623685751547198e-06, 'epoch': 0.7} + 70%|███████ | 8549/12188 [18:24:26<7:07:18, 7.05s/it] 70%|███████ | 8550/12188 [18:24:33<7:03:45, 6.99s/it] {'loss': 0.2869, 'grad_norm': 0.6927715548597561, 'learning_rate': 2.1612746781972242e-06, 'epoch': 0.7} + 70%|███████ | 8550/12188 [18:24:33<7:03:45, 6.99s/it] 70%|███████ | 8551/12188 [18:24:40<7:05:12, 7.01s/it] {'loss': 0.2964, 'grad_norm': 0.6632173784691282, 'learning_rate': 2.1601809817059503e-06, 'epoch': 0.7} + 70%|███████ | 8551/12188 [18:24:40<7:05:12, 7.01s/it] 70%|███████ | 8552/12188 [18:24:48<7:11:38, 7.12s/it] {'loss': 0.3139, 'grad_norm': 0.6520517211952185, 'learning_rate': 2.159087485758135e-06, 'epoch': 0.7} + 70%|███████ | 8552/12188 [18:24:48<7:11:38, 7.12s/it] 70%|███████ | 8553/12188 [18:24:55<7:10:14, 7.10s/it] {'loss': 0.2816, 'grad_norm': 0.6854977554721713, 'learning_rate': 2.157994190430995e-06, 'epoch': 0.7} + 70%|███████ | 8553/12188 [18:24:55<7:10:14, 7.10s/it] 70%|███████ | 8554/12188 [18:25:02<7:08:18, 7.07s/it] {'loss': 0.2847, 'grad_norm': 0.646080073005483, 'learning_rate': 2.1569010958017416e-06, 'epoch': 0.7} + 70%|███████ | 8554/12188 [18:25:02<7:08:18, 7.07s/it] 70%|███████ | 8555/12188 [18:25:08<7:02:09, 6.97s/it] {'loss': 0.2978, 'grad_norm': 0.6770645417625125, 'learning_rate': 2.155808201947563e-06, 'epoch': 0.7} + 70%|███████ | 8555/12188 [18:25:08<7:02:09, 6.97s/it] 70%|███████ | 8556/12188 [18:25:16<7:12:49, 7.15s/it] {'loss': 0.3168, 'grad_norm': 0.7065153339421542, 'learning_rate': 2.1547155089456414e-06, 'epoch': 0.7} + 70%|███████ | 8556/12188 [18:25:16<7:12:49, 7.15s/it] 70%|███████ | 8557/12188 [18:25:23<7:08:12, 7.08s/it] {'loss': 0.2939, 'grad_norm': 0.6585356291492391, 'learning_rate': 2.1536230168731394e-06, 'epoch': 0.7} + 70%|███████ | 8557/12188 [18:25:23<7:08:12, 7.08s/it] 70%|███████ | 8558/12188 [18:25:30<7:04:03, 7.01s/it] {'loss': 0.3013, 'grad_norm': 0.6750161653423995, 'learning_rate': 2.1525307258072047e-06, 'epoch': 0.7} + 70%|███████ | 8558/12188 [18:25:30<7:04:03, 7.01s/it] 70%|███████ | 8559/12188 [18:25:37<7:00:28, 6.95s/it] {'loss': 0.3369, 'grad_norm': 0.6237912163783761, 'learning_rate': 2.1514386358249757e-06, 'epoch': 0.7} + 70%|███████ | 8559/12188 [18:25:37<7:00:28, 6.95s/it] 70%|███████ | 8560/12188 [18:25:43<6:55:27, 6.87s/it] {'loss': 0.3152, 'grad_norm': 0.6496107490718095, 'learning_rate': 2.1503467470035747e-06, 'epoch': 0.7} + 70%|███████ | 8560/12188 [18:25:43<6:55:27, 6.87s/it] 70%|███████ | 8561/12188 [18:25:50<6:54:40, 6.86s/it] {'loss': 0.3084, 'grad_norm': 0.646157194935363, 'learning_rate': 2.149255059420107e-06, 'epoch': 0.7} + 70%|███████ | 8561/12188 [18:25:50<6:54:40, 6.86s/it] 70%|███████ | 8562/12188 [18:25:57<7:00:34, 6.96s/it] {'loss': 0.3064, 'grad_norm': 0.7425241077056324, 'learning_rate': 2.1481635731516692e-06, 'epoch': 0.7} + 70%|███████ | 8562/12188 [18:25:57<7:00:34, 6.96s/it] 70%|███████ | 8563/12188 [18:26:05<7:16:51, 7.23s/it] {'loss': 0.3111, 'grad_norm': 0.7584092472321631, 'learning_rate': 2.1470722882753353e-06, 'epoch': 0.7} + 70%|███████ | 8563/12188 [18:26:05<7:16:51, 7.23s/it] 70%|███████ | 8564/12188 [18:26:12<7:12:28, 7.16s/it] {'loss': 0.2878, 'grad_norm': 0.6368848972417931, 'learning_rate': 2.1459812048681735e-06, 'epoch': 0.7} + 70%|███████ | 8564/12188 [18:26:12<7:12:28, 7.16s/it] 70%|███████ | 8565/12188 [18:26:19<7:11:37, 7.15s/it] {'loss': 0.3604, 'grad_norm': 0.714295060602985, 'learning_rate': 2.1448903230072355e-06, 'epoch': 0.7} + 70%|███████ | 8565/12188 [18:26:19<7:11:37, 7.15s/it] 70%|███████ | 8566/12188 [18:26:28<7:48:46, 7.77s/it] {'loss': 0.3261, 'grad_norm': 0.7393786785790648, 'learning_rate': 2.1437996427695535e-06, 'epoch': 0.7} + 70%|███████ | 8566/12188 [18:26:28<7:48:46, 7.77s/it] 70%|███████ | 8567/12188 [18:26:35<7:32:42, 7.50s/it] {'loss': 0.3391, 'grad_norm': 0.8928150089549641, 'learning_rate': 2.142709164232152e-06, 'epoch': 0.7} + 70%|███████ | 8567/12188 [18:26:35<7:32:42, 7.50s/it] 70%|███████ | 8568/12188 [18:26:42<7:20:07, 7.29s/it] {'loss': 0.32, 'grad_norm': 0.7013511494444823, 'learning_rate': 2.1416188874720404e-06, 'epoch': 0.7} + 70%|███████ | 8568/12188 [18:26:42<7:20:07, 7.29s/it] 70%|███████ | 8569/12188 [18:26:49<7:10:49, 7.14s/it] {'loss': 0.3006, 'grad_norm': 0.6503896294082573, 'learning_rate': 2.140528812566211e-06, 'epoch': 0.7} + 70%|███████ | 8569/12188 [18:26:49<7:10:49, 7.14s/it] 70%|███████ | 8570/12188 [18:26:56<7:12:00, 7.16s/it] {'loss': 0.296, 'grad_norm': 0.775166729926355, 'learning_rate': 2.1394389395916408e-06, 'epoch': 0.7} + 70%|███████ | 8570/12188 [18:26:56<7:12:00, 7.16s/it] 70%|███████ | 8571/12188 [18:27:04<7:24:32, 7.37s/it] {'loss': 0.2984, 'grad_norm': 0.7215355820531719, 'learning_rate': 2.1383492686252987e-06, 'epoch': 0.7} + 70%|███████ | 8571/12188 [18:27:04<7:24:32, 7.37s/it] 70%|███████ | 8572/12188 [18:27:11<7:19:06, 7.29s/it] {'loss': 0.2802, 'grad_norm': 0.6847644576726902, 'learning_rate': 2.137259799744132e-06, 'epoch': 0.7} + 70%|███████ | 8572/12188 [18:27:11<7:19:06, 7.29s/it] 70%|███████ | 8573/12188 [18:27:19<7:26:53, 7.42s/it] {'loss': 0.3293, 'grad_norm': 0.6889697259182342, 'learning_rate': 2.1361705330250804e-06, 'epoch': 0.7} + 70%|███████ | 8573/12188 [18:27:19<7:26:53, 7.42s/it] 70%|███████ | 8574/12188 [18:27:27<7:33:36, 7.53s/it] {'loss': 0.2833, 'grad_norm': 0.6728321408944494, 'learning_rate': 2.1350814685450634e-06, 'epoch': 0.7} + 70%|███████ | 8574/12188 [18:27:27<7:33:36, 7.53s/it] 70%|███████ | 8575/12188 [18:27:33<7:20:17, 7.31s/it] {'loss': 0.2993, 'grad_norm': 0.7645750363873952, 'learning_rate': 2.133992606380991e-06, 'epoch': 0.7} + 70%|███████ | 8575/12188 [18:27:33<7:20:17, 7.31s/it] 70%|███████ | 8576/12188 [18:27:41<7:29:05, 7.46s/it] {'loss': 0.3307, 'grad_norm': 0.6381488662192882, 'learning_rate': 2.132903946609756e-06, 'epoch': 0.7} + 70%|███████ | 8576/12188 [18:27:41<7:29:05, 7.46s/it] 70%|███████ | 8577/12188 [18:27:49<7:31:04, 7.50s/it] {'loss': 0.3162, 'grad_norm': 0.6234267222148875, 'learning_rate': 2.1318154893082364e-06, 'epoch': 0.7} + 70%|███████ | 8577/12188 [18:27:49<7:31:04, 7.50s/it] 70%|███████ | 8578/12188 [18:27:56<7:28:35, 7.46s/it] {'loss': 0.2913, 'grad_norm': 0.6905260233112792, 'learning_rate': 2.130727234553301e-06, 'epoch': 0.7} + 70%|███████ | 8578/12188 [18:27:56<7:28:35, 7.46s/it] 70%|███████ | 8579/12188 [18:28:03<7:22:04, 7.35s/it] {'loss': 0.3088, 'grad_norm': 0.6630373067931943, 'learning_rate': 2.129639182421797e-06, 'epoch': 0.7} + 70%|███████ | 8579/12188 [18:28:03<7:22:04, 7.35s/it] 70%|███████ | 8580/12188 [18:28:11<7:30:48, 7.50s/it] {'loss': 0.3376, 'grad_norm': 0.6499555416004164, 'learning_rate': 2.128551332990564e-06, 'epoch': 0.7} + 70%|███████ | 8580/12188 [18:28:11<7:30:48, 7.50s/it] 70%|███████ | 8581/12188 [18:28:19<7:30:09, 7.49s/it] {'loss': 0.295, 'grad_norm': 0.6223806653756995, 'learning_rate': 2.1274636863364205e-06, 'epoch': 0.7} + 70%|███████ | 8581/12188 [18:28:19<7:30:09, 7.49s/it] 70%|███████ | 8582/12188 [18:28:25<7:19:58, 7.32s/it] {'loss': 0.3223, 'grad_norm': 0.7018646338870215, 'learning_rate': 2.1263762425361787e-06, 'epoch': 0.7} + 70%|███████ | 8582/12188 [18:28:25<7:19:58, 7.32s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7fd29b618220> +[Try #0] Failed to fetch sample 4529264 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7fd29b618220> +Problematic sample: {'image': '20240823_045930_before_screenshot_sub3.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Privacy & Cookies'"}, {'from': 'gpt', 'value': '\nclick(x=0.5935, y=0.92)\n'}]} + 70%|███████ | 8583/12188 [18:28:32<7:14:16, 7.23s/it] {'loss': 0.2682, 'grad_norm': 0.6578896978934261, 'learning_rate': 2.1252890016666277e-06, 'epoch': 0.7} + 70%|███████ | 8583/12188 [18:28:32<7:14:16, 7.23s/it] 70%|███████ | 8584/12188 [18:28:40<7:15:50, 7.26s/it] {'loss': 0.2697, 'grad_norm': 0.632824383505475, 'learning_rate': 2.124201963804551e-06, 'epoch': 0.7} + 70%|███████ | 8584/12188 [18:28:40<7:15:50, 7.26s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 899, in process_image_unified + visual_processed = processor.preprocess(image, return_tensors="pt") + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 432, in preprocess + patches, image_grid_thw = self._preprocess( + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 252, in _preprocess + resized_height, resized_width = smart_resize( + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 69, in smart_resize + raise ValueError(f"height:{height} and width:{width} must be larger than factor:{factor}") +ValueError: height:24 and width:1660 must be larger than factor:28 +[Try #0] Failed to fetch sample 2210305 in VC:s3://gui-agent/jedi/images/figma400k/figma400k_extracted/. Exception: height:24 and width:1660 must be larger than factor:28 +Problematic sample: {'image': '993930d933fd08dcb69c5d39e23e8664c9d8f9f42fe3808805d989b0e5022b9e.png', 'conversations': [{'from': 'human', 'value': "\nThe visual appearance of this Menu Item is as follows:\n\nVisual Description: The 'Version Control' text appears in white on a dark toolbar background, matching the macOS menu bar styling. The text uses a system font consistent with macOS design guidelines.\n\nRegarding the Menu Item's position:\nLocated in the main application menu bar between 'Run' and 'Tools' menu items. It is positioned in the upper portion of the window in a horizontal menu layout.\n\nUsage and purpose of this Menu Item:\nThis is a clickable menu item that opens a dropdown menu containing version control related options for managing source code and repository operations."}, {'from': 'gpt', 'value': 'x1, y1, x2, y2: [[349, 0, 423, 1000]]'}, {'from': 'human', 'value': 'This Menu Item allows users to:\nThis is a clickable menu item that opens a dropdown menu containing version control related options for managing source code and repository operations.'}, {'from': 'gpt', 'value': 'x1, y1, x2, y2: [[349, 0, 423, 1000]]'}, {'from': 'human', 'value': "The Menu Item's visual characteristics are as follows:\nThe 'Version Control' text appears in white on a dark toolbar background, matching the macOS menu bar styling. The text uses a system font consistent with macOS design guidelines.\n\nLocation details of the Menu Item:\nLocated in the main application menu bar between 'Run' and 'Tools' menu items. It is positioned in the upper portion of the window in a horizontal menu layout."}, {'from': 'gpt', 'value': 'x1, y1, x2, y2: [[349, 0, 423, 1000]]'}, {'from': 'human', 'value': "Here's where the Menu Item is located:\nLocated at the far right end of the main menu bar, after 'Window' and other system menu items. It's part of the primary horizontal navigation strip at the very top of the application window."}, {'from': 'gpt', 'value': '[[507, 0, 542, 1000]]'}, {'from': 'human', 'value': "Positional details of the Menu Item:\nLocated at the far right end of the main menu bar, after 'Window' and other system menu items. It's part of the primary horizontal navigation strip at the very top of the application window.\n\nRegarding the Menu Item's functionality:\nClicking this menu item reveals a dropdown menu containing help-related options, documentation, and support resources typical of macOS applications. It's a standard system menu that provides access to assistance features."}, {'from': 'gpt', 'value': '[[507, 0, 542, 1000]]'}, {'from': 'human', 'value': "This Menu Item element can be described as follows:\n\nVisual Description: The 'Help' menu item appears in the application's menu bar with white text on a dark gray/black background. It's a standard macOS menu bar item with clear typography and contrasting colors.\n\nThe Menu Item's placement in the interface:\nLocated at the far right end of the main menu bar, after 'Window' and other system menu items. It's part of the primary horizontal navigation strip at the very top of the application window."}, {'from': 'gpt', 'value': '[[507, 0, 542, 1000]]'}, {'from': 'human', 'value': "Spatial context of the Menu Item:\nLocated in the top menu bar between 'File' and 'View' menu items. It's positioned directly below the Apple menu icon and is part of the Visual Studio application's main menu structure.\n\nFunctional capabilities of the Menu Item:\nOpens a dropdown menu containing editing-related commands and operations when clicked. This is a standard menu item that provides access to common editing functions like cut, copy, paste, and other text manipulation operations."}, {'from': 'gpt', 'value': 'x1, y1, x2, y2: [[126, 0, 158, 1000]]'}, {'from': 'human', 'value': "Visually, this Menu Item can be described as:\nThe 'Edit' menu item appears in the main menu bar with white text on a dark background. It's rendered in the standard system font and follows macOS menu styling conventions."}, {'from': 'gpt', 'value': 'x1, y1, x2, y2: [[126, 0, 158, 1000]]'}, {'from': 'human', 'value': "The spatial layout of this Menu Item:\nLocated in the top menu bar between 'File' and 'View' menu items. It's positioned directly below the Apple menu icon and is part of the Visual Studio application's main menu structure."}, {'from': 'gpt', 'value': 'x1, y1, x2, y2: [[126, 0, 158, 1000]]'}, {'from': 'human', 'value': 'This Menu Bar is positioned as follows:\nLocated at the very top of the application window, spanning the full width of the screen. The menu items are arranged horizontally in a single row, with the Apple menu and application name anchored to the left side.\n\nThe functionality of this Menu Bar:\nServes as the main navigation menu bar for the Visual Studio application, providing access to all major application functions and features through dropdown menus. Each label represents a clickable menu that reveals additional options when selected.'}, {'from': 'gpt', 'value': '[[0, 0, 1000, 1000]]'}, {'from': 'human', 'value': "Here's what this Menu Bar looks like:\nA dark-themed menu bar with white text labels, displaying standard application menu items including 'File', 'Edit', 'View', 'Search', 'Project', 'Build', 'Run', 'Version Control', 'Tools', 'Window', and 'Help'. The leftmost item is an Apple logo, followed by 'Visual Studio' as the application name.\n\nIn terms of the Menu Bar's positioning:\nLocated at the very top of the application window, spanning the full width of the screen. The menu items are arranged horizontally in a single row, with the Apple menu and application name anchored to the left side.\n\nThe role of this Menu Bar:\nServes as the main navigation menu bar for the Visual Studio application, providing access to all major application functions and features through dropdown menus. Each label represents a clickable menu that reveals additional options when selected."}, {'from': 'gpt', 'value': '[[0, 0, 1000, 1000]]'}, {'from': 'human', 'value': "The visual attributes of this Menu Bar are:\nA dark-themed menu bar with white text labels, displaying standard application menu items including 'File', 'Edit', 'View', 'Search', 'Project', 'Build', 'Run', 'Version Control', 'Tools', 'Window', and 'Help'. The leftmost item is an Apple logo, followed by 'Visual Studio' as the application name.\n\nPositional details of the Menu Bar:\nLocated at the very top of the application window, spanning the full width of the screen. The menu items are arranged horizontally in a single row, with the Apple menu and application name anchored to the left side."}, {'from': 'gpt', 'value': '[[0, 0, 1000, 1000]]'}, {'from': 'human', 'value': "Here's a detailed visual description of the Menu Item:\nThe 'Tools' menu item appears in the main application menu bar with white text on a dark background. It matches the styling of other menu items like 'Build', 'Run', and 'Window'.\n\nThe Menu Item's intended function:\nWhen clicked, the Tools menu item reveals a dropdown menu containing various tool-related options and commands for the Visual Studio environment."}, {'from': 'gpt', 'value': '[[423, 0, 460, 1000]]'}, {'from': 'human', 'value': "The visual appearance of this Menu Item is as follows:\n\nVisual Description: The 'Tools' menu item appears in the main application menu bar with white text on a dark background. It matches the styling of other menu items like 'Build', 'Run', and 'Window'.\n\nLocation and arrangement of this Menu Item:\nLocated in the main menu bar at the top of the window, positioned between 'Version Control' and 'Window'. It's part of a horizontal list of menu options that spans the width of the application window."}, {'from': 'gpt', 'value': '[[423, 0, 460, 1000]]'}, {'from': 'human', 'value': "Visual features of the Menu Item:\nThe 'Tools' menu item appears in the main application menu bar with white text on a dark background. It matches the styling of other menu items like 'Build', 'Run', and 'Window'.\n\nThe spatial layout of this Menu Item:\nLocated in the main menu bar at the top of the window, positioned between 'Version Control' and 'Window'. It's part of a horizontal list of menu options that spans the width of the application window.\n\nThis Menu Item allows users to:\nWhen clicked, the Tools menu item reveals a dropdown menu containing various tool-related options and commands for the Visual Studio environment."}, {'from': 'gpt', 'value': '[[423, 0, 460, 1000]]'}, {'from': 'human', 'value': "Spatial context of the Menu Item:\nLocated in the main menu bar at the top of the window, positioned between 'Search' and 'Build' menu items. The menu item is sixth from the left in the horizontal menu bar sequence, following 'File', 'Edit', 'View', 'Search', and preceding 'Build', 'Run', 'Version Control'."}, {'from': 'gpt', 'value': '[[237, 0, 281, 1000]]'}, {'from': 'human', 'value': "Visual appearance details of the Menu Item:\nA dark gray tab labeled 'Project' in white text, appearing as part of the main application menu bar. The tab has a slightly darker background than its neighboring menu items, with clear typography that matches the macOS system font.\n\nThis Menu Item serves the following purpose:\nServes as a clickable menu item that reveals a dropdown menu containing project-related commands and options when clicked. Part of the primary navigation system for the IDE interface."}, {'from': 'gpt', 'value': '[[237, 0, 281, 1000]]'}, {'from': 'human', 'value': "Let me describe the visual characteristics of this Menu Item:\nA dark gray tab labeled 'Project' in white text, appearing as part of the main application menu bar. The tab has a slightly darker background than its neighboring menu items, with clear typography that matches the macOS system font.\n\nSpatial context of the Menu Item:\nLocated in the main menu bar at the top of the window, positioned between 'Search' and 'Build' menu items. The menu item is sixth from the left in the horizontal menu bar sequence, following 'File', 'Edit', 'View', 'Search', and preceding 'Build', 'Run', 'Version Control'.\n\nFunctional description of the Menu Item:\nServes as a clickable menu item that reveals a dropdown menu containing project-related commands and options when clicked. Part of the primary navigation system for the IDE interface."}, {'from': 'gpt', 'value': '[[237, 0, 281, 1000]]'}, {'from': 'human', 'value': "This Menu Item element can be described as follows:\n\nVisual Description: A 'Run' menu item in the main application menu bar, styled in the standard macOS menu appearance with white text on a dark background. The text appears in a system font typical of macOS menu items.\n\nThis Menu Item's purpose and usage:\nOpens a dropdown menu containing run-related commands and options for executing the application or code within Visual Studio."}, {'from': 'gpt', 'value': 'x1, y1, x2, y2: [[317, 0, 349, 1000]]'}, {'from': 'human', 'value': "The visual attributes of this Menu Item are:\nA 'Run' menu item in the main application menu bar, styled in the standard macOS menu appearance with white text on a dark background. The text appears in a system font typical of macOS menu items.\n\nLocation and arrangement of this Menu Item:\nLocated in the main menu bar between the 'Build' and 'Version Control' menu items, positioned in the upper portion of the application window. This menu item follows the standard macOS menu bar layout pattern.\n\nThis Menu Item allows users to:\nOpens a dropdown menu containing run-related commands and options for executing the application or code within Visual Studio."}, {'from': 'gpt', 'value': 'x1, y1, x2, y2: [[317, 0, 349, 1000]]'}, {'from': 'human', 'value': "Regarding the Menu Item's functionality:\nOpens a dropdown menu containing run-related commands and options for executing the application or code within Visual Studio."}, {'from': 'gpt', 'value': 'x1, y1, x2, y2: [[317, 0, 349, 1000]]'}, {'from': 'human', 'value': "Here's a detailed visual description of the Menu Bar:\nA dark-themed menu bar displaying primary navigation items including 'File', 'Edit', 'View', 'Search', 'Project', 'Build', 'Run', 'Version Control', 'Tools', 'Window', and 'Help'. The menu bar features white text on a dark gray background, following the standard macOS menu bar design pattern.\n\nLocation and arrangement of this Menu Bar:\nLocated at the very top of the application window, spanning the entire width. The Apple menu icon appears at the far left, followed by 'Visual Studio' and the rest of the menu items arranged horizontally from left to right.\n\nThe functionality of this Menu Bar:\nServes as the main navigation menu for the Visual Studio application, providing access to core functionality and features through dropdown menus. Users can click on any menu item to reveal additional options and commands."}, {'from': 'gpt', 'value': 'x1, y1, x2, y2: [[0, 0, 550, 1000]]'}, {'from': 'human', 'value': "Spatial context of the Menu Bar:\nLocated at the very top of the application window, spanning the entire width. The Apple menu icon appears at the far left, followed by 'Visual Studio' and the rest of the menu items arranged horizontally from left to right.\n\nFunctional capabilities of the Menu Bar:\nServes as the main navigation menu for the Visual Studio application, providing access to core functionality and features through dropdown menus. Users can click on any menu item to reveal additional options and commands."}, {'from': 'gpt', 'value': 'x1, y1, x2, y2: [[0, 0, 550, 1000]]'}, {'from': 'human', 'value': "Here's a detailed visual description of the Menu Bar:\nA dark-themed menu bar displaying primary navigation items including 'File', 'Edit', 'View', 'Search', 'Project', 'Build', 'Run', 'Version Control', 'Tools', 'Window', and 'Help'. The menu bar features white text on a dark gray background, following the standard macOS menu bar design pattern."}, {'from': 'gpt', 'value': 'x1, y1, x2, y2: [[0, 0, 550, 1000]]'}, {'from': 'human', 'value': "The Menu Item's visual characteristics are as follows:\nA dark gray menu item labeled 'Build' in white text, positioned in the main menu bar. The text appears to be using a system standard font, and has a consistent height and padding with neighboring menu items.\n\nThe functionality of this Menu Item:\nOpens a dropdown menu containing build-related commands and options for compiling and constructing the project. This is a standard menu item that users can click to access build functionality in Visual Studio."}, {'from': 'gpt', 'value': '[[281, 0, 317, 1000]]'}, {'from': 'human', 'value': "The Menu Item's visual characteristics are as follows:\nA dark gray menu item labeled 'Build' in white text, positioned in the main menu bar. The text appears to be using a system standard font, and has a consistent height and padding with neighboring menu items.\n\nSpatial context of the Menu Item:\nLocated in the main menu bar at the top of the Visual Studio window, positioned between 'Project' and 'Run' menu items. It's part of a horizontal list of menu options that spans the width of the application window."}, {'from': 'gpt', 'value': '[[281, 0, 317, 1000]]'}, {'from': 'human', 'value': "Visual appearance details of the Menu Item:\nA dark gray menu item labeled 'Build' in white text, positioned in the main menu bar. The text appears to be using a system standard font, and has a consistent height and padding with neighboring menu items."}, {'from': 'gpt', 'value': '[[281, 0, 317, 1000]]'}, {'from': 'human', 'value': 'This Menu Button with Icon allows users to:\nThis is the primary system menu that provides access to core macOS functions like System Preferences, App Store, Force Quit, Sleep, Restart, and Shut Down options. Clicking this icon reveals a dropdown menu with these system-level controls.'}, {'from': 'gpt', 'value': '[[8, 0, 27, 1000]]'}, {'from': 'human', 'value': "The visual attributes of this Menu Button with Icon are:\nThis is the Apple menu icon represented by the classic Apple logo in white, located in the macOS menu bar. It maintains the standard operating system menu style and is monochrome white against the dark menu bar background.\n\nRegarding the Menu Button with Icon's position:\nThe Apple menu icon is positioned at the far left end of the menu bar, appearing as the first item before 'Visual Studio' and other menu items. It is aligned vertically with other menu bar items in the standard macOS menu bar height.\n\nWhat this Menu Button with Icon does:\nThis is the primary system menu that provides access to core macOS functions like System Preferences, App Store, Force Quit, Sleep, Restart, and Shut Down options. Clicking this icon reveals a dropdown menu with these system-level controls."}, {'from': 'gpt', 'value': '[[8, 0, 27, 1000]]'}, {'from': 'human', 'value': "Here's a detailed visual description of the Menu Button with Icon:\nThis is the Apple menu icon represented by the classic Apple logo in white, located in the macOS menu bar. It maintains the standard operating system menu style and is monochrome white against the dark menu bar background."}, {'from': 'gpt', 'value': '[[8, 0, 27, 1000]]'}, {'from': 'human', 'value': "Let me describe the visual characteristics of this Menu item:\nThe word 'Window' appears in white text against a dark toolbar background. It's styled as a standard macOS menu item with a clear, readable font typical of system menus.\n\nThe spatial layout of this Menu item:\nLocated in the main menu bar at the top of the screen, positioned between 'Tools' and 'Help'. The menu item appears in the standard macOS menu position, near the right side of the top menu bar.\n\nFunctional description of the Menu item:\nClicking this menu item reveals a dropdown menu containing window management options, such as minimizing, maximizing, or arranging windows within the Visual Studio application."}, {'from': 'gpt', 'value': 'x1, y1, x2, y2: [[460, 0, 507, 1000]]'}, {'from': 'human', 'value': "The visual attributes of this Menu item are:\nThe word 'Window' appears in white text against a dark toolbar background. It's styled as a standard macOS menu item with a clear, readable font typical of system menus.\n\nPositional details of the Menu item:\nLocated in the main menu bar at the top of the screen, positioned between 'Tools' and 'Help'. The menu item appears in the standard macOS menu position, near the right side of the top menu bar."}, {'from': 'gpt', 'value': 'x1, y1, x2, y2: [[460, 0, 507, 1000]]'}, {'from': 'human', 'value': 'Usage and purpose of this Menu item:\nClicking this menu item reveals a dropdown menu containing window management options, such as minimizing, maximizing, or arranging windows within the Visual Studio application.'}, {'from': 'gpt', 'value': 'x1, y1, x2, y2: [[460, 0, 507, 1000]]'}, {'from': 'human', 'value': "The visual appearance of this Menu Item is as follows:\n\nVisual Description: The word 'View' appears as a menu item in the top application menu bar, styled in white text against a dark gray background. The text is clear and legible, maintaining the standard macOS menu appearance.\n\nIn terms of the Menu Item's positioning:\nLocated in the main application menu bar at the top, positioned between 'Edit' and 'Search' menu items. It's the fourth item from the left, following 'Visual Studio', 'File', and 'Edit'."}, {'from': 'gpt', 'value': '[[158, 0, 193, 1000]]'}, {'from': 'human', 'value': "The visual attributes of this Menu Item are:\nThe word 'View' appears as a menu item in the top application menu bar, styled in white text against a dark gray background. The text is clear and legible, maintaining the standard macOS menu appearance.\n\nPositional details of the Menu Item:\nLocated in the main application menu bar at the top, positioned between 'Edit' and 'Search' menu items. It's the fourth item from the left, following 'Visual Studio', 'File', and 'Edit'.\n\nThe role of this Menu Item:\nWhen clicked, this menu item reveals a dropdown with view-related options and commands for controlling the application's visual interface and layout settings, following standard macOS menu conventions."}, {'from': 'gpt', 'value': '[[158, 0, 193, 1000]]'}, {'from': 'human', 'value': "This Menu Item is positioned as follows:\nLocated in the main application menu bar at the top, positioned between 'Edit' and 'Search' menu items. It's the fourth item from the left, following 'Visual Studio', 'File', and 'Edit'."}, {'from': 'gpt', 'value': '[[158, 0, 193, 1000]]'}, {'from': 'human', 'value': "Looking at this Menu Item, we can observe:\nThe 'Search' menu item appears in the main application menu bar. It has white text against a dark gray/black background menu bar, consistent with macOS styling. The text uses the system default menu font."}, {'from': 'gpt', 'value': '[[193, 0, 237, 1000]]'}, {'from': 'human', 'value': "The Menu Item's intended function:\nClicking this menu item reveals a dropdown menu with search-related commands and options. It provides access to search functionality within the Visual Studio application."}, {'from': 'gpt', 'value': '[[193, 0, 237, 1000]]'}, {'from': 'human', 'value': "Regarding the Menu Item's position:\nLocated in the main menu bar between 'View' and 'Project' menu items, positioned in the upper portion of the application window. It's part of a standard horizontal menu layout common to macOS applications."}, {'from': 'gpt', 'value': '[[193, 0, 237, 1000]]'}]} + 70%|███████ | 8585/12188 [18:28:47<7:16:52, 7.28s/it] {'loss': 0.33, 'grad_norm': 0.683482550054492, 'learning_rate': 2.123115129026709e-06, 'epoch': 0.7} + 70%|███████ | 8585/12188 [18:28:47<7:16:52, 7.28s/it] 70%|███████ | 8586/12188 [18:28:54<7:11:17, 7.18s/it] {'loss': 0.3386, 'grad_norm': 0.6845386097422029, 'learning_rate': 2.1220284974098565e-06, 'epoch': 0.7} + 70%|███████ | 8586/12188 [18:28:54<7:11:17, 7.18s/it] 70%|███████ | 8587/12188 [18:29:01<7:13:35, 7.22s/it] {'loss': 0.3211, 'grad_norm': 0.6936743228858807, 'learning_rate': 2.120942069030726e-06, 'epoch': 0.7} + 70%|███████ | 8587/12188 [18:29:01<7:13:35, 7.22s/it] 70%|███████ | 8588/12188 [18:29:08<7:09:19, 7.16s/it] {'loss': 0.3039, 'grad_norm': 0.7075021439256101, 'learning_rate': 2.119855843966042e-06, 'epoch': 0.7} + 70%|███████ | 8588/12188 [18:29:08<7:09:19, 7.16s/it] 70%|███████ | 8589/12188 [18:29:16<7:15:13, 7.26s/it] {'loss': 0.2762, 'grad_norm': 0.788423344287218, 'learning_rate': 2.11876982229251e-06, 'epoch': 0.7} + 70%|███████ | 8589/12188 [18:29:16<7:15:13, 7.26s/it] 70%|███████ | 8590/12188 [18:29:23<7:20:15, 7.34s/it] {'loss': 0.2945, 'grad_norm': 0.6253524183535496, 'learning_rate': 2.117684004086824e-06, 'epoch': 0.7} + 70%|███████ | 8590/12188 [18:29:23<7:20:15, 7.34s/it] 70%|███████ | 8591/12188 [18:29:30<7:13:04, 7.22s/it] {'loss': 0.3646, 'grad_norm': 0.719585562886671, 'learning_rate': 2.1165983894256647e-06, 'epoch': 0.7} + 70%|███████ | 8591/12188 [18:29:30<7:13:04, 7.22s/it] 70%|███████ | 8592/12188 [18:29:39<7:38:20, 7.65s/it] {'loss': 0.3157, 'grad_norm': 0.6574490280840557, 'learning_rate': 2.1155129783856932e-06, 'epoch': 0.7} + 70%|███████ | 8592/12188 [18:29:39<7:38:20, 7.65s/it] 71%|███████ | 8593/12188 [18:29:46<7:29:10, 7.50s/it] {'loss': 0.2851, 'grad_norm': 0.7062569352158091, 'learning_rate': 2.1144277710435617e-06, 'epoch': 0.71} + 71%|███████ | 8593/12188 [18:29:46<7:29:10, 7.50s/it] 71%|███████ | 8594/12188 [18:29:53<7:16:33, 7.29s/it] {'loss': 0.3096, 'grad_norm': 0.7274064035696508, 'learning_rate': 2.1133427674759037e-06, 'epoch': 0.71} + 71%|███████ | 8594/12188 [18:29:53<7:16:33, 7.29s/it] 71%|███████ | 8595/12188 [18:30:00<7:10:11, 7.18s/it] {'loss': 0.2882, 'grad_norm': 0.7072660723551519, 'learning_rate': 2.112257967759342e-06, 'epoch': 0.71} + 71%|███████ | 8595/12188 [18:30:00<7:10:11, 7.18s/it] 71%|███████ | 8596/12188 [18:30:07<7:09:35, 7.18s/it] {'loss': 0.2971, 'grad_norm': 0.6647384663281616, 'learning_rate': 2.1111733719704845e-06, 'epoch': 0.71} + 71%|███████ | 8596/12188 [18:30:07<7:09:35, 7.18s/it] 71%|███████ | 8597/12188 [18:30:14<7:00:43, 7.03s/it] {'loss': 0.3086, 'grad_norm': 0.7695134364176482, 'learning_rate': 2.1100889801859226e-06, 'epoch': 0.71} + 71%|███████ | 8597/12188 [18:30:14<7:00:43, 7.03s/it] 71%|███████ | 8598/12188 [18:30:21<7:09:30, 7.18s/it] {'loss': 0.32, 'grad_norm': 0.6973748908148096, 'learning_rate': 2.1090047924822316e-06, 'epoch': 0.71} + 71%|███████ | 8598/12188 [18:30:21<7:09:30, 7.18s/it] 71%|███████ | 8599/12188 [18:30:28<7:02:25, 7.06s/it] {'loss': 0.2943, 'grad_norm': 0.7050904764916824, 'learning_rate': 2.1079208089359797e-06, 'epoch': 0.71} + 71%|███████ | 8599/12188 [18:30:28<7:02:25, 7.06s/it] 71%|███████ | 8600/12188 [18:30:35<6:51:35, 6.88s/it] {'loss': 0.2749, 'grad_norm': 0.6523975802715528, 'learning_rate': 2.1068370296237117e-06, 'epoch': 0.71} + 71%|███████ | 8600/12188 [18:30:35<6:51:35, 6.88s/it] 71%|███████ | 8601/12188 [18:30:41<6:49:50, 6.86s/it] {'loss': 0.3291, 'grad_norm': 0.7179107212436023, 'learning_rate': 2.105753454621966e-06, 'epoch': 0.71} + 71%|███████ | 8601/12188 [18:30:41<6:49:50, 6.86s/it] 71%|███████ | 8602/12188 [18:30:48<6:54:54, 6.94s/it] {'loss': 0.2828, 'grad_norm': 0.6513103401148286, 'learning_rate': 2.10467008400726e-06, 'epoch': 0.71} + 71%|███████ | 8602/12188 [18:30:48<6:54:54, 6.94s/it] 71%|███████ | 8603/12188 [18:30:55<6:52:09, 6.90s/it] {'loss': 0.3315, 'grad_norm': 0.6915826246057426, 'learning_rate': 2.103586917856101e-06, 'epoch': 0.71} + 71%|███████ | 8603/12188 [18:30:55<6:52:09, 6.90s/it] 71%|███████ | 8604/12188 [18:31:02<6:51:55, 6.90s/it] {'loss': 0.3229, 'grad_norm': 0.6645930655339649, 'learning_rate': 2.1025039562449824e-06, 'epoch': 0.71} + 71%|███████ | 8604/12188 [18:31:02<6:51:55, 6.90s/it] 71%|███████ | 8605/12188 [18:31:09<6:51:11, 6.89s/it] {'loss': 0.3175, 'grad_norm': 0.7391944029637704, 'learning_rate': 2.101421199250377e-06, 'epoch': 0.71} + 71%|███████ | 8605/12188 [18:31:09<6:51:11, 6.89s/it] 71%|███████ | 8606/12188 [18:31:16<6:56:34, 6.98s/it] {'loss': 0.3059, 'grad_norm': 0.6701453082421517, 'learning_rate': 2.1003386469487513e-06, 'epoch': 0.71} + 71%|███████ | 8606/12188 [18:31:16<6:56:34, 6.98s/it] 71%|███████ | 8607/12188 [18:31:23<6:53:27, 6.93s/it] {'loss': 0.3225, 'grad_norm': 0.6045579160206003, 'learning_rate': 2.09925629941655e-06, 'epoch': 0.71} + 71%|███████ | 8607/12188 [18:31:23<6:53:27, 6.93s/it] 71%|███████ | 8608/12188 [18:31:30<6:57:12, 6.99s/it] {'loss': 0.3163, 'grad_norm': 0.7827825975514608, 'learning_rate': 2.0981741567302084e-06, 'epoch': 0.71} + 71%|███████ | 8608/12188 [18:31:30<6:57:12, 6.99s/it] 71%|███████ | 8609/12188 [18:31:38<7:15:45, 7.31s/it] {'loss': 0.3031, 'grad_norm': 0.6930913204775817, 'learning_rate': 2.0970922189661475e-06, 'epoch': 0.71} + 71%|███████ | 8609/12188 [18:31:38<7:15:45, 7.31s/it] 71%|███████ | 8610/12188 [18:31:45<7:00:59, 7.06s/it] {'loss': 0.2792, 'grad_norm': 0.6337495686533312, 'learning_rate': 2.0960104862007706e-06, 'epoch': 0.71} + 71%|███████ | 8610/12188 [18:31:45<7:00:59, 7.06s/it] 71%|███████ | 8611/12188 [18:31:52<7:01:22, 7.07s/it] {'loss': 0.3319, 'grad_norm': 0.6191412334291371, 'learning_rate': 2.0949289585104655e-06, 'epoch': 0.71} + 71%|███████ | 8611/12188 [18:31:52<7:01:22, 7.07s/it] 71%|███████ | 8612/12188 [18:31:59<7:08:16, 7.19s/it] {'loss': 0.2816, 'grad_norm': 0.7641686552484275, 'learning_rate': 2.0938476359716115e-06, 'epoch': 0.71} + 71%|███████ | 8612/12188 [18:31:59<7:08:16, 7.19s/it] 71%|███████ | 8613/12188 [18:32:06<7:04:59, 7.13s/it] {'loss': 0.3068, 'grad_norm': 0.6727252629073038, 'learning_rate': 2.092766518660567e-06, 'epoch': 0.71} + 71%|███████ | 8613/12188 [18:32:06<7:04:59, 7.13s/it] 71%|███████ | 8614/12188 [18:32:13<6:57:59, 7.02s/it] {'loss': 0.309, 'grad_norm': 1.4520640939228255, 'learning_rate': 2.091685606653682e-06, 'epoch': 0.71} + 71%|███████ | 8614/12188 [18:32:13<6:57:59, 7.02s/it] 71%|███████ | 8615/12188 [18:32:20<6:53:02, 6.94s/it] {'loss': 0.3267, 'grad_norm': 0.7086534737415748, 'learning_rate': 2.090604900027284e-06, 'epoch': 0.71} + 71%|███████ | 8615/12188 [18:32:20<6:53:02, 6.94s/it] 71%|███████ | 8616/12188 [18:32:27<6:55:25, 6.98s/it] {'loss': 0.2968, 'grad_norm': 0.6872606021659673, 'learning_rate': 2.089524398857697e-06, 'epoch': 0.71} + 71%|███████ | 8616/12188 [18:32:27<6:55:25, 6.98s/it] 71%|███████ | 8617/12188 [18:32:34<6:59:58, 7.06s/it] {'loss': 0.3018, 'grad_norm': 0.7231767888040673, 'learning_rate': 2.088444103221218e-06, 'epoch': 0.71} + 71%|███████ | 8617/12188 [18:32:34<6:59:58, 7.06s/it]Invalidate trace cache @ step 2: expected module 1, but got module 364 + 71%|███████ | 8618/12188 [18:32:40<6:32:17, 6.59s/it] {'loss': 0.6553, 'grad_norm': 0.6904562247404605, 'learning_rate': 2.087364013194139e-06, 'epoch': 0.71} + 71%|███████ | 8618/12188 [18:32:40<6:32:17, 6.59s/it] 71%|██��████ | 8619/12188 [18:32:47<6:53:25, 6.95s/it] {'loss': 0.2992, 'grad_norm': 0.6901026159567332, 'learning_rate': 2.0862841288527363e-06, 'epoch': 0.71} + 71%|███████ | 8619/12188 [18:32:47<6:53:25, 6.95s/it] 71%|███████ | 8620/12188 [18:32:54<6:55:55, 6.99s/it] {'loss': 0.2967, 'grad_norm': 0.6744698657419953, 'learning_rate': 2.085204450273265e-06, 'epoch': 0.71} + 71%|███████ | 8620/12188 [18:32:54<6:55:55, 6.99s/it] 71%|███████ | 8621/12188 [18:33:04<7:48:16, 7.88s/it] {'loss': 0.3266, 'grad_norm': 0.6669800242291101, 'learning_rate': 2.0841249775319722e-06, 'epoch': 0.71} + 71%|███████ | 8621/12188 [18:33:04<7:48:16, 7.88s/it] 71%|███████ | 8622/12188 [18:33:12<7:37:49, 7.70s/it] {'loss': 0.3171, 'grad_norm': 0.7015904203779522, 'learning_rate': 2.0830457107050904e-06, 'epoch': 0.71} + 71%|███████ | 8622/12188 [18:33:12<7:37:49, 7.70s/it] 71%|███████ | 8623/12188 [18:33:18<7:17:09, 7.36s/it] {'loss': 0.2541, 'grad_norm': 0.6388042671821151, 'learning_rate': 2.081966649868833e-06, 'epoch': 0.71} + 71%|███████ | 8623/12188 [18:33:18<7:17:09, 7.36s/it] 71%|███████ | 8624/12188 [18:33:26<7:32:32, 7.62s/it] {'loss': 0.3031, 'grad_norm': 0.684127986464583, 'learning_rate': 2.0808877950994037e-06, 'epoch': 0.71} + 71%|███████ | 8624/12188 [18:33:26<7:32:32, 7.62s/it] 71%|███████ | 8625/12188 [18:33:33<7:19:32, 7.40s/it] {'loss': 0.3348, 'grad_norm': 0.703868487044287, 'learning_rate': 2.0798091464729886e-06, 'epoch': 0.71} + 71%|███████ | 8625/12188 [18:33:33<7:19:32, 7.40s/it] 71%|███████ | 8626/12188 [18:33:41<7:22:17, 7.45s/it] {'loss': 0.3289, 'grad_norm': 0.6097310766540152, 'learning_rate': 2.0787307040657582e-06, 'epoch': 0.71} + 71%|███████ | 8626/12188 [18:33:41<7:22:17, 7.45s/it] 71%|███████ | 8627/12188 [18:33:48<7:21:17, 7.44s/it] {'loss': 0.3063, 'grad_norm': 0.6737255907636763, 'learning_rate': 2.077652467953874e-06, 'epoch': 0.71} + 71%|███████ | 8627/12188 [18:33:48<7:21:17, 7.44s/it] 71%|███████ | 8628/12188 [18:33:55<7:09:57, 7.25s/it] {'loss': 0.3153, 'grad_norm': 0.6787429739979552, 'learning_rate': 2.076574438213475e-06, 'epoch': 0.71} + 71%|███████ | 8628/12188 [18:33:55<7:09:57, 7.25s/it] 71%|███████ | 8629/12188 [18:34:02<7:03:06, 7.13s/it] {'loss': 0.3157, 'grad_norm': 0.7677446726213017, 'learning_rate': 2.0754966149206952e-06, 'epoch': 0.71} + 71%|███████ | 8629/12188 [18:34:02<7:03:06, 7.13s/it] 71%|███████ | 8630/12188 [18:34:09<7:07:48, 7.21s/it] {'loss': 0.293, 'grad_norm': 0.7663581638404202, 'learning_rate': 2.074418998151643e-06, 'epoch': 0.71} + 71%|███████ | 8630/12188 [18:34:09<7:07:48, 7.21s/it] 71%|███████ | 8631/12188 [18:34:16<6:55:37, 7.01s/it] {'loss': 0.3343, 'grad_norm': 0.6829683934271861, 'learning_rate': 2.0733415879824213e-06, 'epoch': 0.71} + 71%|███████ | 8631/12188 [18:34:16<6:55:37, 7.01s/it] 71%|███████ | 8632/12188 [18:34:23<6:54:40, 7.00s/it] {'loss': 0.3146, 'grad_norm': 0.7480549315936282, 'learning_rate': 2.0722643844891167e-06, 'epoch': 0.71} + 71%|███████ | 8632/12188 [18:34:23<6:54:40, 7.00s/it] 71%|███████ | 8633/12188 [18:34:30<6:57:16, 7.04s/it] {'loss': 0.2876, 'grad_norm': 0.6122445800884466, 'learning_rate': 2.0711873877477957e-06, 'epoch': 0.71} + 71%|███████ | 8633/12188 [18:34:30<6:57:16, 7.04s/it] 71%|███████ | 8634/12188 [18:34:37<7:03:58, 7.16s/it] {'loss': 0.3389, 'grad_norm': 0.7085462572151328, 'learning_rate': 2.070110597834517e-06, 'epoch': 0.71} + 71%|███████ | 8634/12188 [18:34:37<7:03:58, 7.16s/it] 71%|███████ | 8635/12188 [18:34:44<6:56:32, 7.03s/it] {'loss': 0.3409, 'grad_norm': 0.6893541053557867, 'learning_rate': 2.0690340148253196e-06, 'epoch': 0.71} + 71%|███████ | 8635/12188 [18:34:44<6:56:32, 7.03s/it] 71%|███████ | 8636/12188 [18:34:51<6:51:53, 6.96s/it] {'loss': 0.276, 'grad_norm': 0.6745415862694574, 'learning_rate': 2.0679576387962307e-06, 'epoch': 0.71} + 71%|███████ | 8636/12188 [18:34:51<6:51:53, 6.96s/it] 71%|███████ | 8637/12188 [18:34:59<7:07:24, 7.22s/it] {'loss': 0.3321, 'grad_norm': 0.6980706060344365, 'learning_rate': 2.066881469823265e-06, 'epoch': 0.71} + 71%|███████ | 8637/12188 [18:34:59<7:07:24, 7.22s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f016ee1f8d0> +[Try #0] Failed to fetch sample 4509545 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f016ee1f8d0> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Cookie statement'"}, {'from': 'gpt', 'value': '\nclick(x=0.941, y=0.583)\n'}]} + 71%|███████ | 8638/12188 [18:35:06<7:11:48, 7.30s/it] {'loss': 0.3199, 'grad_norm': 0.7141504450978707, 'learning_rate': 2.0658055079824167e-06, 'epoch': 0.71} + 71%|███████ | 8638/12188 [18:35:06<7:11:48, 7.30s/it] 71%|███████ | 8639/12188 [18:35:13<7:04:55, 7.18s/it] {'loss': 0.3051, 'grad_norm': 0.7035931744296074, 'learning_rate': 2.064729753349668e-06, 'epoch': 0.71} + 71%|███████ | 8639/12188 [18:35:13<7:04:55, 7.18s/it] 71%|███████ | 8640/12188 [18:35:21<7:07:16, 7.23s/it] {'loss': 0.2715, 'grad_norm': 0.6499129031372103, 'learning_rate': 2.0636542060009897e-06, 'epoch': 0.71} + 71%|███████ | 8640/12188 [18:35:21<7:07:16, 7.23s/it] 71%|███████ | 8641/12188 [18:35:29<7:23:02, 7.49s/it] {'loss': 0.3146, 'grad_norm': 0.6794466306456004, 'learning_rate': 2.062578866012332e-06, 'epoch': 0.71} + 71%|███████ | 8641/12188 [18:35:29<7:23:02, 7.49s/it] 71%|███████ | 8642/12188 [18:35:35<7:09:47, 7.27s/it] {'loss': 0.3284, 'grad_norm': 0.7167829474667018, 'learning_rate': 2.0615037334596377e-06, 'epoch': 0.71} + 71%|███████ | 8642/12188 [18:35:35<7:09:47, 7.27s/it] 71%|███████ | 8643/12188 [18:35:43<7:14:13, 7.35s/it] {'loss': 0.2994, 'grad_norm': 0.6963038745728191, 'learning_rate': 2.060428808418826e-06, 'epoch': 0.71} + 71%|███████ | 8643/12188 [18:35:43<7:14:13, 7.35s/it] 71%|███████ | 8644/12188 [18:35:50<7:14:37, 7.36s/it] {'loss': 0.3293, 'grad_norm': 0.6659537093115858, 'learning_rate': 2.05935409096581e-06, 'epoch': 0.71} + 71%|███████ | 8644/12188 [18:35:50<7:14:37, 7.36s/it] 71%|███████ | 8645/12188 [18:35:59<7:39:09, 7.78s/it] {'loss': 0.3052, 'grad_norm': 0.6719761217651491, 'learning_rate': 2.058279581176485e-06, 'epoch': 0.71} + 71%|███████ | 8645/12188 [18:35:59<7:39:09, 7.78s/it] 71%|███████ | 8646/12188 [18:36:06<7:29:53, 7.62s/it] {'loss': 0.2674, 'grad_norm': 0.6293109030020089, 'learning_rate': 2.057205279126728e-06, 'epoch': 0.71} + 71%|███████ | 8646/12188 [18:36:06<7:29:53, 7.62s/it] 71%|███████ | 8647/12188 [18:36:14<7:28:40, 7.60s/it] {'loss': 0.3511, 'grad_norm': 0.6587291431481554, 'learning_rate': 2.0561311848924082e-06, 'epoch': 0.71} + 71%|███████ | 8647/12188 [18:36:14<7:28:40, 7.60s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 90, in pil_loader + return img.convert("RGB") + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 993, in convert + self.load() + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/ImageFile.py", line 319, in load + raise _get_oserror(err_code, encoder=False) +OSError: unrecognized data stream contents when reading image file +[Try #0] Failed to fetch sample 6015359 in VC:s3://gui-agent/data_20250623/windows_augment/images. Exception: unrecognized data stream contents when reading image file +Problematic sample: {'image': 'autocad/20250508_161646_1/images/before_screenshot_1_id_73_internvl_position_crop_0_grounding_instructions_point_o_paste.png', 'conversations': [{'from': 'human', 'value': "\nQuery:Located in the top ribbon interface of AutoCAD Mechanical 2019, within the Block group of commands. It sits to the right of the 'Define Attributes' button and to the left of the 'Point Cloud' section in the ribbon interface.\nOutput only the bounding box in your response"}, {'from': 'gpt', 'value': "Located in the top ribbon interface of AutoCAD Mechanical 2019, within the Block group of commands. It sits to the right of the 'Define Attributes' button and to the left of the 'Point Cloud' section in the ribbon interface.[[158, 341, 169, 372]]"}], 'width': 3600, 'height': 2338} + 71%|███████ | 8648/12188 [18:36:20<7:09:11, 7.27s/it] {'loss': 0.3403, 'grad_norm': 0.7303783115476247, 'learning_rate': 2.0550572985493724e-06, 'epoch': 0.71} + 71%|███████ | 8648/12188 [18:36:20<7:09:11, 7.27s/it] 71%|███████ | 8649/12188 [18:36:27<7:05:20, 7.21s/it] {'loss': 0.336, 'grad_norm': 0.663198649420936, 'learning_rate': 2.053983620173459e-06, 'epoch': 0.71} + 71%|███████ | 8649/12188 [18:36:27<7:05:20, 7.21s/it] 71%|███████ | 8650/12188 [18:36:48<11:03:09, 11.25s/it] {'loss': 0.2837, 'grad_norm': 0.7144309861740779, 'learning_rate': 2.0529101498404906e-06, 'epoch': 0.71} + 71%|███████ | 8650/12188 [18:36:48<11:03:09, 11.25s/it] 71%|███████ | 8651/12188 [18:36:55<9:43:33, 9.90s/it] {'loss': 0.3496, 'grad_norm': 0.6648116836064685, 'learning_rate': 2.0518368876262712e-06, 'epoch': 0.71} + 71%|███████ | 8651/12188 [18:36:55<9:43:33, 9.90s/it] 71%|███████ | 8652/12188 [18:37:02<8:47:58, 8.96s/it] {'loss': 0.3138, 'grad_norm': 0.6552274783472861, 'learning_rate': 2.0507638336065953e-06, 'epoch': 0.71} + 71%|███████ | 8652/12188 [18:37:02<8:47:58, 8.96s/it] 71%|███████ | 8653/12188 [18:37:09<8:25:49, 8.59s/it] {'loss': 0.3028, 'grad_norm': 0.8878190199519229, 'learning_rate': 2.04969098785724e-06, 'epoch': 0.71} + 71%|███████ | 8653/12188 [18:37:09<8:25:49, 8.59s/it] 71%|███████ | 8654/12188 [18:37:44<16:07:50, 16.43s/it] {'loss': 0.2742, 'grad_norm': 0.6292290673177873, 'learning_rate': 2.0486183504539652e-06, 'epoch': 0.71} + 71%|███████ | 8654/12188 [18:37:44<16:07:50, 16.43s/it] 71%|███████ | 8655/12188 [18:37:53<13:57:09, 14.22s/it] {'loss': 0.2819, 'grad_norm': 0.6906193201845551, 'learning_rate': 2.0475459214725223e-06, 'epoch': 0.71} + 71%|███████ | 8655/12188 [18:37:53<13:57:09, 14.22s/it] 71%|███████ | 8656/12188 [18:38:40<23:34:47, 24.03s/it] {'loss': 0.3258, 'grad_norm': 0.678123085780532, 'learning_rate': 2.046473700988641e-06, 'epoch': 0.71} + 71%|███████ | 8656/12188 [18:38:40<23:34:47, 24.03s/it] 71%|███████ | 8657/12188 [18:39:34<32:26:26, 33.07s/it] {'loss': 0.3356, 'grad_norm': 0.6559265506632429, 'learning_rate': 2.0454016890780423e-06, 'epoch': 0.71} + 71%|███████ | 8657/12188 [18:39:34<32:26:26, 33.07s/it] 71%|███████ | 8658/12188 [18:40:17<35:15:18, 35.95s/it] {'loss': 0.315, 'grad_norm': 0.6850375380027687, 'learning_rate': 2.044329885816431e-06, 'epoch': 0.71} + 71%|███████ | 8658/12188 [18:40:17<35:15:18, 35.95s/it] 71%|███████ | 8659/12188 [18:40:24<26:50:15, 27.38s/it] {'loss': 0.3025, 'grad_norm': 0.6914699283162741, 'learning_rate': 2.0432582912794916e-06, 'epoch': 0.71} + 71%|███████ | 8659/12188 [18:40:24<26:50:15, 27.38s/it] 71%|███████ | 8660/12188 [18:40:47<25:18:59, 25.83s/it] {'loss': 0.3373, 'grad_norm': 0.6802687249595705, 'learning_rate': 2.0421869055429044e-06, 'epoch': 0.71} + 71%|███████ | 8660/12188 [18:40:47<25:18:59, 25.83s/it] 71%|███████ | 8661/12188 [18:41:08<23:55:32, 24.42s/it] {'loss': 0.3373, 'grad_norm': 0.722721890471728, 'learning_rate': 2.0411157286823237e-06, 'epoch': 0.71} + 71%|███████ | 8661/12188 [18:41:08<23:55:32, 24.42s/it] 71%|███████ | 8662/12188 [18:41:29<23:03:19, 23.54s/it] {'loss': 0.3004, 'grad_norm': 0.7099993155931762, 'learning_rate': 2.0400447607733962e-06, 'epoch': 0.71} + 71%|███████ | 8662/12188 [18:41:29<23:03:19, 23.54s/it] 71%|███████ | 8663/12188 [18:42:21<31:24:40, 32.08s/it] {'loss': 0.2818, 'grad_norm': 0.876898054551594, 'learning_rate': 2.038974001891754e-06, 'epoch': 0.71} + 71%|███████ | 8663/12188 [18:42:21<31:24:40, 32.08s/it] 71%|███████ | 8664/12188 [18:43:18<38:36:12, 39.44s/it] {'loss': 0.3145, 'grad_norm': 1.2347318125693012, 'learning_rate': 2.0379034521130093e-06, 'epoch': 0.71} + 71%|███████ | 8664/12188 [18:43:18<38:36:12, 39.44s/it] 71%|███████ | 8665/12188 [18:43:25<29:11:25, 29.83s/it] {'loss': 0.2851, 'grad_norm': 0.6385410499012626, 'learning_rate': 2.036833111512765e-06, 'epoch': 0.71} + 71%|███████ | 8665/12188 [18:43:25<29:11:25, 29.83s/it] 71%|███████ | 8666/12188 [18:43:47<26:49:49, 27.42s/it] {'loss': 0.3082, 'grad_norm': 0.7102085080115476, 'learning_rate': 2.035762980166604e-06, 'epoch': 0.71} + 71%|███████ | 8666/12188 [18:43:47<26:49:49, 27.42s/it] 71%|███████ | 8667/12188 [18:43:54<20:45:16, 21.22s/it] {'loss': 0.3128, 'grad_norm': 0.7533600263936452, 'learning_rate': 2.034693058150101e-06, 'epoch': 0.71} + 71%|███████ | 8667/12188 [18:43:54<20:45:16, 21.22s/it] 71%|███████ | 8668/12188 [18:44:02<16:54:48, 17.30s/it] {'loss': 0.293, 'grad_norm': 0.6247757596974288, 'learning_rate': 2.0336233455388097e-06, 'epoch': 0.71} + 71%|███████ | 8668/12188 [18:44:02<16:54:48, 17.30s/it] 71%|███████ | 8669/12188 [18:44:09<13:52:54, 14.20s/it] {'loss': 0.323, 'grad_norm': 0.8117502949322851, 'learning_rate': 2.03255384240827e-06, 'epoch': 0.71} + 71%|███████ | 8669/12188 [18:44:09<13:52:54, 14.20s/it] 71%|███████ | 8670/12188 [18:44:29<15:34:45, 15.94s/it] {'loss': 0.3117, 'grad_norm': 0.6540934059914936, 'learning_rate': 2.031484548834013e-06, 'epoch': 0.71} + 71%|███████ | 8670/12188 [18:44:29<15:34:45, 15.94s/it] 71%|███████ | 8671/12188 [18:44:36<13:03:20, 13.36s/it] {'loss': 0.3314, 'grad_norm': 0.7446605484919162, 'learning_rate': 2.0304154648915452e-06, 'epoch': 0.71} + 71%|███████ | 8671/12188 [18:44:36<13:03:20, 13.36s/it] 71%|███████ | 8672/12188 [18:44:43<11:07:15, 11.39s/it] {'loss': 0.2876, 'grad_norm': 0.6850156051706658, 'learning_rate': 2.029346590656367e-06, 'epoch': 0.71} + 71%|███████ | 8672/12188 [18:44:43<11:07:15, 11.39s/it] 71%|███████ | 8673/12188 [18:44:50<9:45:18, 9.99s/it] {'loss': 0.3344, 'grad_norm': 0.6307427336412192, 'learning_rate': 2.0282779262039608e-06, 'epoch': 0.71} + 71%|███████ | 8673/12188 [18:44:50<9:45:18, 9.99s/it] 71%|███████ | 8674/12188 [18:44:57<9:04:32, 9.30s/it] {'loss': 0.3224, 'grad_norm': 0.7288090746991917, 'learning_rate': 2.0272094716097917e-06, 'epoch': 0.71} + 71%|███████ | 8674/12188 [18:44:57<9:04:32, 9.30s/it] 71%|███████ | 8675/12188 [18:45:23<13:55:58, 14.28s/it] {'loss': 0.3062, 'grad_norm': 0.6428652685926236, 'learning_rate': 2.026141226949314e-06, 'epoch': 0.71} + 71%|███████ | 8675/12188 [18:45:23<13:55:58, 14.28s/it] 71%|███████ | 8676/12188 [18:45:31<12:05:33, 12.40s/it] {'loss': 0.276, 'grad_norm': 0.7068907874430505, 'learning_rate': 2.025073192297966e-06, 'epoch': 0.71} + 71%|███████ | 8676/12188 [18:45:31<12:05:33, 12.40s/it] 71%|███████ | 8677/12188 [18:45:38<10:24:03, 10.66s/it] {'loss': 0.3168, 'grad_norm': 0.7248724682405944, 'learning_rate': 2.0240053677311683e-06, 'epoch': 0.71} + 71%|███████ | 8677/12188 [18:45:38<10:24:03, 10.66s/it] 71%|███████ | 8678/12188 [18:45:46<9:41:17, 9.94s/it] {'loss': 0.308, 'grad_norm': 0.7382768100006186, 'learning_rate': 2.0229377533243317e-06, 'epoch': 0.71} + 71%|███████ | 8678/12188 [18:45:46<9:41:17, 9.94s/it] 71%|███████ | 8679/12188 [18:45:53<8:48:39, 9.04s/it] {'loss': 0.3284, 'grad_norm': 0.677035662653609, 'learning_rate': 2.0218703491528462e-06, 'epoch': 0.71} + 71%|███████ | 8679/12188 [18:45:53<8:48:39, 9.04s/it] 71%|███████ | 8680/12188 [18:46:00<8:06:00, 8.31s/it] {'loss': 0.3298, 'grad_norm': 0.6104823608357483, 'learning_rate': 2.020803155292094e-06, 'epoch': 0.71} + 71%|███████ | 8680/12188 [18:46:00<8:06:00, 8.31s/it] 71%|███████ | 8681/12188 [18:46:21<11:51:47, 12.18s/it] {'loss': 0.3155, 'grad_norm': 0.6689857680469772, 'learning_rate': 2.0197361718174363e-06, 'epoch': 0.71} + 71%|███████ | 8681/12188 [18:46:21<11:51:47, 12.18s/it] 71%|███████ | 8682/12188 [18:46:42<14:23:38, 14.78s/it] {'loss': 0.2814, 'grad_norm': 0.7152276409855604, 'learning_rate': 2.0186693988042205e-06, 'epoch': 0.71} + 71%|███████ | 8682/12188 [18:46:42<14:23:38, 14.78s/it] 71%|███████ | 8683/12188 [18:46:49<12:06:21, 12.43s/it] {'loss': 0.3383, 'grad_norm': 0.7218468617828654, 'learning_rate': 2.017602836327783e-06, 'epoch': 0.71} + 71%|███████ | 8683/12188 [18:46:49<12:06:21, 12.43s/it] 71%|███████▏ | 8684/12188 [18:46:56<10:32:28, 10.83s/it] {'loss': 0.3082, 'grad_norm': 0.7001040040371089, 'learning_rate': 2.0165364844634404e-06, 'epoch': 0.71} + 71%|███████▏ | 8684/12188 [18:46:56<10:32:28, 10.83s/it] 71%|███████▏ | 8685/12188 [18:47:03<9:32:24, 9.80s/it] {'loss': 0.2882, 'grad_norm': 0.6575820168464382, 'learning_rate': 2.015470343286498e-06, 'epoch': 0.71} + 71%|███████▏ | 8685/12188 [18:47:03<9:32:24, 9.80s/it] 71%|███████▏ | 8686/12188 [18:47:10<8:41:35, 8.94s/it] {'loss': 0.3182, 'grad_norm': 0.7161489013148323, 'learning_rate': 2.014404412872246e-06, 'epoch': 0.71} + 71%|███████▏ | 8686/12188 [18:47:10<8:41:35, 8.94s/it] 71%|███████▏ | 8687/12188 [18:47:19<8:34:42, 8.82s/it] {'loss': 0.3177, 'grad_norm': 0.638023874611153, 'learning_rate': 2.013338693295957e-06, 'epoch': 0.71} + 71%|███████▏ | 8687/12188 [18:47:19<8:34:42, 8.82s/it] 71%|███████▏ | 8688/12188 [18:47:26<8:07:08, 8.35s/it] {'loss': 0.282, 'grad_norm': 0.7007224798658402, 'learning_rate': 2.012273184632892e-06, 'epoch': 0.71} + 71%|███████▏ | 8688/12188 [18:47:26<8:07:08, 8.35s/it] 71%|███████▏ | 8689/12188 [18:47:32<7:33:13, 7.77s/it] {'loss': 0.3066, 'grad_norm': 0.6854037190226966, 'learning_rate': 2.0112078869582928e-06, 'epoch': 0.71} + 71%|███████▏ | 8689/12188 [18:47:32<7:33:13, 7.77s/it] 71%|███████▏ | 8690/12188 [18:47:39<7:21:25, 7.57s/it] {'loss': 0.2975, 'grad_norm': 0.7137127874513172, 'learning_rate': 2.010142800347391e-06, 'epoch': 0.71} + 71%|███████▏ | 8690/12188 [18:47:39<7:21:25, 7.57s/it] 71%|███████▏ | 8691/12188 [18:47:46<7:05:56, 7.31s/it] {'loss': 0.3086, 'grad_norm': 0.8343738300029073, 'learning_rate': 2.009077924875403e-06, 'epoch': 0.71} + 71%|███████▏ | 8691/12188 [18:47:46<7:05:56, 7.31s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f6e5020efc0> +[Try #0] Failed to fetch sample 4434067 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f6e5020efc0> +Problematic sample: {'image': '20240823_045930_before_screenshot_sub3.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Consumer Health Privacy'"}, {'from': 'gpt', 'value': '\nclick(x=0.4845, y=0.92)\n'}]} + 71%|███████▏ | 8692/12188 [18:47:53<7:01:26, 7.23s/it] {'loss': 0.3088, 'grad_norm': 0.6924204660856113, 'learning_rate': 2.0080132606175246e-06, 'epoch': 0.71} + 71%|███████▏ | 8692/12188 [18:47:53<7:01:26, 7.23s/it] 71%|███████▏ | 8693/12188 [18:48:00<6:55:53, 7.14s/it] {'loss': 0.3329, 'grad_norm': 0.7011752927079282, 'learning_rate': 2.0069488076489445e-06, 'epoch': 0.71} + 71%|███████▏ | 8693/12188 [18:48:00<6:55:53, 7.14s/it] 71%|███████▏ | 8694/12188 [18:48:08<7:10:20, 7.39s/it] {'loss': 0.3159, 'grad_norm': 0.640779827868074, 'learning_rate': 2.005884566044829e-06, 'epoch': 0.71} + 71%|███████▏ | 8694/12188 [18:48:08<7:10:20, 7.39s/it] 71%|███████▏ | 8695/12188 [18:48:15<6:56:15, 7.15s/it] {'loss': 0.3455, 'grad_norm': 0.7162119165662145, 'learning_rate': 2.0048205358803373e-06, 'epoch': 0.71} + 71%|███████▏ | 8695/12188 [18:48:15<6:56:15, 7.15s/it] 71%|███████▏ | 8696/12188 [18:48:22<6:56:48, 7.16s/it] {'loss': 0.308, 'grad_norm': 0.6472380773934624, 'learning_rate': 2.0037567172306066e-06, 'epoch': 0.71} + 71%|███████▏ | 8696/12188 [18:48:22<6:56:48, 7.16s/it] 71%|███████▏ | 8697/12188 [18:48:29<6:54:16, 7.12s/it] {'loss': 0.3011, 'grad_norm': 0.6770951335438661, 'learning_rate': 2.0026931101707615e-06, 'epoch': 0.71} + 71%|███████▏ | 8697/12188 [18:48:29<6:54:16, 7.12s/it] 71%|███████▏ | 8698/12188 [18:48:37<7:06:31, 7.33s/it] {'loss': 0.3177, 'grad_norm': 0.6686354215630953, 'learning_rate': 2.0016297147759136e-06, 'epoch': 0.71} + 71%|███████▏ | 8698/12188 [18:48:37<7:06:31, 7.33s/it] 71%|███████▏ | 8699/12188 [18:48:44<7:08:00, 7.36s/it] {'loss': 0.2776, 'grad_norm': 0.7171758847929613, 'learning_rate': 2.0005665311211594e-06, 'epoch': 0.71} + 71%|███████▏ | 8699/12188 [18:48:44<7:08:00, 7.36s/it] 71%|███████▏ | 8700/12188 [18:48:51<7:00:28, 7.23s/it] {'loss': 0.3125, 'grad_norm': 0.7161995053915972, 'learning_rate': 1.9995035592815753e-06, 'epoch': 0.71} + 71%|███████▏ | 8700/12188 [18:48:51<7:00:28, 7.23s/it] 71%|███████▏ | 8701/12188 [18:49:02<7:59:14, 8.25s/it] {'loss': 0.279, 'grad_norm': 0.6738731762597415, 'learning_rate': 1.998440799332232e-06, 'epoch': 0.71} + 71%|███████▏ | 8701/12188 [18:49:02<7:59:14, 8.25s/it] 71%|███████▏ | 8702/12188 [18:49:09<7:33:43, 7.81s/it] {'loss': 0.3076, 'grad_norm': 0.657572797442084, 'learning_rate': 1.9973782513481747e-06, 'epoch': 0.71} + 71%|███████▏ | 8702/12188 [18:49:09<7:33:43, 7.81s/it] 71%|███████▏ | 8703/12188 [18:49:16<7:32:21, 7.79s/it] {'loss': 0.2998, 'grad_norm': 0.7017426929913089, 'learning_rate': 1.9963159154044414e-06, 'epoch': 0.71} + 71%|███████▏ | 8703/12188 [18:49:16<7:32:21, 7.79s/it] 71%|███████▏ | 8704/12188 [18:49:24<7:35:01, 7.84s/it] {'loss': 0.3086, 'grad_norm': 0.6529636914744505, 'learning_rate': 1.9952537915760544e-06, 'epoch': 0.71} + 71%|███████▏ | 8704/12188 [18:49:24<7:35:01, 7.84s/it] 71%|███████▏ | 8705/12188 [18:49:32<7:30:48, 7.77s/it] {'loss': 0.2933, 'grad_norm': 0.6681122031332561, 'learning_rate': 1.994191879938015e-06, 'epoch': 0.71} + 71%|███████▏ | 8705/12188 [18:49:32<7:30:48, 7.77s/it] 71%|███████▏ | 8706/12188 [18:49:39<7:27:08, 7.70s/it] {'loss': 0.3108, 'grad_norm': 0.6860049645809341, 'learning_rate': 1.993130180565318e-06, 'epoch': 0.71} + 71%|███████▏ | 8706/12188 [18:49:39<7:27:08, 7.70s/it] 71%|███████▏ | 8707/12188 [18:49:46<7:10:42, 7.42s/it] {'loss': 0.3017, 'grad_norm': 0.6476169988695959, 'learning_rate': 1.9920686935329347e-06, 'epoch': 0.71} + 71%|███████▏ | 8707/12188 [18:49:46<7:10:42, 7.42s/it] 71%|███████▏ | 8708/12188 [18:49:54<7:24:41, 7.67s/it] {'loss': 0.2927, 'grad_norm': 0.6572155225668831, 'learning_rate': 1.9910074189158295e-06, 'epoch': 0.71} + 71%|███████▏ | 8708/12188 [18:49:54<7:24:41, 7.67s/it] 71%|███████▏ | 8709/12188 [18:50:01<7:09:18, 7.40s/it] {'loss': 0.3477, 'grad_norm': 0.7628580280745553, 'learning_rate': 1.989946356788946e-06, 'epoch': 0.71} + 71%|███████▏ | 8709/12188 [18:50:01<7:09:18, 7.40s/it] 71%|███████▏ | 8710/12188 [18:50:08<7:02:41, 7.29s/it] {'loss': 0.2732, 'grad_norm': 0.6860071728193782, 'learning_rate': 1.988885507227213e-06, 'epoch': 0.71} + 71%|███████▏ | 8710/12188 [18:50:08<7:02:41, 7.29s/it] 71%|███████▏ | 8711/12188 [18:50:15<6:58:29, 7.22s/it] {'loss': 0.3391, 'grad_norm': 0.6996029770844724, 'learning_rate': 1.987824870305548e-06, 'epoch': 0.71} + 71%|███████▏ | 8711/12188 [18:50:15<6:58:29, 7.22s/it] 71%|███████▏ | 8712/12188 [18:50:22<6:54:53, 7.16s/it] {'loss': 0.3119, 'grad_norm': 0.660160426182279, 'learning_rate': 1.9867644460988527e-06, 'epoch': 0.71} + 71%|███████▏ | 8712/12188 [18:50:22<6:54:53, 7.16s/it] 71%|███████▏ | 8713/12188 [18:50:30<7:06:25, 7.36s/it] {'loss': 0.3331, 'grad_norm': 0.6392902697044213, 'learning_rate': 1.9857042346820095e-06, 'epoch': 0.71} + 71%|███████▏ | 8713/12188 [18:50:30<7:06:25, 7.36s/it] 71%|███████▏ | 8714/12188 [18:50:38<7:11:34, 7.45s/it] {'loss': 0.318, 'grad_norm': 0.6745678469968336, 'learning_rate': 1.9846442361298916e-06, 'epoch': 0.71} + 71%|███████▏ | 8714/12188 [18:50:38<7:11:34, 7.45s/it] 72%|███████▏ | 8715/12188 [18:50:45<7:05:12, 7.35s/it] {'loss': 0.2706, 'grad_norm': 0.9354851523593529, 'learning_rate': 1.9835844505173526e-06, 'epoch': 0.72} + 72%|███████▏ | 8715/12188 [18:50:45<7:05:12, 7.35s/it] 72%|███████▏ | 8716/12188 [18:50:52<6:55:36, 7.18s/it] {'loss': 0.3268, 'grad_norm': 0.6911507397676763, 'learning_rate': 1.9825248779192323e-06, 'epoch': 0.72} + 72%|███████▏ | 8716/12188 [18:50:52<6:55:36, 7.18s/it] 72%|███████▏ | 8717/12188 [18:50:59<7:01:20, 7.28s/it] {'loss': 0.3156, 'grad_norm': 0.6895240946049852, 'learning_rate': 1.981465518410359e-06, 'epoch': 0.72} + 72%|███████▏ | 8717/12188 [18:50:59<7:01:20, 7.28s/it] 72%|███████▏ | 8718/12188 [18:51:06<6:54:26, 7.17s/it] {'loss': 0.3152, 'grad_norm': 0.72304840128854, 'learning_rate': 1.9804063720655404e-06, 'epoch': 0.72} + 72%|███████▏ | 8718/12188 [18:51:06<6:54:26, 7.17s/it] 72%|███████▏ | 8719/12188 [18:51:14<7:08:22, 7.41s/it] {'loss': 0.2636, 'grad_norm': 0.6359872403477259, 'learning_rate': 1.9793474389595733e-06, 'epoch': 0.72} + 72%|███████▏ | 8719/12188 [18:51:14<7:08:22, 7.41s/it] 72%|███████▏ | 8720/12188 [18:51:21<7:02:07, 7.30s/it] {'loss': 0.2934, 'grad_norm': 0.6843448035964024, 'learning_rate': 1.9782887191672357e-06, 'epoch': 0.72} + 72%|███████▏ | 8720/12188 [18:51:21<7:02:07, 7.30s/it] 72%|███████▏ | 8721/12188 [18:51:28<6:56:40, 7.21s/it] {'loss': 0.2875, 'grad_norm': 0.673813910342241, 'learning_rate': 1.977230212763296e-06, 'epoch': 0.72} + 72%|███████▏ | 8721/12188 [18:51:28<6:56:40, 7.21s/it] 72%|███████▏ | 8722/12188 [18:51:35<6:54:34, 7.18s/it] {'loss': 0.2906, 'grad_norm': 0.638958724490516, 'learning_rate': 1.9761719198225e-06, 'epoch': 0.72} + 72%|███████▏ | 8722/12188 [18:51:35<6:54:34, 7.18s/it] 72%|███████▏ | 8723/12188 [18:51:42<6:51:28, 7.12s/it] {'loss': 0.3185, 'grad_norm': 0.7310878297815753, 'learning_rate': 1.975113840419587e-06, 'epoch': 0.72} + 72%|███████▏ | 8723/12188 [18:51:42<6:51:28, 7.12s/it] 72%|███████▏ | 8724/12188 [18:51:50<7:08:12, 7.42s/it] {'loss': 0.3168, 'grad_norm': 0.7664561912875966, 'learning_rate': 1.9740559746292744e-06, 'epoch': 0.72} + 72%|███████▏ | 8724/12188 [18:51:50<7:08:12, 7.42s/it] 72%|███████▏ | 8725/12188 [18:51:57<7:00:01, 7.28s/it] {'loss': 0.2936, 'grad_norm': 0.634288471385577, 'learning_rate': 1.9729983225262654e-06, 'epoch': 0.72} + 72%|███████▏ | 8725/12188 [18:51:57<7:00:01, 7.28s/it] 72%|███████▏ | 8726/12188 [18:52:05<6:59:27, 7.27s/it] {'loss': 0.2903, 'grad_norm': 0.7653694470899749, 'learning_rate': 1.971940884185252e-06, 'epoch': 0.72} + 72%|███████▏ | 8726/12188 [18:52:05<6:59:27, 7.27s/it] 72%|███████▏ | 8727/12188 [18:52:12<6:57:53, 7.24s/it] {'loss': 0.288, 'grad_norm': 0.6796593335799707, 'learning_rate': 1.97088365968091e-06, 'epoch': 0.72} + 72%|███████▏ | 8727/12188 [18:52:12<6:57:53, 7.24s/it] 72%|███████▏ | 8728/12188 [18:52:19<6:59:56, 7.28s/it] {'loss': 0.281, 'grad_norm': 0.7491139810162085, 'learning_rate': 1.9698266490878955e-06, 'epoch': 0.72} + 72%|███████▏ | 8728/12188 [18:52:19<6:59:56, 7.28s/it] 72%|███████▏ | 8729/12188 [18:52:25<6:45:10, 7.03s/it] {'loss': 0.2883, 'grad_norm': 0.6520808907302309, 'learning_rate': 1.9687698524808547e-06, 'epoch': 0.72} + 72%|███████▏ | 8729/12188 [18:52:26<6:45:10, 7.03s/it] 72%|███████▏ | 8730/12188 [18:52:33<6:45:04, 7.03s/it] {'loss': 0.2651, 'grad_norm': 0.6743231590253416, 'learning_rate': 1.9677132699344188e-06, 'epoch': 0.72} + 72%|███████▏ | 8730/12188 [18:52:33<6:45:04, 7.03s/it] 72%|███████▏ | 8731/12188 [18:52:39<6:42:17, 6.98s/it] {'loss': 0.3065, 'grad_norm': 0.629026936139408, 'learning_rate': 1.966656901523198e-06, 'epoch': 0.72} + 72%|███████▏ | 8731/12188 [18:52:39<6:42:17, 6.98s/it] 72%|███████▏ | 8732/12188 [18:52:47<6:56:01, 7.22s/it] {'loss': 0.3132, 'grad_norm': 0.6785248266049204, 'learning_rate': 1.965600747321795e-06, 'epoch': 0.72} + 72%|███████▏ | 8732/12188 [18:52:47<6:56:01, 7.22s/it] 72%|███████▏ | 8733/12188 [18:52:54<6:54:24, 7.20s/it] {'loss': 0.3075, 'grad_norm': 0.688124078553897, 'learning_rate': 1.9645448074047907e-06, 'epoch': 0.72} + 72%|███████▏ | 8733/12188 [18:52:54<6:54:24, 7.20s/it] 72%|███████▏ | 8734/12188 [18:53:03<7:14:11, 7.54s/it] {'loss': 0.3034, 'grad_norm': 0.6889873357072427, 'learning_rate': 1.9634890818467573e-06, 'epoch': 0.72} + 72%|███████▏ | 8734/12188 [18:53:03<7:14:11, 7.54s/it] 72%|███████▏ | 8735/12188 [18:53:10<7:08:19, 7.44s/it] {'loss': 0.2945, 'grad_norm': 0.728606433632499, 'learning_rate': 1.9624335707222443e-06, 'epoch': 0.72} + 72%|███████▏ | 8735/12188 [18:53:10<7:08:19, 7.44s/it] 72%|███████▏ | 8736/12188 [18:53:20<7:58:34, 8.32s/it] {'loss': 0.3341, 'grad_norm': 0.7079211978264676, 'learning_rate': 1.961378274105794e-06, 'epoch': 0.72} + 72%|███████▏ | 8736/12188 [18:53:20<7:58:34, 8.32s/it] 72%|███████▏ | 8737/12188 [18:53:28<7:43:16, 8.05s/it] {'loss': 0.2771, 'grad_norm': 0.7233542747930684, 'learning_rate': 1.9603231920719283e-06, 'epoch': 0.72} + 72%|███████▏ | 8737/12188 [18:53:28<7:43:16, 8.05s/it] 72%|███████▏ | 8738/12188 [18:53:35<7:29:31, 7.82s/it] {'loss': 0.3354, 'grad_norm': 0.7164533425825862, 'learning_rate': 1.959268324695153e-06, 'epoch': 0.72} + 72%|███████▏ | 8738/12188 [18:53:35<7:29:31, 7.82s/it] 72%|███████▏ | 8739/12188 [18:53:42<7:24:51, 7.74s/it] {'loss': 0.3173, 'grad_norm': 0.68748022090083, 'learning_rate': 1.958213672049964e-06, 'epoch': 0.72} + 72%|███████▏ | 8739/12188 [18:53:43<7:24:51, 7.74s/it] 72%|███████▏ | 8740/12188 [18:53:49<7:04:44, 7.39s/it] {'loss': 0.3336, 'grad_norm': 0.7181280183541695, 'learning_rate': 1.95715923421084e-06, 'epoch': 0.72} + 72%|███████▏ | 8740/12188 [18:53:49<7:04:44, 7.39s/it] 72%|███████▏ | 8741/12188 [18:53:56<6:59:29, 7.30s/it] {'loss': 0.2971, 'grad_norm': 0.8573974534019405, 'learning_rate': 1.9561050112522395e-06, 'epoch': 0.72} + 72%|███████▏ | 8741/12188 [18:53:56<6:59:29, 7.30s/it] 72%|███████▏ | 8742/12188 [18:54:04<7:05:36, 7.41s/it] {'loss': 0.3248, 'grad_norm': 0.7790223370821655, 'learning_rate': 1.955051003248615e-06, 'epoch': 0.72} + 72%|███████▏ | 8742/12188 [18:54:04<7:05:36, 7.41s/it] 72%|███████▏ | 8743/12188 [18:54:12<7:20:16, 7.67s/it] {'loss': 0.2707, 'grad_norm': 0.6863212878848213, 'learning_rate': 1.9539972102743956e-06, 'epoch': 0.72} + 72%|███████▏ | 8743/12188 [18:54:12<7:20:16, 7.67s/it] 72%|███████▏ | 8744/12188 [18:54:19<7:09:09, 7.48s/it] {'loss': 0.318, 'grad_norm': 0.6587152733547417, 'learning_rate': 1.9529436324039986e-06, 'epoch': 0.72} + 72%|███████▏ | 8744/12188 [18:54:19<7:09:09, 7.48s/it] 72%|███████▏ | 8745/12188 [18:54:26<7:06:05, 7.43s/it] {'loss': 0.3183, 'grad_norm': 0.7203639571840522, 'learning_rate': 1.951890269711829e-06, 'epoch': 0.72} + 72%|��██████▏ | 8745/12188 [18:54:26<7:06:05, 7.43s/it] 72%|███████▏ | 8746/12188 [18:54:34<7:08:16, 7.47s/it] {'loss': 0.2922, 'grad_norm': 0.6521604234764918, 'learning_rate': 1.9508371222722692e-06, 'epoch': 0.72} + 72%|███████▏ | 8746/12188 [18:54:34<7:08:16, 7.47s/it] 72%|███████▏ | 8747/12188 [18:54:41<7:06:48, 7.44s/it] {'loss': 0.2971, 'grad_norm': 0.704289090940811, 'learning_rate': 1.9497841901596932e-06, 'epoch': 0.72} + 72%|███████▏ | 8747/12188 [18:54:41<7:06:48, 7.44s/it] 72%|███████▏ | 8748/12188 [18:54:49<7:09:15, 7.49s/it] {'loss': 0.3074, 'grad_norm': 0.6604199605908143, 'learning_rate': 1.9487314734484593e-06, 'epoch': 0.72} + 72%|███████▏ | 8748/12188 [18:54:49<7:09:15, 7.49s/it] 72%|███████▏ | 8749/12188 [18:54:56<6:56:50, 7.27s/it] {'loss': 0.2814, 'grad_norm': 0.6903273163845, 'learning_rate': 1.947678972212906e-06, 'epoch': 0.72} + 72%|███████▏ | 8749/12188 [18:54:56<6:56:50, 7.27s/it] 72%|███████▏ | 8750/12188 [18:55:02<6:44:24, 7.06s/it] {'loss': 0.2732, 'grad_norm': 0.6657522473314494, 'learning_rate': 1.9466266865273582e-06, 'epoch': 0.72} + 72%|███████▏ | 8750/12188 [18:55:02<6:44:24, 7.06s/it] 72%|███████▏ | 8751/12188 [18:55:10<6:50:57, 7.17s/it] {'loss': 0.3266, 'grad_norm': 0.701178558704494, 'learning_rate': 1.9455746164661297e-06, 'epoch': 0.72} + 72%|███████▏ | 8751/12188 [18:55:10<6:50:57, 7.17s/it] 72%|███████▏ | 8752/12188 [18:55:18<7:08:58, 7.49s/it] {'loss': 0.2972, 'grad_norm': 0.7660913107805865, 'learning_rate': 1.944522762103513e-06, 'epoch': 0.72} + 72%|███████▏ | 8752/12188 [18:55:18<7:08:58, 7.49s/it] 72%|███████▏ | 8753/12188 [18:55:25<7:04:49, 7.42s/it] {'loss': 0.3152, 'grad_norm': 0.6209015876509123, 'learning_rate': 1.9434711235137916e-06, 'epoch': 0.72} + 72%|███████▏ | 8753/12188 [18:55:25<7:04:49, 7.42s/it] 72%|███████▏ | 8754/12188 [18:55:32<6:56:22, 7.28s/it] {'loss': 0.3183, 'grad_norm': 0.7092796810917408, 'learning_rate': 1.9424197007712266e-06, 'epoch': 0.72} + 72%|███████▏ | 8754/12188 [18:55:32<6:56:22, 7.28s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f17cc743560> +[Try #0] Failed to fetch sample 4527118 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f17cc743560> +Problematic sample: {'image': '20240822_131046_before_screenshot_sub0.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Banded'"}, {'from': 'gpt', 'value': '\nclick(x=0.844, y=0.139)\n'}]} + 72%|███████▏ | 8755/12188 [18:55:40<7:02:56, 7.39s/it] {'loss': 0.3158, 'grad_norm': 0.7252326654755689, 'learning_rate': 1.941368493950072e-06, 'epoch': 0.72} + 72%|███████▏ | 8755/12188 [18:55:40<7:02:56, 7.39s/it] 72%|███████▏ | 8756/12188 [18:55:47<7:02:45, 7.39s/it] {'loss': 0.275, 'grad_norm': 0.7715873946787926, 'learning_rate': 1.9403175031245587e-06, 'epoch': 0.72} + 72%|███████▏ | 8756/12188 [18:55:47<7:02:45, 7.39s/it] 72%|███████▏ | 8757/12188 [18:55:55<7:01:04, 7.36s/it] {'loss': 0.3053, 'grad_norm': 1.132520071578745, 'learning_rate': 1.9392667283689072e-06, 'epoch': 0.72} + 72%|███████▏ | 8757/12188 [18:55:55<7:01:04, 7.36s/it] 72%|███████▏ | 8758/12188 [18:56:01<6:52:27, 7.22s/it] {'loss': 0.2822, 'grad_norm': 0.6698474728264862, 'learning_rate': 1.9382161697573237e-06, 'epoch': 0.72} + 72%|███████▏ | 8758/12188 [18:56:01<6:52:27, 7.22s/it] 72%|███████▏ | 8759/12188 [18:56:09<6:55:26, 7.27s/it] {'loss': 0.3059, 'grad_norm': 0.7406507509093772, 'learning_rate': 1.937165827363994e-06, 'epoch': 0.72} + 72%|███████▏ | 8759/12188 [18:56:09<6:55:26, 7.27s/it] 72%|███████▏ | 8760/12188 [18:56:17<7:03:55, 7.42s/it] {'loss': 0.2942, 'grad_norm': 0.6894411225948008, 'learning_rate': 1.9361157012630923e-06, 'epoch': 0.72} + 72%|███████▏ | 8760/12188 [18:56:17<7:03:55, 7.42s/it] 72%|███████▏ | 8761/12188 [18:56:24<7:04:30, 7.43s/it] {'loss': 0.2856, 'grad_norm': 1.1969129996877022, 'learning_rate': 1.935065791528779e-06, 'epoch': 0.72} + 72%|███████▏ | 8761/12188 [18:56:24<7:04:30, 7.43s/it] 72%|███████▏ | 8762/12188 [18:56:31<6:53:10, 7.24s/it] {'loss': 0.3067, 'grad_norm': 0.632286798168785, 'learning_rate': 1.9340160982351937e-06, 'epoch': 0.72} + 72%|███████▏ | 8762/12188 [18:56:31<6:53:10, 7.24s/it] 72%|███████▏ | 8763/12188 [18:56:39<7:03:35, 7.42s/it] {'loss': 0.2648, 'grad_norm': 0.6432015936173333, 'learning_rate': 1.932966621456467e-06, 'epoch': 0.72} + 72%|███████▏ | 8763/12188 [18:56:39<7:03:35, 7.42s/it] 72%|███████▏ | 8764/12188 [18:56:45<6:51:29, 7.21s/it] {'loss': 0.2824, 'grad_norm': 0.7118834529773037, 'learning_rate': 1.93191736126671e-06, 'epoch': 0.72} + 72%|███████▏ | 8764/12188 [18:56:45<6:51:29, 7.21s/it] 72%|███████▏ | 8765/12188 [18:56:52<6:47:58, 7.15s/it] {'loss': 0.3038, 'grad_norm': 0.6410752759581183, 'learning_rate': 1.9308683177400186e-06, 'epoch': 0.72} + 72%|███████▏ | 8765/12188 [18:56:52<6:47:58, 7.15s/it] 72%|███████▏ | 8766/12188 [18:57:00<6:47:25, 7.14s/it] {'loss': 0.316, 'grad_norm': 1.8205168462918362, 'learning_rate': 1.9298194909504767e-06, 'epoch': 0.72} + 72%|███████▏ | 8766/12188 [18:57:00<6:47:25, 7.14s/it] 72%|███████▏ | 8767/12188 [18:57:07<6:46:10, 7.12s/it] {'loss': 0.284, 'grad_norm': 0.684275314577548, 'learning_rate': 1.9287708809721485e-06, 'epoch': 0.72} + 72%|███████▏ | 8767/12188 [18:57:07<6:46:10, 7.12s/it] 72%|███████▏ | 8768/12188 [18:57:13<6:40:30, 7.03s/it] {'loss': 0.2787, 'grad_norm': 0.7816438680756649, 'learning_rate': 1.9277224878790878e-06, 'epoch': 0.72} + 72%|███████▏ | 8768/12188 [18:57:13<6:40:30, 7.03s/it] 72%|███████▏ | 8769/12188 [18:57:20<6:40:52, 7.03s/it] {'loss': 0.3194, 'grad_norm': 0.6854293826266691, 'learning_rate': 1.9266743117453274e-06, 'epoch': 0.72} + 72%|███████▏ | 8769/12188 [18:57:20<6:40:52, 7.03s/it] 72%|███████▏ | 8770/12188 [18:57:28<6:41:33, 7.05s/it] {'loss': 0.2856, 'grad_norm': 0.7250519662747927, 'learning_rate': 1.925626352644889e-06, 'epoch': 0.72} + 72%|███████▏ | 8770/12188 [18:57:28<6:41:33, 7.05s/it] 72%|███████▏ | 8771/12188 [18:57:34<6:39:42, 7.02s/it] {'loss': 0.2962, 'grad_norm': 0.6510055363592454, 'learning_rate': 1.9245786106517807e-06, 'epoch': 0.72} + 72%|███████▏ | 8771/12188 [18:57:34<6:39:42, 7.02s/it] 72%|███████▏ | 8772/12188 [18:57:41<6:38:18, 7.00s/it] {'loss': 0.2975, 'grad_norm': 0.6589763456106594, 'learning_rate': 1.923531085839988e-06, 'epoch': 0.72} + 72%|███████▏ | 8772/12188 [18:57:41<6:38:18, 7.00s/it] 72%|███████▏ | 8773/12188 [18:57:49<6:54:44, 7.29s/it] {'loss': 0.2739, 'grad_norm': 0.6592441791699473, 'learning_rate': 1.922483778283489e-06, 'epoch': 0.72} + 72%|███████▏ | 8773/12188 [18:57:49<6:54:44, 7.29s/it] 72%|███████▏ | 8774/12188 [18:57:57<6:56:32, 7.32s/it] {'loss': 0.3165, 'grad_norm': 0.8440492478225594, 'learning_rate': 1.921436688056239e-06, 'epoch': 0.72} + 72%|███████▏ | 8774/12188 [18:57:57<6:56:32, 7.32s/it] 72%|███████▏ | 8775/12188 [18:58:05<7:13:22, 7.62s/it] {'loss': 0.3171, 'grad_norm': 0.742855498356306, 'learning_rate': 1.9203898152321843e-06, 'epoch': 0.72} + 72%|███████▏ | 8775/12188 [18:58:05<7:13:22, 7.62s/it] 72%|███████▏ | 8776/12188 [18:58:12<7:07:13, 7.51s/it] {'loss': 0.3478, 'grad_norm': 0.6867595493401953, 'learning_rate': 1.919343159885255e-06, 'epoch': 0.72} + 72%|███████▏ | 8776/12188 [18:58:12<7:07:13, 7.51s/it] 72%|███████▏ | 8777/12188 [18:58:19<7:00:27, 7.40s/it] {'loss': 0.3137, 'grad_norm': 0.6686506992712807, 'learning_rate': 1.9182967220893616e-06, 'epoch': 0.72} + 72%|███████▏ | 8777/12188 [18:58:19<7:00:27, 7.40s/it] 72%|███████▏ | 8778/12188 [18:58:27<7:01:09, 7.41s/it] {'loss': 0.2907, 'grad_norm': 0.6435774257645878, 'learning_rate': 1.917250501918402e-06, 'epoch': 0.72} + 72%|███████▏ | 8778/12188 [18:58:27<7:01:09, 7.41s/it] 72%|███████▏ | 8779/12188 [18:58:35<7:09:05, 7.55s/it] {'loss': 0.3361, 'grad_norm': 0.7749148526204607, 'learning_rate': 1.9162044994462596e-06, 'epoch': 0.72} + 72%|███████▏ | 8779/12188 [18:58:35<7:09:05, 7.55s/it] 72%|███████▏ | 8780/12188 [18:58:42<7:01:15, 7.42s/it] {'loss': 0.3098, 'grad_norm': 0.6810214077202398, 'learning_rate': 1.9151587147468e-06, 'epoch': 0.72} + 72%|███████▏ | 8780/12188 [18:58:42<7:01:15, 7.42s/it] 72%|███████▏ | 8781/12188 [18:58:48<6:43:23, 7.10s/it] {'loss': 0.3063, 'grad_norm': 0.6964930425998914, 'learning_rate': 1.9141131478938772e-06, 'epoch': 0.72} + 72%|███████▏ | 8781/12188 [18:58:48<6:43:23, 7.10s/it] 72%|███████▏ | 8782/12188 [18:58:56<7:01:04, 7.42s/it] {'loss': 0.2793, 'grad_norm': 0.7224259021735384, 'learning_rate': 1.9130677989613246e-06, 'epoch': 0.72} + 72%|███████▏ | 8782/12188 [18:58:56<7:01:04, 7.42s/it] 72%|███████▏ | 8783/12188 [18:59:04<6:56:56, 7.35s/it] {'loss': 0.3158, 'grad_norm': 0.6525144532151577, 'learning_rate': 1.912022668022964e-06, 'epoch': 0.72} + 72%|███████▏ | 8783/12188 [18:59:04<6:56:56, 7.35s/it] 72%|███████▏ | 8784/12188 [18:59:11<7:05:10, 7.49s/it] {'loss': 0.2779, 'grad_norm': 0.7436195919808097, 'learning_rate': 1.910977755152604e-06, 'epoch': 0.72} + 72%|███████▏ | 8784/12188 [18:59:11<7:05:10, 7.49s/it] 72%|███████▏ | 8785/12188 [18:59:20<7:24:56, 7.84s/it] {'loss': 0.2821, 'grad_norm': 0.6706583210356017, 'learning_rate': 1.909933060424029e-06, 'epoch': 0.72} + 72%|███████▏ | 8785/12188 [18:59:20<7:24:56, 7.84s/it] 72%|███████▏ | 8786/12188 [18:59:27<7:08:15, 7.55s/it] {'loss': 0.2963, 'grad_norm': 0.6462332027876059, 'learning_rate': 1.9088885839110193e-06, 'epoch': 0.72} + 72%|███████▏ | 8786/12188 [18:59:27<7:08:15, 7.55s/it] 72%|███████▏ | 8787/12188 [18:59:34<6:54:05, 7.31s/it] {'loss': 0.2698, 'grad_norm': 0.7037894605398226, 'learning_rate': 1.90784432568733e-06, 'epoch': 0.72} + 72%|███████▏ | 8787/12188 [18:59:34<6:54:05, 7.31s/it] 72%|███████▏ | 8788/12188 [18:59:41<6:45:08, 7.15s/it] {'loss': 0.3464, 'grad_norm': 0.724560577537143, 'learning_rate': 1.906800285826706e-06, 'epoch': 0.72} + 72%|███████▏ | 8788/12188 [18:59:41<6:45:08, 7.15s/it] 72%|███████▏ | 8789/12188 [18:59:48<6:55:54, 7.34s/it] {'loss': 0.2959, 'grad_norm': 0.6801639046272174, 'learning_rate': 1.9057564644028782e-06, 'epoch': 0.72} + 72%|███████▏ | 8789/12188 [18:59:48<6:55:54, 7.34s/it] 72%|███████▏ | 8790/12188 [18:59:55<6:50:44, 7.25s/it] {'loss': 0.3193, 'grad_norm': 0.7347541256140651, 'learning_rate': 1.9047128614895555e-06, 'epoch': 0.72} + 72%|███████▏ | 8790/12188 [18:59:55<6:50:44, 7.25s/it] 72%|███████▏ | 8791/12188 [19:00:03<6:49:14, 7.23s/it] {'loss': 0.2798, 'grad_norm': 0.7298803171739334, 'learning_rate': 1.9036694771604397e-06, 'epoch': 0.72} + 72%|███████▏ | 8791/12188 [19:00:03<6:49:14, 7.23s/it] 72%|███████▏ | 8792/12188 [19:00:10<6:51:09, 7.26s/it] {'loss': 0.3361, 'grad_norm': 0.7748411029504626, 'learning_rate': 1.9026263114892107e-06, 'epoch': 0.72} + 72%|███████▏ | 8792/12188 [19:00:10<6:51:09, 7.26s/it] 72%|███████▏ | 8793/12188 [19:00:17<6:44:03, 7.14s/it] {'loss': 0.3447, 'grad_norm': 0.8015120180030773, 'learning_rate': 1.9015833645495335e-06, 'epoch': 0.72} + 72%|███████▏ | 8793/12188 [19:00:17<6:44:03, 7.14s/it] 72%|███████▏ | 8794/12188 [19:00:24<6:43:07, 7.13s/it] {'loss': 0.2863, 'grad_norm': 0.7041721852177341, 'learning_rate': 1.900540636415063e-06, 'epoch': 0.72} + 72%|███████▏ | 8794/12188 [19:00:24<6:43:07, 7.13s/it] 72%|███████▏ | 8795/12188 [19:00:31<6:44:30, 7.15s/it] {'loss': 0.3455, 'grad_norm': 0.6573191328663264, 'learning_rate': 1.899498127159431e-06, 'epoch': 0.72} + 72%|███████▏ | 8795/12188 [19:00:31<6:44:30, 7.15s/it] 72%|███████▏ | 8796/12188 [19:00:38<6:45:59, 7.18s/it] {'loss': 0.2918, 'grad_norm': 0.6557364538772836, 'learning_rate': 1.89845583685626e-06, 'epoch': 0.72} + 72%|███████▏ | 8796/12188 [19:00:38<6:45:59, 7.18s/it] 72%|███████▏ | 8797/12188 [19:00:46<6:52:55, 7.31s/it] {'loss': 0.3124, 'grad_norm': 0.6895814119431082, 'learning_rate': 1.897413765579157e-06, 'epoch': 0.72} + 72%|███████▏ | 8797/12188 [19:00:46<6:52:55, 7.31s/it] 72%|███████▏ | 8798/12188 [19:00:54<7:09:27, 7.60s/it] {'loss': 0.2775, 'grad_norm': 0.6782863682122618, 'learning_rate': 1.8963719134017073e-06, 'epoch': 0.72} + 72%|███████▏ | 8798/12188 [19:00:54<7:09:27, 7.60s/it] 72%|███████▏ | 8799/12188 [19:01:01<7:01:07, 7.46s/it] {'loss': 0.2949, 'grad_norm': 0.6879685570018319, 'learning_rate': 1.8953302803974882e-06, 'epoch': 0.72} + 72%|███████▏ | 8799/12188 [19:01:01<7:01:07, 7.46s/it] 72%|███████▏ | 8800/12188 [19:01:09<6:58:51, 7.42s/it] {'loss': 0.2946, 'grad_norm': 0.7131996602404125, 'learning_rate': 1.894288866640055e-06, 'epoch': 0.72} + 72%|███████▏ | 8800/12188 [19:01:09<6:58:51, 7.42s/it] 72%|███████▏ | 8801/12188 [19:01:15<6:47:44, 7.22s/it] {'loss': 0.332, 'grad_norm': 0.6942787193283043, 'learning_rate': 1.893247672202952e-06, 'epoch': 0.72} + 72%|███████▏ | 8801/12188 [19:01:15<6:47:44, 7.22s/it] 72%|███████▏ | 8802/12188 [19:01:22<6:45:55, 7.19s/it] {'loss': 0.3175, 'grad_norm': 0.6642793689093893, 'learning_rate': 1.8922066971597092e-06, 'epoch': 0.72} + 72%|███████▏ | 8802/12188 [19:01:22<6:45:55, 7.19s/it] 72%|███████▏ | 8803/12188 [19:01:31<7:14:57, 7.71s/it] {'loss': 0.301, 'grad_norm': 0.7020480501135085, 'learning_rate': 1.8911659415838346e-06, 'epoch': 0.72} + 72%|███████▏ | 8803/12188 [19:01:31<7:14:57, 7.71s/it] 72%|███████▏ | 8804/12188 [19:01:38<7:00:16, 7.45s/it] {'loss': 0.3107, 'grad_norm': 0.7275655733368539, 'learning_rate': 1.8901254055488283e-06, 'epoch': 0.72} + 72%|███████▏ | 8804/12188 [19:01:38<7:00:16, 7.45s/it] 72%|���██████▏ | 8805/12188 [19:01:45<6:45:14, 7.19s/it] {'loss': 0.3366, 'grad_norm': 0.6744625258378093, 'learning_rate': 1.8890850891281693e-06, 'epoch': 0.72} + 72%|███████▏ | 8805/12188 [19:01:45<6:45:14, 7.19s/it] 72%|███████▏ | 8806/12188 [19:01:52<6:40:11, 7.10s/it] {'loss': 0.3114, 'grad_norm': 0.6854078898044887, 'learning_rate': 1.8880449923953214e-06, 'epoch': 0.72} + 72%|███████▏ | 8806/12188 [19:01:52<6:40:11, 7.10s/it] 72%|███████▏ | 8807/12188 [19:02:00<6:56:39, 7.39s/it] {'loss': 0.3014, 'grad_norm': 0.66035487417567, 'learning_rate': 1.8870051154237385e-06, 'epoch': 0.72} + 72%|███████▏ | 8807/12188 [19:02:00<6:56:39, 7.39s/it] 72%|███████▏ | 8808/12188 [19:02:08<7:14:04, 7.71s/it] {'loss': 0.2733, 'grad_norm': 0.6738764906199928, 'learning_rate': 1.8859654582868508e-06, 'epoch': 0.72} + 72%|███████▏ | 8808/12188 [19:02:08<7:14:04, 7.71s/it] 72%|███████▏ | 8809/12188 [19:02:16<7:07:48, 7.60s/it] {'loss': 0.2863, 'grad_norm': 0.7079058559144006, 'learning_rate': 1.884926021058081e-06, 'epoch': 0.72} + 72%|███████▏ | 8809/12188 [19:02:16<7:07:48, 7.60s/it] 72%|███████▏ | 8810/12188 [19:02:23<6:57:27, 7.41s/it] {'loss': 0.299, 'grad_norm': 0.6862166718276848, 'learning_rate': 1.8838868038108292e-06, 'epoch': 0.72} + 72%|███████▏ | 8810/12188 [19:02:23<6:57:27, 7.41s/it] 72%|███████▏ | 8811/12188 [19:02:29<6:46:09, 7.22s/it] {'loss': 0.3156, 'grad_norm': 0.7066066093008425, 'learning_rate': 1.8828478066184852e-06, 'epoch': 0.72} + 72%|███████▏ | 8811/12188 [19:02:29<6:46:09, 7.22s/it] 72%|███████▏ | 8812/12188 [19:02:37<6:55:57, 7.39s/it] {'loss': 0.2898, 'grad_norm': 0.719283480678453, 'learning_rate': 1.8818090295544228e-06, 'epoch': 0.72} + 72%|███████▏ | 8812/12188 [19:02:37<6:55:57, 7.39s/it] 72%|███████▏ | 8813/12188 [19:02:44<6:44:07, 7.18s/it] {'loss': 0.3154, 'grad_norm': 0.726069435139811, 'learning_rate': 1.880770472691995e-06, 'epoch': 0.72} + 72%|███████▏ | 8813/12188 [19:02:44<6:44:07, 7.18s/it] 72%|███████▏ | 8814/12188 [19:02:51<6:47:07, 7.24s/it] {'loss': 0.309, 'grad_norm': 0.7196329281084355, 'learning_rate': 1.8797321361045451e-06, 'epoch': 0.72} + 72%|███████▏ | 8814/12188 [19:02:51<6:47:07, 7.24s/it] 72%|███████▏ | 8815/12188 [19:02:58<6:40:12, 7.12s/it] {'loss': 0.3324, 'grad_norm': 0.6199155559817806, 'learning_rate': 1.8786940198654001e-06, 'epoch': 0.72} + 72%|███████▏ | 8815/12188 [19:02:58<6:40:12, 7.12s/it] 72%|███████▏ | 8816/12188 [19:03:05<6:34:28, 7.02s/it] {'loss': 0.3084, 'grad_norm': 0.6772160791625833, 'learning_rate': 1.8776561240478674e-06, 'epoch': 0.72} + 72%|███████▏ | 8816/12188 [19:03:05<6:34:28, 7.02s/it] 72%|███████▏ | 8817/12188 [19:03:12<6:28:54, 6.92s/it] {'loss': 0.3177, 'grad_norm': 0.6595909659357778, 'learning_rate': 1.8766184487252442e-06, 'epoch': 0.72} + 72%|███████▏ | 8817/12188 [19:03:12<6:28:54, 6.92s/it] 72%|███████▏ | 8818/12188 [19:03:18<6:28:40, 6.92s/it] {'loss': 0.3213, 'grad_norm': 0.92626414332437, 'learning_rate': 1.8755809939708063e-06, 'epoch': 0.72} + 72%|███████▏ | 8818/12188 [19:03:18<6:28:40, 6.92s/it] 72%|███████▏ | 8819/12188 [19:03:27<7:00:08, 7.48s/it] {'loss': 0.3065, 'grad_norm': 0.7045815192081347, 'learning_rate': 1.8745437598578208e-06, 'epoch': 0.72} + 72%|███████▏ | 8819/12188 [19:03:27<7:00:08, 7.48s/it] 72%|███████▏ | 8820/12188 [19:03:35<7:01:18, 7.51s/it] {'loss': 0.2787, 'grad_norm': 0.6899888741322161, 'learning_rate': 1.8735067464595336e-06, 'epoch': 0.72} + 72%|███████▏ | 8820/12188 [19:03:35<7:01:18, 7.51s/it] 72%|███████▏ | 8821/12188 [19:03:42<6:53:40, 7.37s/it] {'loss': 0.3126, 'grad_norm': 0.6683851997327388, 'learning_rate': 1.8724699538491747e-06, 'epoch': 0.72} + 72%|███████▏ | 8821/12188 [19:03:42<6:53:40, 7.37s/it] 72%|███████▏ | 8822/12188 [19:03:49<6:53:51, 7.38s/it] {'loss': 0.3083, 'grad_norm': 0.6732871196471715, 'learning_rate': 1.8714333820999647e-06, 'epoch': 0.72} + 72%|███████▏ | 8822/12188 [19:03:49<6:53:51, 7.38s/it] 72%|███████▏ | 8823/12188 [19:03:56<6:43:41, 7.20s/it] {'loss': 0.295, 'grad_norm': 0.7209480050052699, 'learning_rate': 1.8703970312851016e-06, 'epoch': 0.72} + 72%|███████▏ | 8823/12188 [19:03:56<6:43:41, 7.20s/it] 72%|███████▏ | 8824/12188 [19:04:03<6:48:06, 7.28s/it] {'loss': 0.3002, 'grad_norm': 0.648852140855837, 'learning_rate': 1.8693609014777719e-06, 'epoch': 0.72} + 72%|███████▏ | 8824/12188 [19:04:03<6:48:06, 7.28s/it] 72%|███████▏ | 8825/12188 [19:04:11<6:54:40, 7.40s/it] {'loss': 0.3117, 'grad_norm': 0.670943264790055, 'learning_rate': 1.8683249927511471e-06, 'epoch': 0.72} + 72%|███████▏ | 8825/12188 [19:04:11<6:54:40, 7.40s/it] 72%|███████▏ | 8826/12188 [19:04:18<6:47:28, 7.27s/it] {'loss': 0.2963, 'grad_norm': 0.5881688932547685, 'learning_rate': 1.867289305178378e-06, 'epoch': 0.72} + 72%|███████▏ | 8826/12188 [19:04:18<6:47:28, 7.27s/it] 72%|███████▏ | 8827/12188 [19:04:25<6:44:41, 7.22s/it] {'loss': 0.2829, 'grad_norm': 0.6883807119407828, 'learning_rate': 1.8662538388326074e-06, 'epoch': 0.72} + 72%|███████▏ | 8827/12188 [19:04:25<6:44:41, 7.22s/it] 72%|███████▏ | 8828/12188 [19:04:32<6:38:29, 7.12s/it] {'loss': 0.2672, 'grad_norm': 0.6867261931041033, 'learning_rate': 1.865218593786954e-06, 'epoch': 0.72} + 72%|███████▏ | 8828/12188 [19:04:32<6:38:29, 7.12s/it] 72%|███████▏ | 8829/12188 [19:04:39<6:39:14, 7.13s/it] {'loss': 0.3104, 'grad_norm': 0.6898355720169969, 'learning_rate': 1.8641835701145267e-06, 'epoch': 0.72} + 72%|███████▏ | 8829/12188 [19:04:39<6:39:14, 7.13s/it] 72%|███████▏ | 8830/12188 [19:04:47<6:44:27, 7.23s/it] {'loss': 0.3265, 'grad_norm': 0.8012323758290845, 'learning_rate': 1.8631487678884196e-06, 'epoch': 0.72} + 72%|███████▏ | 8830/12188 [19:04:47<6:44:27, 7.23s/it] 72%|███████▏ | 8831/12188 [19:04:54<6:41:22, 7.17s/it] {'loss': 0.3007, 'grad_norm': 0.7210025851788807, 'learning_rate': 1.862114187181705e-06, 'epoch': 0.72} + 72%|███████▏ | 8831/12188 [19:04:54<6:41:22, 7.17s/it] 72%|███████▏ | 8832/12188 [19:05:02<7:05:49, 7.61s/it] {'loss': 0.2949, 'grad_norm': 0.6642923313824369, 'learning_rate': 1.8610798280674468e-06, 'epoch': 0.72} + 72%|███████▏ | 8832/12188 [19:05:02<7:05:49, 7.61s/it] 72%|███████▏ | 8833/12188 [19:05:09<6:50:30, 7.34s/it] {'loss': 0.3256, 'grad_norm': 0.7176748480234693, 'learning_rate': 1.860045690618688e-06, 'epoch': 0.72} + 72%|███████▏ | 8833/12188 [19:05:09<6:50:30, 7.34s/it] 72%|███████▏ | 8834/12188 [19:05:16<6:47:49, 7.30s/it] {'loss': 0.3596, 'grad_norm': 0.7618713645007494, 'learning_rate': 1.8590117749084562e-06, 'epoch': 0.72} + 72%|███████▏ | 8834/12188 [19:05:16<6:47:49, 7.30s/it] 72%|███████▏ | 8835/12188 [19:05:24<6:46:22, 7.27s/it] {'loss': 0.3079, 'grad_norm': 0.6727099422397614, 'learning_rate': 1.8579780810097687e-06, 'epoch': 0.72} + 72%|███████▏ | 8835/12188 [19:05:24<6:46:22, 7.27s/it] 72%|███████▏ | 8836/12188 [19:05:32<7:09:21, 7.69s/it] {'loss': 0.3098, 'grad_norm': 0.7170220558154219, 'learning_rate': 1.8569446089956184e-06, 'epoch': 0.72} + 72%|███████▏ | 8836/12188 [19:05:32<7:09:21, 7.69s/it] 73%|███████▎ | 8837/12188 [19:05:39<6:59:26, 7.51s/it] {'loss': 0.3258, 'grad_norm': 0.688475215638465, 'learning_rate': 1.855911358938991e-06, 'epoch': 0.73} + 73%|███████▎ | 8837/12188 [19:05:39<6:59:26, 7.51s/it] 73%|███████▎ | 8838/12188 [19:05:47<7:06:24, 7.64s/it] {'loss': 0.3223, 'grad_norm': 0.7101931463553803, 'learning_rate': 1.854878330912853e-06, 'epoch': 0.73} + 73%|███████▎ | 8838/12188 [19:05:47<7:06:24, 7.64s/it] 73%|███████▎ | 8839/12188 [19:05:55<7:09:31, 7.70s/it] {'loss': 0.3055, 'grad_norm': 0.6490739159102257, 'learning_rate': 1.8538455249901533e-06, 'epoch': 0.73} + 73%|███████▎ | 8839/12188 [19:05:55<7:09:31, 7.70s/it] 73%|███████▎ | 8840/12188 [19:06:02<7:02:54, 7.58s/it] {'loss': 0.2649, 'grad_norm': 0.6356292577143471, 'learning_rate': 1.852812941243829e-06, 'epoch': 0.73} + 73%|███████▎ | 8840/12188 [19:06:02<7:02:54, 7.58s/it]Invalidate trace cache @ step 2: expected module 1, but got module 364 + 73%|███████▎ | 8841/12188 [19:06:09<6:44:05, 7.24s/it] {'loss': 0.6167, 'grad_norm': 0.576820300034229, 'learning_rate': 1.851780579746797e-06, 'epoch': 0.73} + 73%|███████▎ | 8841/12188 [19:06:09<6:44:05, 7.24s/it] 73%|███████▎ | 8842/12188 [19:06:17<6:56:26, 7.47s/it] {'loss': 0.3023, 'grad_norm': 1.151273974447519, 'learning_rate': 1.8507484405719623e-06, 'epoch': 0.73} + 73%|███████▎ | 8842/12188 [19:06:17<6:56:26, 7.47s/it] 73%|███████▎ | 8843/12188 [19:06:24<6:54:58, 7.44s/it] {'loss': 0.2998, 'grad_norm': 0.720250226410731, 'learning_rate': 1.8497165237922148e-06, 'epoch': 0.73} + 73%|███████▎ | 8843/12188 [19:06:24<6:54:58, 7.44s/it] 73%|███████▎ | 8844/12188 [19:06:32<6:59:33, 7.53s/it] {'loss': 0.3377, 'grad_norm': 0.6752643949968303, 'learning_rate': 1.8486848294804239e-06, 'epoch': 0.73} + 73%|███████▎ | 8844/12188 [19:06:32<6:59:33, 7.53s/it] 73%|███████▎ | 8845/12188 [19:06:39<6:55:18, 7.45s/it] {'loss': 0.3051, 'grad_norm': 0.70824646626698, 'learning_rate': 1.8476533577094485e-06, 'epoch': 0.73} + 73%|███████▎ | 8845/12188 [19:06:39<6:55:18, 7.45s/it] 73%|███████▎ | 8846/12188 [19:06:46<6:45:09, 7.27s/it] {'loss': 0.2908, 'grad_norm': 0.7173601736217289, 'learning_rate': 1.846622108552127e-06, 'epoch': 0.73} + 73%|███████▎ | 8846/12188 [19:06:46<6:45:09, 7.27s/it] 73%|███████▎ | 8847/12188 [19:06:54<7:03:59, 7.61s/it] {'loss': 0.2934, 'grad_norm': 0.6838449103432068, 'learning_rate': 1.8455910820812872e-06, 'epoch': 0.73} + 73%|███████▎ | 8847/12188 [19:06:54<7:03:59, 7.61s/it] 73%|███████▎ | 8848/12188 [19:07:04<7:28:51, 8.06s/it] {'loss': 0.2566, 'grad_norm': 0.6691856123248048, 'learning_rate': 1.8445602783697375e-06, 'epoch': 0.73} + 73%|███████▎ | 8848/12188 [19:07:04<7:28:51, 8.06s/it] 73%|███████▎ | 8849/12188 [19:07:11<7:19:15, 7.89s/it] {'loss': 0.3177, 'grad_norm': 0.7029712969586496, 'learning_rate': 1.84352969749027e-06, 'epoch': 0.73} + 73%|███████▎ | 8849/12188 [19:07:11<7:19:15, 7.89s/it] 73%|███████▎ | 8850/12188 [19:07:19<7:15:25, 7.83s/it] {'loss': 0.3055, 'grad_norm': 0.6513278725103478, 'learning_rate': 1.8424993395156633e-06, 'epoch': 0.73} + 73%|███████▎ | 8850/12188 [19:07:19<7:15:25, 7.83s/it] 73%|███████▎ | 8851/12188 [19:07:26<7:12:54, 7.78s/it] {'loss': 0.2892, 'grad_norm': 0.7263258346916427, 'learning_rate': 1.841469204518682e-06, 'epoch': 0.73} + 73%|███████▎ | 8851/12188 [19:07:26<7:12:54, 7.78s/it] 73%|███████▎ | 8852/12188 [19:07:34<7:07:46, 7.69s/it] {'loss': 0.2947, 'grad_norm': 0.6714821043164698, 'learning_rate': 1.84043929257207e-06, 'epoch': 0.73} + 73%|███████▎ | 8852/12188 [19:07:34<7:07:46, 7.69s/it] 73%|███████▎ | 8853/12188 [19:07:41<6:59:53, 7.55s/it] {'loss': 0.3477, 'grad_norm': 1.0108410497668356, 'learning_rate': 1.8394096037485604e-06, 'epoch': 0.73} + 73%|███���███▎ | 8853/12188 [19:07:41<6:59:53, 7.55s/it] 73%|███████▎ | 8854/12188 [19:07:48<6:48:27, 7.35s/it] {'loss': 0.3351, 'grad_norm': 0.6404947943563707, 'learning_rate': 1.8383801381208644e-06, 'epoch': 0.73} + 73%|███████▎ | 8854/12188 [19:07:48<6:48:27, 7.35s/it] 73%|███████▎ | 8855/12188 [19:07:55<6:38:06, 7.17s/it] {'loss': 0.315, 'grad_norm': 0.6967441097947858, 'learning_rate': 1.8373508957616843e-06, 'epoch': 0.73} + 73%|███████▎ | 8855/12188 [19:07:55<6:38:06, 7.17s/it] 73%|███████▎ | 8856/12188 [19:08:02<6:38:54, 7.18s/it] {'loss': 0.2767, 'grad_norm': 0.7669770029555887, 'learning_rate': 1.836321876743704e-06, 'epoch': 0.73} + 73%|███████▎ | 8856/12188 [19:08:02<6:38:54, 7.18s/it] 73%|███████▎ | 8857/12188 [19:08:10<6:45:30, 7.30s/it] {'loss': 0.3033, 'grad_norm': 0.6313170131867067, 'learning_rate': 1.835293081139588e-06, 'epoch': 0.73} + 73%|███████▎ | 8857/12188 [19:08:10<6:45:30, 7.30s/it] 73%|███████▎ | 8858/12188 [19:08:17<6:44:35, 7.29s/it] {'loss': 0.2883, 'grad_norm': 0.7159420022833884, 'learning_rate': 1.8342645090219924e-06, 'epoch': 0.73} + 73%|███████▎ | 8858/12188 [19:08:17<6:44:35, 7.29s/it] 73%|███████▎ | 8859/12188 [19:08:24<6:36:10, 7.14s/it] {'loss': 0.325, 'grad_norm': 0.6840793940218085, 'learning_rate': 1.8332361604635491e-06, 'epoch': 0.73} + 73%|███████▎ | 8859/12188 [19:08:24<6:36:10, 7.14s/it] 73%|███████▎ | 8860/12188 [19:08:33<7:19:27, 7.92s/it] {'loss': 0.3033, 'grad_norm': 0.66849172317979, 'learning_rate': 1.8322080355368821e-06, 'epoch': 0.73} + 73%|███████▎ | 8860/12188 [19:08:33<7:19:27, 7.92s/it] 73%|███████▎ | 8861/12188 [19:08:40<7:01:04, 7.59s/it] {'loss': 0.3054, 'grad_norm': 0.6453845677082082, 'learning_rate': 1.831180134314594e-06, 'epoch': 0.73} + 73%|███████▎ | 8861/12188 [19:08:40<7:01:04, 7.59s/it] 73%|███████▎ | 8862/12188 [19:08:48<6:59:09, 7.56s/it] {'loss': 0.3026, 'grad_norm': 0.7115284373847901, 'learning_rate': 1.830152456869272e-06, 'epoch': 0.73} + 73%|███████▎ | 8862/12188 [19:08:48<6:59:09, 7.56s/it] 73%|███████▎ | 8863/12188 [19:08:54<6:46:30, 7.34s/it] {'loss': 0.3003, 'grad_norm': 0.6796470649480088, 'learning_rate': 1.8291250032734925e-06, 'epoch': 0.73} + 73%|███████▎ | 8863/12188 [19:08:54<6:46:30, 7.34s/it] 73%|███████▎ | 8864/12188 [19:09:02<6:55:57, 7.51s/it] {'loss': 0.2911, 'grad_norm': 0.752368135748544, 'learning_rate': 1.828097773599809e-06, 'epoch': 0.73} + 73%|███████▎ | 8864/12188 [19:09:02<6:55:57, 7.51s/it] 73%|███████▎ | 8865/12188 [19:09:10<7:02:17, 7.62s/it] {'loss': 0.334, 'grad_norm': 0.6595396116087031, 'learning_rate': 1.827070767920765e-06, 'epoch': 0.73} + 73%|███████▎ | 8865/12188 [19:09:10<7:02:17, 7.62s/it] 73%|███████▎ | 8866/12188 [19:09:19<7:17:29, 7.90s/it] {'loss': 0.2705, 'grad_norm': 0.69135065156498, 'learning_rate': 1.8260439863088874e-06, 'epoch': 0.73} + 73%|███████▎ | 8866/12188 [19:09:19<7:17:29, 7.90s/it] 73%|███████▎ | 8867/12188 [19:09:26<7:03:26, 7.65s/it] {'loss': 0.3486, 'grad_norm': 0.7032325819164614, 'learning_rate': 1.8250174288366822e-06, 'epoch': 0.73} + 73%|███████▎ | 8867/12188 [19:09:26<7:03:26, 7.65s/it] 73%|███████▎ | 8868/12188 [19:09:33<6:53:37, 7.48s/it] {'loss': 0.2697, 'grad_norm': 0.5885878301203463, 'learning_rate': 1.8239910955766454e-06, 'epoch': 0.73} + 73%|███████▎ | 8868/12188 [19:09:33<6:53:37, 7.48s/it] 73%|███████▎ | 8869/12188 [19:09:40<6:46:15, 7.34s/it] {'loss': 0.2792, 'grad_norm': 0.6762828059136307, 'learning_rate': 1.8229649866012562e-06, 'epoch': 0.73} + 73%|███████▎ | 8869/12188 [19:09:40<6:46:15, 7.34s/it] 73%|███████▎ | 8870/12188 [19:09:47<6:44:49, 7.32s/it] {'loss': 0.3229, 'grad_norm': 0.662750488862776, 'learning_rate': 1.821939101982974e-06, 'epoch': 0.73} + 73%|███████▎ | 8870/12188 [19:09:47<6:44:49, 7.32s/it] 73%|███████▎ | 8871/12188 [19:09:54<6:37:12, 7.18s/it] {'loss': 0.3026, 'grad_norm': 0.8036815161052486, 'learning_rate': 1.8209134417942481e-06, 'epoch': 0.73} + 73%|███████▎ | 8871/12188 [19:09:54<6:37:12, 7.18s/it] 73%|███████▎ | 8872/12188 [19:10:02<6:48:18, 7.39s/it] {'loss': 0.3069, 'grad_norm': 0.7045508618350358, 'learning_rate': 1.8198880061075053e-06, 'epoch': 0.73} + 73%|███████▎ | 8872/12188 [19:10:02<6:48:18, 7.39s/it] 73%|███████▎ | 8873/12188 [19:10:09<6:50:08, 7.42s/it] {'loss': 0.3223, 'grad_norm': 0.673816806565814, 'learning_rate': 1.8188627949951642e-06, 'epoch': 0.73} + 73%|███████▎ | 8873/12188 [19:10:10<6:50:08, 7.42s/it] 73%|███████▎ | 8874/12188 [19:10:17<6:47:08, 7.37s/it] {'loss': 0.2913, 'grad_norm': 0.604424077920739, 'learning_rate': 1.81783780852962e-06, 'epoch': 0.73} + 73%|███████▎ | 8874/12188 [19:10:17<6:47:08, 7.37s/it] 73%|███████▎ | 8875/12188 [19:10:25<7:04:55, 7.70s/it] {'loss': 0.332, 'grad_norm': 0.6919733695558113, 'learning_rate': 1.816813046783259e-06, 'epoch': 0.73} + 73%|███████▎ | 8875/12188 [19:10:25<7:04:55, 7.70s/it] 73%|███████▎ | 8876/12188 [19:10:33<7:01:49, 7.64s/it] {'loss': 0.2793, 'grad_norm': 0.6404197063407413, 'learning_rate': 1.815788509828446e-06, 'epoch': 0.73} + 73%|███████▎ | 8876/12188 [19:10:33<7:01:49, 7.64s/it] 73%|███████▎ | 8877/12188 [19:10:39<6:46:14, 7.36s/it] {'loss': 0.3092, 'grad_norm': 0.7311882236607521, 'learning_rate': 1.8147641977375313e-06, 'epoch': 0.73} + 73%|███████▎ | 8877/12188 [19:10:39<6:46:14, 7.36s/it] 73%|███████▎ | 8878/12188 [19:10:47<6:48:27, 7.40s/it] {'loss': 0.305, 'grad_norm': 0.6903424810501663, 'learning_rate': 1.8137401105828518e-06, 'epoch': 0.73} + 73%|███████▎ | 8878/12188 [19:10:47<6:48:27, 7.40s/it] 73%|███████▎ | 8879/12188 [19:10:54<6:36:33, 7.19s/it] {'loss': 0.3507, 'grad_norm': 0.6863868323309782, 'learning_rate': 1.8127162484367277e-06, 'epoch': 0.73} + 73%|███████▎ | 8879/12188 [19:10:54<6:36:33, 7.19s/it] 73%|███████▎ | 8880/12188 [19:11:00<6:30:24, 7.08s/it] {'loss': 0.2663, 'grad_norm': 0.6607153036055354, 'learning_rate': 1.81169261137146e-06, 'epoch': 0.73} + 73%|███████▎ | 8880/12188 [19:11:00<6:30:24, 7.08s/it] 73%|███████▎ | 8881/12188 [19:11:07<6:20:45, 6.91s/it] {'loss': 0.3382, 'grad_norm': 0.7087174897990361, 'learning_rate': 1.810669199459339e-06, 'epoch': 0.73} + 73%|███████▎ | 8881/12188 [19:11:07<6:20:45, 6.91s/it] 73%|███████▎ | 8882/12188 [19:11:16<6:52:56, 7.49s/it] {'loss': 0.2933, 'grad_norm': 0.6754172499892703, 'learning_rate': 1.809646012772634e-06, 'epoch': 0.73} + 73%|███████▎ | 8882/12188 [19:11:16<6:52:56, 7.49s/it] 73%|███████▎ | 8883/12188 [19:11:23<6:49:00, 7.43s/it] {'loss': 0.3223, 'grad_norm': 0.7126754303822509, 'learning_rate': 1.8086230513836011e-06, 'epoch': 0.73} + 73%|███████▎ | 8883/12188 [19:11:23<6:49:00, 7.43s/it] 73%|███████▎ | 8884/12188 [19:11:31<6:51:49, 7.48s/it] {'loss': 0.2824, 'grad_norm': 0.6832834666812259, 'learning_rate': 1.8076003153644834e-06, 'epoch': 0.73} + 73%|███████▎ | 8884/12188 [19:11:31<6:51:49, 7.48s/it] 73%|███████▎ | 8885/12188 [19:11:37<6:39:01, 7.25s/it] {'loss': 0.3303, 'grad_norm': 0.6641371437820992, 'learning_rate': 1.8065778047875005e-06, 'epoch': 0.73} + 73%|███████▎ | 8885/12188 [19:11:37<6:39:01, 7.25s/it] 73%|███████▎ | 8886/12188 [19:11:45<6:51:49, 7.48s/it] {'loss': 0.2874, 'grad_norm': 0.6633316168850799, 'learning_rate': 1.805555519724862e-06, 'epoch': 0.73} + 73%|███████▎ | 8886/12188 [19:11:45<6:51:49, 7.48s/it] 73%|███████▎ | 8887/12188 [19:11:53<6:54:35, 7.54s/it] {'loss': 0.2818, 'grad_norm': 0.6734861520315042, 'learning_rate': 1.8045334602487624e-06, 'epoch': 0.73} + 73%|███████▎ | 8887/12188 [19:11:53<6:54:35, 7.54s/it] 73%|███████▎ | 8888/12188 [19:12:01<6:55:01, 7.55s/it] {'loss': 0.2929, 'grad_norm': 0.654402076792195, 'learning_rate': 1.8035116264313763e-06, 'epoch': 0.73} + 73%|███████▎ | 8888/12188 [19:12:01<6:55:01, 7.55s/it] 73%|███████▎ | 8889/12188 [19:12:08<6:53:36, 7.52s/it] {'loss': 0.298, 'grad_norm': 0.6836213374338516, 'learning_rate': 1.802490018344863e-06, 'epoch': 0.73} + 73%|███████▎ | 8889/12188 [19:12:08<6:53:36, 7.52s/it] 73%|███████▎ | 8890/12188 [19:12:15<6:37:52, 7.24s/it] {'loss': 0.3219, 'grad_norm': 0.6369187636363243, 'learning_rate': 1.8014686360613658e-06, 'epoch': 0.73} + 73%|███████▎ | 8890/12188 [19:12:15<6:37:52, 7.24s/it] 73%|███████▎ | 8891/12188 [19:12:22<6:33:49, 7.17s/it] {'loss': 0.3244, 'grad_norm': 0.688213524768224, 'learning_rate': 1.8004474796530153e-06, 'epoch': 0.73} + 73%|███████▎ | 8891/12188 [19:12:22<6:33:49, 7.17s/it] 73%|███████▎ | 8892/12188 [19:12:30<6:51:06, 7.48s/it] {'loss': 0.3174, 'grad_norm': 0.7308210060859348, 'learning_rate': 1.7994265491919245e-06, 'epoch': 0.73} + 73%|███████▎ | 8892/12188 [19:12:30<6:51:06, 7.48s/it] 73%|███████▎ | 8893/12188 [19:12:37<6:51:43, 7.50s/it] {'loss': 0.3102, 'grad_norm': 0.7350746576441208, 'learning_rate': 1.7984058447501868e-06, 'epoch': 0.73} + 73%|███████▎ | 8893/12188 [19:12:37<6:51:43, 7.50s/it] 73%|███████▎ | 8894/12188 [19:12:46<7:03:35, 7.72s/it] {'loss': 0.3041, 'grad_norm': 0.735404779461389, 'learning_rate': 1.797385366399887e-06, 'epoch': 0.73} + 73%|███████▎ | 8894/12188 [19:12:46<7:03:35, 7.72s/it] 73%|███████▎ | 8895/12188 [19:12:53<6:57:13, 7.60s/it] {'loss': 0.2814, 'grad_norm': 0.684381450203058, 'learning_rate': 1.796365114213085e-06, 'epoch': 0.73} + 73%|███████▎ | 8895/12188 [19:12:53<6:57:13, 7.60s/it] 73%|███████▎ | 8896/12188 [19:13:00<6:45:52, 7.40s/it] {'loss': 0.3104, 'grad_norm': 0.7836529694760743, 'learning_rate': 1.7953450882618317e-06, 'epoch': 0.73} + 73%|███████▎ | 8896/12188 [19:13:00<6:45:52, 7.40s/it] 73%|███████▎ | 8897/12188 [19:13:08<6:49:54, 7.47s/it] {'loss': 0.3471, 'grad_norm': 0.7729638970670301, 'learning_rate': 1.7943252886181612e-06, 'epoch': 0.73} + 73%|███████▎ | 8897/12188 [19:13:08<6:49:54, 7.47s/it] 73%|███████▎ | 8898/12188 [19:13:15<6:52:34, 7.52s/it] {'loss': 0.295, 'grad_norm': 0.6717011820125501, 'learning_rate': 1.7933057153540868e-06, 'epoch': 0.73} + 73%|███████▎ | 8898/12188 [19:13:15<6:52:34, 7.52s/it] 73%|███████▎ | 8899/12188 [19:13:22<6:41:09, 7.32s/it] {'loss': 0.3566, 'grad_norm': 0.7619550357171175, 'learning_rate': 1.7922863685416126e-06, 'epoch': 0.73} + 73%|███████▎ | 8899/12188 [19:13:22<6:41:09, 7.32s/it] 73%|███████▎ | 8900/12188 [19:13:29<6:34:28, 7.20s/it] {'loss': 0.3232, 'grad_norm': 0.7363395153225991, 'learning_rate': 1.79126724825272e-06, 'epoch': 0.73} + 73%|███████▎ | 8900/12188 [19:13:29<6:34:28, 7.20s/it] 73%|███████▎ | 8901/12188 [19:13:36<6:36:51, 7.24s/it] {'loss': 0.3232, 'grad_norm': 0.6647112492675581, 'learning_rate': 1.7902483545593807e-06, 'epoch': 0.73} + 73%|███████▎ | 8901/12188 [19:13:36<6:36:51, 7.24s/it] 73%|███████▎ | 8902/12188 [19:13:44<6:39:43, 7.30s/it] {'loss': 0.2782, 'grad_norm': 0.7532844624103986, 'learning_rate': 1.7892296875335435e-06, 'epoch': 0.73} + 73%|███████▎ | 8902/12188 [19:13:44<6:39:43, 7.30s/it] 73%|███████▎ | 8903/12188 [19:13:50<6:23:23, 7.00s/it] {'loss': 0.3261, 'grad_norm': 0.7170199058415745, 'learning_rate': 1.7882112472471497e-06, 'epoch': 0.73} + 73%|███████▎ | 8903/12188 [19:13:50<6:23:23, 7.00s/it] 73%|███████▎ | 8904/12188 [19:13:57<6:16:07, 6.87s/it] {'loss': 0.3151, 'grad_norm': 0.6442763958926027, 'learning_rate': 1.7871930337721155e-06, 'epoch': 0.73} + 73%|███████▎ | 8904/12188 [19:13:57<6:16:07, 6.87s/it] 73%|███████▎ | 8905/12188 [19:14:05<6:36:59, 7.26s/it] {'loss': 0.3029, 'grad_norm': 0.6994705146422289, 'learning_rate': 1.7861750471803491e-06, 'epoch': 0.73} + 73%|███████▎ | 8905/12188 [19:14:05<6:36:59, 7.26s/it] 73%|███████▎ | 8906/12188 [19:14:12<6:35:29, 7.23s/it] {'loss': 0.3057, 'grad_norm': 1.061815164668363, 'learning_rate': 1.7851572875437361e-06, 'epoch': 0.73} + 73%|███████▎ | 8906/12188 [19:14:12<6:35:29, 7.23s/it] 73%|███████▎ | 8907/12188 [19:14:19<6:36:20, 7.25s/it] {'loss': 0.3464, 'grad_norm': 0.6499582689089072, 'learning_rate': 1.7841397549341516e-06, 'epoch': 0.73} + 73%|███████▎ | 8907/12188 [19:14:19<6:36:20, 7.25s/it] 73%|███████▎ | 8908/12188 [19:14:27<6:37:29, 7.27s/it] {'loss': 0.3036, 'grad_norm': 0.6848945673492225, 'learning_rate': 1.7831224494234495e-06, 'epoch': 0.73} + 73%|███████▎ | 8908/12188 [19:14:27<6:37:29, 7.27s/it] 73%|███████▎ | 8909/12188 [19:14:34<6:39:21, 7.31s/it] {'loss': 0.2836, 'grad_norm': 0.6979204182866767, 'learning_rate': 1.7821053710834719e-06, 'epoch': 0.73} + 73%|███████▎ | 8909/12188 [19:14:34<6:39:21, 7.31s/it] 73%|███████▎ | 8910/12188 [19:14:41<6:32:14, 7.18s/it] {'loss': 0.2944, 'grad_norm': 0.7221067366929906, 'learning_rate': 1.7810885199860446e-06, 'epoch': 0.73} + 73%|███████▎ | 8910/12188 [19:14:41<6:32:14, 7.18s/it] 73%|███████▎ | 8911/12188 [19:14:48<6:27:40, 7.10s/it] {'loss': 0.295, 'grad_norm': 0.7049993819394008, 'learning_rate': 1.7800718962029728e-06, 'epoch': 0.73} + 73%|███████▎ | 8911/12188 [19:14:48<6:27:40, 7.10s/it] 73%|███████▎ | 8912/12188 [19:14:55<6:24:46, 7.05s/it] {'loss': 0.2942, 'grad_norm': 0.693329785297274, 'learning_rate': 1.779055499806052e-06, 'epoch': 0.73} + 73%|███████▎ | 8912/12188 [19:14:55<6:24:46, 7.05s/it] 73%|███████▎ | 8913/12188 [19:15:03<6:45:04, 7.42s/it] {'loss': 0.2853, 'grad_norm': 0.6479530375806514, 'learning_rate': 1.7780393308670552e-06, 'epoch': 0.73} + 73%|███████▎ | 8913/12188 [19:15:03<6:45:04, 7.42s/it] 73%|███████▎ | 8914/12188 [19:15:10<6:30:59, 7.17s/it] {'loss': 0.3365, 'grad_norm': 0.6904452901421244, 'learning_rate': 1.7770233894577448e-06, 'epoch': 0.73} + 73%|███████▎ | 8914/12188 [19:15:10<6:30:59, 7.17s/it] 73%|███████▎ | 8915/12188 [19:15:18<6:46:50, 7.46s/it] {'loss': 0.2979, 'grad_norm': 0.6661733277300433, 'learning_rate': 1.7760076756498657e-06, 'epoch': 0.73} + 73%|███████▎ | 8915/12188 [19:15:18<6:46:50, 7.46s/it] 73%|███████▎ | 8916/12188 [19:15:25<6:44:19, 7.41s/it] {'loss': 0.3208, 'grad_norm': 0.8167327706738282, 'learning_rate': 1.774992189515145e-06, 'epoch': 0.73} + 73%|███████▎ | 8916/12188 [19:15:25<6:44:19, 7.41s/it] 73%|███████▎ | 8917/12188 [19:15:33<6:46:42, 7.46s/it] {'loss': 0.3107, 'grad_norm': 0.7220385676207921, 'learning_rate': 1.7739769311252947e-06, 'epoch': 0.73} + 73%|███████▎ | 8917/12188 [19:15:33<6:46:42, 7.46s/it] 73%|███████▎ | 8918/12188 [19:15:40<6:46:30, 7.46s/it] {'loss': 0.2924, 'grad_norm': 0.6396824200105194, 'learning_rate': 1.7729619005520087e-06, 'epoch': 0.73} + 73%|███████▎ | 8918/12188 [19:15:40<6:46:30, 7.46s/it] 73%|███████▎ | 8919/12188 [19:15:47<6:45:04, 7.43s/it] {'loss': 0.2752, 'grad_norm': 0.7343301429303518, 'learning_rate': 1.771947097866969e-06, 'epoch': 0.73} + 73%|███████▎ | 8919/12188 [19:15:47<6:45:04, 7.43s/it] 73%|███████▎ | 8920/12188 [19:15:54<6:36:19, 7.28s/it] {'loss': 0.2803, 'grad_norm': 0.6880298948833085, 'learning_rate': 1.77093252314184e-06, 'epoch': 0.73} + 73%|███████▎ | 8920/12188 [19:15:54<6:36:19, 7.28s/it] 73%|███████▎ | 8921/12188 [19:16:02<6:47:05, 7.48s/it] {'loss': 0.3172, 'grad_norm': 0.7414191287463398, 'learning_rate': 1.769918176448267e-06, 'epoch': 0.73} + 73%|███████▎ | 8921/12188 [19:16:02<6:47:05, 7.48s/it] 73%|███████▎ | 8922/12188 [19:16:10<6:55:21, 7.63s/it] {'loss': 0.3283, 'grad_norm': 0.7070686578963368, 'learning_rate': 1.7689040578578825e-06, 'epoch': 0.73} + 73%|███████▎ | 8922/12188 [19:16:10<6:55:21, 7.63s/it] 73%|███████▎ | 8923/12188 [19:16:18<6:57:56, 7.68s/it] {'loss': 0.3026, 'grad_norm': 0.691445984996701, 'learning_rate': 1.7678901674423044e-06, 'epoch': 0.73} + 73%|███████▎ | 8923/12188 [19:16:18<6:57:56, 7.68s/it] 73%|███████▎ | 8924/12188 [19:16:25<6:53:54, 7.61s/it] {'loss': 0.2935, 'grad_norm': 0.7313608467639027, 'learning_rate': 1.7668765052731285e-06, 'epoch': 0.73} + 73%|███████▎ | 8924/12188 [19:16:25<6:53:54, 7.61s/it] 73%|███████▎ | 8925/12188 [19:16:33<6:49:37, 7.53s/it] {'loss': 0.3024, 'grad_norm': 0.7598804432429502, 'learning_rate': 1.7658630714219404e-06, 'epoch': 0.73} + 73%|███████▎ | 8925/12188 [19:16:33<6:49:37, 7.53s/it] 73%|███████▎ | 8926/12188 [19:16:41<7:03:19, 7.79s/it] {'loss': 0.2745, 'grad_norm': 0.7699039986152302, 'learning_rate': 1.7648498659603047e-06, 'epoch': 0.73} + 73%|███████▎ | 8926/12188 [19:16:41<7:03:19, 7.79s/it] 73%|███████▎ | 8927/12188 [19:16:48<6:48:32, 7.52s/it] {'loss': 0.3045, 'grad_norm': 0.6646087977043496, 'learning_rate': 1.763836888959774e-06, 'epoch': 0.73} + 73%|███████▎ | 8927/12188 [19:16:48<6:48:32, 7.52s/it] 73%|███████▎ | 8928/12188 [19:16:56<6:55:49, 7.65s/it] {'loss': 0.3101, 'grad_norm': 0.6772943909103356, 'learning_rate': 1.7628241404918845e-06, 'epoch': 0.73} + 73%|███████▎ | 8928/12188 [19:16:56<6:55:49, 7.65s/it] 73%|███████▎ | 8929/12188 [19:17:03<6:42:45, 7.42s/it] {'loss': 0.3794, 'grad_norm': 0.717110732598274, 'learning_rate': 1.7618116206281533e-06, 'epoch': 0.73} + 73%|███████▎ | 8929/12188 [19:17:03<6:42:45, 7.42s/it] 73%|███████▎ | 8930/12188 [19:17:10<6:37:28, 7.32s/it] {'loss': 0.2986, 'grad_norm': 0.8752566589886425, 'learning_rate': 1.760799329440081e-06, 'epoch': 0.73} + 73%|███████▎ | 8930/12188 [19:17:10<6:37:28, 7.32s/it] 73%|███████▎ | 8931/12188 [19:17:18<6:44:14, 7.45s/it] {'loss': 0.2935, 'grad_norm': 0.697619512163851, 'learning_rate': 1.7597872669991578e-06, 'epoch': 0.73} + 73%|███████▎ | 8931/12188 [19:17:18<6:44:14, 7.45s/it] 73%|███████▎ | 8932/12188 [19:17:26<6:57:49, 7.70s/it] {'loss': 0.3178, 'grad_norm': 0.723166215594604, 'learning_rate': 1.7587754333768504e-06, 'epoch': 0.73} + 73%|███████▎ | 8932/12188 [19:17:26<6:57:49, 7.70s/it] 73%|███████▎ | 8933/12188 [19:17:33<6:43:56, 7.45s/it] {'loss': 0.2758, 'grad_norm': 0.6806498271495274, 'learning_rate': 1.7577638286446158e-06, 'epoch': 0.73} + 73%|███████▎ | 8933/12188 [19:17:33<6:43:56, 7.45s/it] 73%|███████▎ | 8934/12188 [19:17:41<7:00:38, 7.76s/it] {'loss': 0.2929, 'grad_norm': 0.6375739852103074, 'learning_rate': 1.756752452873889e-06, 'epoch': 0.73} + 73%|███████▎ | 8934/12188 [19:17:41<7:00:38, 7.76s/it] 73%|███████▎ | 8935/12188 [19:17:48<6:48:03, 7.53s/it] {'loss': 0.3255, 'grad_norm': 0.6625624968328604, 'learning_rate': 1.755741306136095e-06, 'epoch': 0.73} + 73%|███████▎ | 8935/12188 [19:17:48<6:48:03, 7.53s/it] 73%|███████▎ | 8936/12188 [19:17:56<6:47:41, 7.52s/it] {'loss': 0.2697, 'grad_norm': 0.6516767951843688, 'learning_rate': 1.7547303885026361e-06, 'epoch': 0.73} + 73%|███████▎ | 8936/12188 [19:17:56<6:47:41, 7.52s/it] 73%|███████▎ | 8937/12188 [19:18:03<6:41:30, 7.41s/it] {'loss': 0.2974, 'grad_norm': 0.6874143873338475, 'learning_rate': 1.753719700044903e-06, 'epoch': 0.73} + 73%|███████▎ | 8937/12188 [19:18:03<6:41:30, 7.41s/it] 73%|███████▎ | 8938/12188 [19:18:12<7:03:08, 7.81s/it] {'loss': 0.2978, 'grad_norm': 0.6427651416263919, 'learning_rate': 1.7527092408342712e-06, 'epoch': 0.73} + 73%|███████▎ | 8938/12188 [19:18:12<7:03:08, 7.81s/it] 73%|███████▎ | 8939/12188 [19:18:19<6:51:53, 7.61s/it] {'loss': 0.3151, 'grad_norm': 0.7193072133496274, 'learning_rate': 1.7516990109420934e-06, 'epoch': 0.73} + 73%|███████▎ | 8939/12188 [19:18:19<6:51:53, 7.61s/it] 73%|███████▎ | 8940/12188 [19:18:26<6:41:15, 7.41s/it] {'loss': 0.3384, 'grad_norm': 0.7257478945672733, 'learning_rate': 1.7506890104397134e-06, 'epoch': 0.73} + 73%|███████▎ | 8940/12188 [19:18:26<6:41:15, 7.41s/it] 73%|███████▎ | 8941/12188 [19:18:33<6:39:16, 7.38s/it] {'loss': 0.3168, 'grad_norm': 0.8368089288440411, 'learning_rate': 1.7496792393984563e-06, 'epoch': 0.73} + 73%|███████▎ | 8941/12188 [19:18:33<6:39:16, 7.38s/it] 73%|███████▎ | 8942/12188 [19:18:41<6:50:12, 7.58s/it] {'loss': 0.2704, 'grad_norm': 0.692273552873225, 'learning_rate': 1.7486696978896279e-06, 'epoch': 0.73} + 73%|███████▎ | 8942/12188 [19:18:41<6:50:12, 7.58s/it] 73%|███████▎ | 8943/12188 [19:18:48<6:33:06, 7.27s/it] {'loss': 0.2698, 'grad_norm': 0.6691674953327194, 'learning_rate': 1.7476603859845227e-06, 'epoch': 0.73} + 73%|███████▎ | 8943/12188 [19:18:48<6:33:06, 7.27s/it] 73%|███████▎ | 8944/12188 [19:18:55<6:28:33, 7.19s/it] {'loss': 0.3032, 'grad_norm': 0.6435502995135963, 'learning_rate': 1.7466513037544165e-06, 'epoch': 0.73} + 73%|███████▎ | 8944/12188 [19:18:55<6:28:33, 7.19s/it] 73%|███████▎ | 8945/12188 [19:19:01<6:19:11, 7.02s/it] {'loss': 0.3487, 'grad_norm': 0.7893630471725237, 'learning_rate': 1.745642451270566e-06, 'epoch': 0.73} + 73%|███████▎ | 8945/12188 [19:19:01<6:19:11, 7.02s/it] 73%|███████▎ | 8946/12188 [19:19:10<6:43:25, 7.47s/it] {'loss': 0.276, 'grad_norm': 1.2463368076762091, 'learning_rate': 1.7446338286042196e-06, 'epoch': 0.73} + 73%|███████▎ | 8946/12188 [19:19:10<6:43:25, 7.47s/it] 73%|███████▎ | 8947/12188 [19:19:17<6:45:05, 7.50s/it] {'loss': 0.2777, 'grad_norm': 0.7804812275390927, 'learning_rate': 1.7436254358265997e-06, 'epoch': 0.73} + 73%|███████▎ | 8947/12188 [19:19:17<6:45:05, 7.50s/it] 73%|███████▎ | 8948/12188 [19:19:24<6:35:37, 7.33s/it] {'loss': 0.2869, 'grad_norm': 0.742250539996497, 'learning_rate': 1.742617273008922e-06, 'epoch': 0.73} + 73%|███████▎ | 8948/12188 [19:19:24<6:35:37, 7.33s/it] 73%|███████▎ | 8949/12188 [19:19:31<6:26:14, 7.15s/it] {'loss': 0.2692, 'grad_norm': 0.6867341013687283, 'learning_rate': 1.7416093402223773e-06, 'epoch': 0.73} + 73%|███████▎ | 8949/12188 [19:19:31<6:26:14, 7.15s/it] 73%|███████▎ | 8950/12188 [19:19:38<6:19:41, 7.04s/it] {'loss': 0.2837, 'grad_norm': 0.6848250055449742, 'learning_rate': 1.7406016375381452e-06, 'epoch': 0.73} + 73%|███████▎ | 8950/12188 [19:19:38<6:19:41, 7.04s/it] 73%|███████▎ | 8951/12188 [19:19:45<6:15:37, 6.96s/it] {'loss': 0.2958, 'grad_norm': 0.7034547194232116, 'learning_rate': 1.7395941650273913e-06, 'epoch': 0.73} + 73%|███████▎ | 8951/12188 [19:19:45<6:15:37, 6.96s/it] 73%|███████▎ | 8952/12188 [19:19:52<6:27:01, 7.18s/it] {'loss': 0.3139, 'grad_norm': 0.7147571910108416, 'learning_rate': 1.7385869227612573e-06, 'epoch': 0.73} + 73%|███████▎ | 8952/12188 [19:19:52<6:27:01, 7.18s/it] 73%|███████▎ | 8953/12188 [19:20:00<6:27:43, 7.19s/it] {'loss': 0.334, 'grad_norm': 0.7400548666269325, 'learning_rate': 1.7375799108108749e-06, 'epoch': 0.73} + 73%|███████▎ | 8953/12188 [19:20:00<6:27:43, 7.19s/it] 73%|███████▎ | 8954/12188 [19:20:07<6:23:11, 7.11s/it] {'loss': 0.3386, 'grad_norm': 0.7084713464067481, 'learning_rate': 1.7365731292473597e-06, 'epoch': 0.73} + 73%|███████▎ | 8954/12188 [19:20:07<6:23:11, 7.11s/it] 73%|███████▎ | 8955/12188 [19:20:14<6:28:59, 7.22s/it] {'loss': 0.2708, 'grad_norm': 0.6504436962409069, 'learning_rate': 1.7355665781418052e-06, 'epoch': 0.73} + 73%|███████▎ | 8955/12188 [19:20:14<6:28:59, 7.22s/it] 73%|███████▎ | 8956/12188 [19:20:21<6:18:11, 7.02s/it] {'loss': 0.3001, 'grad_norm': 0.7592680454740324, 'learning_rate': 1.7345602575652958e-06, 'epoch': 0.73} + 73%|███████▎ | 8956/12188 [19:20:21<6:18:11, 7.02s/it] 73%|███████▎ | 8957/12188 [19:20:28<6:30:44, 7.26s/it] {'loss': 0.3033, 'grad_norm': 0.7167414232461782, 'learning_rate': 1.7335541675888945e-06, 'epoch': 0.73} + 73%|███████▎ | 8957/12188 [19:20:28<6:30:44, 7.26s/it] 73%|███████▎ | 8958/12188 [19:20:36<6:36:14, 7.36s/it] {'loss': 0.2957, 'grad_norm': 0.6916089563364819, 'learning_rate': 1.7325483082836487e-06, 'epoch': 0.73} + 73%|███████▎ | 8958/12188 [19:20:36<6:36:14, 7.36s/it] 74%|███████▎ | 8959/12188 [19:20:43<6:29:25, 7.24s/it] {'loss': 0.2678, 'grad_norm': 0.7135597541464364, 'learning_rate': 1.7315426797205931e-06, 'epoch': 0.74} + 74%|███████▎ | 8959/12188 [19:20:43<6:29:25, 7.24s/it] 74%|███████▎ | 8960/12188 [19:20:50<6:22:17, 7.11s/it] {'loss': 0.2595, 'grad_norm': 0.6501630956375256, 'learning_rate': 1.730537281970741e-06, 'epoch': 0.74} + 74%|███████▎ | 8960/12188 [19:20:50<6:22:17, 7.11s/it] 74%|███████▎ | 8961/12188 [19:20:58<6:39:01, 7.42s/it] {'loss': 0.2914, 'grad_norm': 0.6338480386787156, 'learning_rate': 1.7295321151050948e-06, 'epoch': 0.74} + 74%|███████▎ | 8961/12188 [19:20:58<6:39:01, 7.42s/it] 74%|███████▎ | 8962/12188 [19:21:05<6:38:08, 7.41s/it] {'loss': 0.3293, 'grad_norm': 0.7994384223978753, 'learning_rate': 1.7285271791946345e-06, 'epoch': 0.74} + 74%|███████▎ | 8962/12188 [19:21:05<6:38:08, 7.41s/it] 74%|███████▎ | 8963/12188 [19:21:12<6:26:50, 7.20s/it] {'loss': 0.3253, 'grad_norm': 0.6830473290270371, 'learning_rate': 1.7275224743103287e-06, 'epoch': 0.74} + 74%|███████▎ | 8963/12188 [19:21:12<6:26:50, 7.20s/it] 74%|███████▎ | 8964/12188 [19:21:19<6:16:28, 7.01s/it] {'loss': 0.3051, 'grad_norm': 0.7557528314024115, 'learning_rate': 1.7265180005231296e-06, 'epoch': 0.74} + 74%|███████▎ | 8964/12188 [19:21:19<6:16:28, 7.01s/it] 74%|███████▎ | 8965/12188 [19:21:26<6:24:25, 7.16s/it] {'loss': 0.3169, 'grad_norm': 0.6710443543356633, 'learning_rate': 1.7255137579039689e-06, 'epoch': 0.74} + 74%|███████▎ | 8965/12188 [19:21:26<6:24:25, 7.16s/it] 74%|███████▎ | 8966/12188 [19:21:33<6:29:29, 7.25s/it] {'loss': 0.2839, 'grad_norm': 0.6888025772896211, 'learning_rate': 1.724509746523767e-06, 'epoch': 0.74} + 74%|███████▎ | 8966/12188 [19:21:34<6:29:29, 7.25s/it] 74%|███████▎ | 8967/12188 [19:21:40<6:22:22, 7.12s/it] {'loss': 0.2923, 'grad_norm': 0.7697590119075425, 'learning_rate': 1.7235059664534226e-06, 'epoch': 0.74} + 74%|███████▎ | 8967/12188 [19:21:40<6:22:22, 7.12s/it] 74%|███████▎ | 8968/12188 [19:21:47<6:22:29, 7.13s/it] {'loss': 0.3128, 'grad_norm': 0.8336188151826045, 'learning_rate': 1.7225024177638234e-06, 'epoch': 0.74} + 74%|███████▎ | 8968/12188 [19:21:47<6:22:29, 7.13s/it] 74%|███████▎ | 8969/12188 [19:21:54<6:16:25, 7.02s/it] {'loss': 0.2845, 'grad_norm': 0.6347785848017541, 'learning_rate': 1.7214991005258386e-06, 'epoch': 0.74} + 74%|███████▎ | 8969/12188 [19:21:54<6:16:25, 7.02s/it]Invalidate trace cache @ step 2: expected module 1, but got module 364 + 74%|███████▎ | 8970/12188 [19:22:00<5:53:40, 6.59s/it] {'loss': 0.6286, 'grad_norm': 0.5588556452840895, 'learning_rate': 1.720496014810319e-06, 'epoch': 0.74} + 74%|███████▎ | 8970/12188 [19:22:00<5:53:40, 6.59s/it] 74%|███████▎ | 8971/12188 [19:22:11<7:01:39, 7.86s/it] {'loss': 0.2659, 'grad_norm': 0.7183268785900047, 'learning_rate': 1.7194931606881033e-06, 'epoch': 0.74} + 74%|███████▎ | 8971/12188 [19:22:11<7:01:39, 7.86s/it] 74%|███████▎ | 8972/12188 [19:22:17<6:42:10, 7.50s/it] {'loss': 0.2702, 'grad_norm': 0.6543951534712729, 'learning_rate': 1.7184905382300098e-06, 'epoch': 0.74} + 74%|███████▎ | 8972/12188 [19:22:17<6:42:10, 7.50s/it] 74%|███████▎ | 8973/12188 [19:22:25<6:43:08, 7.52s/it] {'loss': 0.2899, 'grad_norm': 0.7225607439981256, 'learning_rate': 1.7174881475068412e-06, 'epoch': 0.74} + 74%|███████▎ | 8973/12188 [19:22:25<6:43:08, 7.52s/it] 74%|███████▎ | 8974/12188 [19:22:32<6:33:58, 7.35s/it] {'loss': 0.3077, 'grad_norm': 0.794948771972903, 'learning_rate': 1.7164859885893875e-06, 'epoch': 0.74} + 74%|███████▎ | 8974/12188 [19:22:32<6:33:58, 7.35s/it] 74%|███████▎ | 8975/12188 [19:22:39<6:30:15, 7.29s/it] {'loss': 0.3069, 'grad_norm': 0.6813247503058396, 'learning_rate': 1.715484061548416e-06, 'epoch': 0.74} + 74%|███████▎ | 8975/12188 [19:22:39<6:30:15, 7.29s/it] 74%|███████▎ | 8976/12188 [19:22:46<6:22:13, 7.14s/it] {'loss': 0.3193, 'grad_norm': 0.75262455094905, 'learning_rate': 1.7144823664546829e-06, 'epoch': 0.74} + 74%|███████▎ | 8976/12188 [19:22:46<6:22:13, 7.14s/it] 74%|███████▎ | 8977/12188 [19:22:53<6:17:05, 7.05s/it] {'loss': 0.3285, 'grad_norm': 0.7053356726146659, 'learning_rate': 1.7134809033789285e-06, 'epoch': 0.74} + 74%|███████▎ | 8977/12188 [19:22:53<6:17:05, 7.05s/it] 74%|███████▎ | 8978/12188 [19:23:01<6:33:38, 7.36s/it] {'loss': 0.2979, 'grad_norm': 0.6587535464649084, 'learning_rate': 1.7124796723918708e-06, 'epoch': 0.74} + 74%|███████▎ | 8978/12188 [19:23:01<6:33:38, 7.36s/it] 74%|███████▎ | 8979/12188 [19:23:08<6:26:22, 7.22s/it] {'loss': 0.2977, 'grad_norm': 0.9174572515908077, 'learning_rate': 1.711478673564218e-06, 'epoch': 0.74} + 74%|███████▎ | 8979/12188 [19:23:08<6:26:22, 7.22s/it] 74%|███████▎ | 8980/12188 [19:23:16<6:52:25, 7.71s/it] {'loss': 0.2702, 'grad_norm': 0.6728239703671753, 'learning_rate': 1.7104779069666565e-06, 'epoch': 0.74} + 74%|███████▎ | 8980/12188 [19:23:16<6:52:25, 7.71s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1348, in _get_item + raise ValueError( +ValueError: Number of image tokens ['data/ant-design/upload/other_screenshot/original/StyledFileUploader_1742052280.344486.png'] does not match number of images None +[Try #0] Failed to fetch sample 1843668 in VC:s3://gui-agent/jedi/images/final_1.5m/final_1.5m_extracted/. Exception: Number of image tokens ['data/ant-design/upload/other_screenshot/original/StyledFileUploader_1742052280.344486.png'] does not match number of images None +Problematic sample: {'image': 'data/ant-design/upload/other_screenshot/original/StyledFileUploader_1742052280.344486.png', 'conversations': []} + 74%|███████▎ | 8981/12188 [19:23:23<6:40:33, 7.49s/it] {'loss': 0.2901, 'grad_norm': 0.7129829182312416, 'learning_rate': 1.7094773726698605e-06, 'epoch': 0.74} + 74%|███████▎ | 8981/12188 [19:23:23<6:40:33, 7.49s/it] 74%|███████▎ | 8982/12188 [19:23:30<6:33:13, 7.36s/it] {'loss': 0.3488, 'grad_norm': 0.8046584577556191, 'learning_rate': 1.7084770707444876e-06, 'epoch': 0.74} + 74%|███████▎ | 8982/12188 [19:23:30<6:33:13, 7.36s/it] 74%|███████▎ | 8983/12188 [19:23:37<6:24:06, 7.19s/it] {'loss': 0.2948, 'grad_norm': 0.7243545647415289, 'learning_rate': 1.7074770012611736e-06, 'epoch': 0.74} + 74%|███████▎ | 8983/12188 [19:23:37<6:24:06, 7.19s/it] 74%|███████▎ | 8984/12188 [19:23:44<6:16:45, 7.06s/it] {'loss': 0.3312, 'grad_norm': 0.780839927326685, 'learning_rate': 1.706477164290546e-06, 'epoch': 0.74} + 74%|███████▎ | 8984/12188 [19:23:44<6:16:45, 7.06s/it] 74%|███████▎ | 8985/12188 [19:23:51<6:11:55, 6.97s/it] {'loss': 0.3302, 'grad_norm': 0.7429924567306323, 'learning_rate': 1.7054775599032093e-06, 'epoch': 0.74} + 74%|███████▎ | 8985/12188 [19:23:51<6:11:55, 6.97s/it] 74%|███████▎ | 8986/12188 [19:23:58<6:13:24, 7.00s/it] {'loss': 0.3101, 'grad_norm': 0.7682139587949723, 'learning_rate': 1.7044781881697526e-06, 'epoch': 0.74} + 74%|███████▎ | 8986/12188 [19:23:58<6:13:24, 7.00s/it] 74%|███████▎ | 8987/12188 [19:24:07<6:49:43, 7.68s/it] {'loss': 0.2977, 'grad_norm': 0.7167572322321712, 'learning_rate': 1.7034790491607533e-06, 'epoch': 0.74} + 74%|███████▎ | 8987/12188 [19:24:07<6:49:43, 7.68s/it] 74%|███████▎ | 8988/12188 [19:24:15<6:45:27, 7.60s/it] {'loss': 0.2996, 'grad_norm': 0.6640243546463714, 'learning_rate': 1.7024801429467653e-06, 'epoch': 0.74} + 74%|███████▎ | 8988/12188 [19:24:15<6:45:27, 7.60s/it] 74%|███████▍ | 8989/12188 [19:24:22<6:41:49, 7.54s/it] {'loss': 0.3297, 'grad_norm': 0.6545899786588583, 'learning_rate': 1.7014814695983324e-06, 'epoch': 0.74} + 74%|███████▍ | 8989/12188 [19:24:22<6:41:49, 7.54s/it] 74%|███████▍ | 8990/12188 [19:24:29<6:42:08, 7.55s/it] {'loss': 0.2844, 'grad_norm': 0.6700403408978004, 'learning_rate': 1.7004830291859797e-06, 'epoch': 0.74} + 74%|███████▍ | 8990/12188 [19:24:29<6:42:08, 7.55s/it] 74%|███████▍ | 8991/12188 [19:24:38<6:56:42, 7.82s/it] {'loss': 0.3215, 'grad_norm': 0.661776999602463, 'learning_rate': 1.699484821780213e-06, 'epoch': 0.74} + 74%|███████▍ | 8991/12188 [19:24:38<6:56:42, 7.82s/it] 74%|███████▍ | 8992/12188 [19:24:44<6:35:36, 7.43s/it] {'loss': 0.276, 'grad_norm': 0.6081439294094818, 'learning_rate': 1.698486847451527e-06, 'epoch': 0.74} + 74%|███████▍ | 8992/12188 [19:24:44<6:35:36, 7.43s/it] 74%|███████▍ | 8993/12188 [19:24:51<6:25:29, 7.24s/it] {'loss': 0.2995, 'grad_norm': 0.7495937262079024, 'learning_rate': 1.697489106270394e-06, 'epoch': 0.74} + 74%|███████▍ | 8993/12188 [19:24:51<6:25:29, 7.24s/it] 74%|███████▍ | 8994/12188 [19:24:59<6:31:56, 7.36s/it] {'loss': 0.3206, 'grad_norm': 0.6192607642735171, 'learning_rate': 1.696491598307275e-06, 'epoch': 0.74} + 74%|███████▍ | 8994/12188 [19:24:59<6:31:56, 7.36s/it] 74%|███████▍ | 8995/12188 [19:25:07<6:38:12, 7.48s/it] {'loss': 0.2961, 'grad_norm': 0.7284624271437059, 'learning_rate': 1.6954943236326127e-06, 'epoch': 0.74} + 74%|███████▍ | 8995/12188 [19:25:07<6:38:12, 7.48s/it] 74%|███████▍ | 8996/12188 [19:25:14<6:31:26, 7.36s/it] {'loss': 0.3212, 'grad_norm': 0.6603833228740623, 'learning_rate': 1.694497282316831e-06, 'epoch': 0.74} + 74%|███████▍ | 8996/12188 [19:25:14<6:31:26, 7.36s/it] 74%|███████▍ | 8997/12188 [19:25:21<6:29:09, 7.32s/it] {'loss': 0.2835, 'grad_norm': 0.6337974809722191, 'learning_rate': 1.6935004744303419e-06, 'epoch': 0.74} + 74%|███████▍ | 8997/12188 [19:25:21<6:29:09, 7.32s/it] 74%|███████▍ | 8998/12188 [19:25:28<6:20:20, 7.15s/it] {'loss': 0.316, 'grad_norm': 0.6747330689742984, 'learning_rate': 1.6925039000435356e-06, 'epoch': 0.74} + 74%|███████▍ | 8998/12188 [19:25:28<6:20:20, 7.15s/it] 74%|███████▍ | 8999/12188 [19:25:36<6:43:18, 7.59s/it] {'loss': 0.2917, 'grad_norm': 0.6682804784636275, 'learning_rate': 1.691507559226791e-06, 'epoch': 0.74} + 74%|███████▍ | 8999/12188 [19:25:36<6:43:18, 7.59s/it] 74%|███████▍ | 9000/12188 [19:25:43<6:27:36, 7.30s/it] {'loss': 0.3171, 'grad_norm': 0.6691646917769626, 'learning_rate': 1.690511452050468e-06, 'epoch': 0.74} + 74%|███████▍ | 9000/12188 [19:25:43<6:27:36, 7.30s/it]/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/utils/checkpoint.py:87: UserWarning: None of the inputs have requires_grad=True. Gradients will be None + warnings.warn( + 74%|███████▍ | 9001/12188 [19:26:07<10:56:49, 12.37s/it] {'loss': 0.2842, 'grad_norm': 0.669142385643131, 'learning_rate': 1.6895155785849066e-06, 'epoch': 0.74} + 74%|███████▍ | 9001/12188 [19:26:07<10:56:49, 12.37s/it] 74%|███████▍ | 9002/12188 [19:26:15<9:42:41, 10.97s/it] {'loss': 0.2868, 'grad_norm': 0.7554302259797478, 'learning_rate': 1.6885199389004387e-06, 'epoch': 0.74} + 74%|███████▍ | 9002/12188 [19:26:15<9:42:41, 10.97s/it] 74%|███████▍ | 9003/12188 [19:26:22<8:37:25, 9.75s/it] {'loss': 0.3021, 'grad_norm': 0.6748216072871598, 'learning_rate': 1.6875245330673701e-06, 'epoch': 0.74} + 74%|███████▍ | 9003/12188 [19:26:22<8:37:25, 9.75s/it] 74%|███████▍ | 9004/12188 [19:26:29<8:02:35, 9.09s/it] {'loss': 0.2788, 'grad_norm': 0.6696073569844541, 'learning_rate': 1.686529361155998e-06, 'epoch': 0.74} + 74%|███████▍ | 9004/12188 [19:26:29<8:02:35, 9.09s/it] 74%|███████▍ | 9005/12188 [19:26:38<7:55:07, 8.96s/it] {'loss': 0.2808, 'grad_norm': 0.7113559392685461, 'learning_rate': 1.6855344232366e-06, 'epoch': 0.74} + 74%|███████▍ | 9005/12188 [19:26:38<7:55:07, 8.96s/it] 74%|███████▍ | 9006/12188 [19:26:45<7:18:23, 8.27s/it] {'loss': 0.3022, 'grad_norm': 0.6852535051009078, 'learning_rate': 1.6845397193794338e-06, 'epoch': 0.74} + 74%|███████▍ | 9006/12188 [19:26:45<7:18:23, 8.27s/it] 74%|███████▍ | 9007/12188 [19:26:52<7:03:37, 7.99s/it] {'loss': 0.3066, 'grad_norm': 0.6810352344904457, 'learning_rate': 1.6835452496547467e-06, 'epoch': 0.74} + 74%|███████▍ | 9007/12188 [19:26:52<7:03:37, 7.99s/it] 74%|███████▍ | 9008/12188 [19:26:59<6:41:39, 7.58s/it] {'loss': 0.2919, 'grad_norm': 0.7656051204759271, 'learning_rate': 1.6825510141327673e-06, 'epoch': 0.74} + 74%|███████▍ | 9008/12188 [19:26:59<6:41:39, 7.58s/it] 74%|███████▍ | 9009/12188 [19:27:06<6:38:36, 7.52s/it] {'loss': 0.2909, 'grad_norm': 0.7781254526678371, 'learning_rate': 1.6815570128837034e-06, 'epoch': 0.74} + 74%|███████▍ | 9009/12188 [19:27:06<6:38:36, 7.52s/it] 74%|███████▍ | 9010/12188 [19:27:14<6:43:49, 7.62s/it] {'loss': 0.336, 'grad_norm': 0.6887840831685725, 'learning_rate': 1.6805632459777539e-06, 'epoch': 0.74} + 74%|███████▍ | 9010/12188 [19:27:14<6:43:49, 7.62s/it] 74%|███████▍ | 9011/12188 [19:27:21<6:34:34, 7.45s/it] {'loss': 0.299, 'grad_norm': 0.6595737403400069, 'learning_rate': 1.6795697134850925e-06, 'epoch': 0.74} + 74%|███████▍ | 9011/12188 [19:27:21<6:34:34, 7.45s/it] 74%|███████▍ | 9012/12188 [19:27:28<6:24:04, 7.26s/it] {'loss': 0.2993, 'grad_norm': 0.6959654228437983, 'learning_rate': 1.6785764154758856e-06, 'epoch': 0.74} + 74%|███████▍ | 9012/12188 [19:27:28<6:24:04, 7.26s/it] 74%|███████▍ | 9013/12188 [19:27:36<6:35:06, 7.47s/it] {'loss': 0.2698, 'grad_norm': 0.6539133073708367, 'learning_rate': 1.6775833520202756e-06, 'epoch': 0.74} + 74%|███████▍ | 9013/12188 [19:27:36<6:35:06, 7.47s/it] 74%|███████▍ | 9014/12188 [19:27:43<6:25:32, 7.29s/it] {'loss': 0.3117, 'grad_norm': 0.7125963070720504, 'learning_rate': 1.67659052318839e-06, 'epoch': 0.74} + 74%|███████▍ | 9014/12188 [19:27:43<6:25:32, 7.29s/it] 74%|███████▍ | 9015/12188 [19:27:49<6:16:39, 7.12s/it] {'loss': 0.2954, 'grad_norm': 0.6567861862275963, 'learning_rate': 1.6755979290503437e-06, 'epoch': 0.74} + 74%|███████▍ | 9015/12188 [19:27:49<6:16:39, 7.12s/it] 74%|███████▍ | 9016/12188 [19:27:56<6:10:48, 7.01s/it] {'loss': 0.3265, 'grad_norm': 0.7747312126925355, 'learning_rate': 1.6746055696762292e-06, 'epoch': 0.74} + 74%|███████▍ | 9016/12188 [19:27:56<6:10:48, 7.01s/it] 74%|███████▍ | 9017/12188 [19:28:03<6:03:31, 6.88s/it] {'loss': 0.3161, 'grad_norm': 0.6508837414668139, 'learning_rate': 1.6736134451361264e-06, 'epoch': 0.74} + 74%|███████▍ | 9017/12188 [19:28:03<6:03:31, 6.88s/it] 74%|███████▍ | 9018/12188 [19:28:10<6:08:15, 6.97s/it] {'loss': 0.3238, 'grad_norm': 0.6849043517614752, 'learning_rate': 1.6726215555000997e-06, 'epoch': 0.74} + 74%|███████▍ | 9018/12188 [19:28:10<6:08:15, 6.97s/it] 74%|███████▍ | 9019/12188 [19:28:17<6:07:30, 6.96s/it] {'loss': 0.2829, 'grad_norm': 0.6676529722789417, 'learning_rate': 1.6716299008381908e-06, 'epoch': 0.74} + 74%|███████▍ | 9019/12188 [19:28:17<6:07:30, 6.96s/it] 74%|███████▍ | 9020/12188 [19:28:25<6:23:21, 7.26s/it] {'loss': 0.2974, 'grad_norm': 0.6948137184215674, 'learning_rate': 1.670638481220433e-06, 'epoch': 0.74} + 74%|███████▍ | 9020/12188 [19:28:25<6:23:21, 7.26s/it] 74%|███████▍ | 9021/12188 [19:28:32<6:30:27, 7.40s/it] {'loss': 0.3229, 'grad_norm': 0.672271929099283, 'learning_rate': 1.6696472967168348e-06, 'epoch': 0.74} + 74%|███████▍ | 9021/12188 [19:28:32<6:30:27, 7.40s/it] 74%|███████▍ | 9022/12188 [19:28:39<6:16:40, 7.14s/it] {'loss': 0.32, 'grad_norm': 0.7291428635669842, 'learning_rate': 1.6686563473973937e-06, 'epoch': 0.74} + 74%|███████▍ | 9022/12188 [19:28:39<6:16:40, 7.14s/it] 74%|███████▍ | 9023/12188 [19:28:46<6:13:16, 7.08s/it] {'loss': 0.313, 'grad_norm': 0.7161935190345947, 'learning_rate': 1.6676656333320912e-06, 'epoch': 0.74} + 74%|███████▍ | 9023/12188 [19:28:46<6:13:16, 7.08s/it] 74%|███████▍ | 9024/12188 [19:28:54<6:29:13, 7.38s/it] {'loss': 0.3152, 'grad_norm': 0.6756512198325326, 'learning_rate': 1.666675154590886e-06, 'epoch': 0.74} + 74%|███████▍ | 9024/12188 [19:28:54<6:29:13, 7.38s/it] 74%|███████▍ | 9025/12188 [19:29:02<6:47:08, 7.72s/it] {'loss': 0.3384, 'grad_norm': 0.6653992130100417, 'learning_rate': 1.6656849112437278e-06, 'epoch': 0.74} + 74%|███████▍ | 9025/12188 [19:29:02<6:47:08, 7.72s/it] 74%|███████▍ | 9026/12188 [19:29:09<6:31:59, 7.44s/it] {'loss': 0.3228, 'grad_norm': 0.7050157583344595, 'learning_rate': 1.6646949033605425e-06, 'epoch': 0.74} + 74%|███████▍ | 9026/12188 [19:29:09<6:31:59, 7.44s/it] 74%|███████▍ | 9027/12188 [19:29:16<6:19:25, 7.20s/it] {'loss': 0.299, 'grad_norm': 0.7089085248281227, 'learning_rate': 1.6637051310112462e-06, 'epoch': 0.74} + 74%|███████▍ | 9027/12188 [19:29:16<6:19:25, 7.20s/it] 74%|███████▍ | 9028/12188 [19:29:23<6:15:26, 7.13s/it] {'loss': 0.3063, 'grad_norm': 0.7224555571107467, 'learning_rate': 1.662715594265733e-06, 'epoch': 0.74} + 74%|███████▍ | 9028/12188 [19:29:23<6:15:26, 7.13s/it] 74%|███████▍ | 9029/12188 [19:29:30<6:23:10, 7.28s/it] {'loss': 0.2719, 'grad_norm': 0.643130073472629, 'learning_rate': 1.6617262931938815e-06, 'epoch': 0.74} + 74%|███████▍ | 9029/12188 [19:29:30<6:23:10, 7.28s/it] 74%|███████▍ | 9030/12188 [19:29:38<6:20:23, 7.23s/it] {'loss': 0.285, 'grad_norm': 0.6784819218440444, 'learning_rate': 1.660737227865556e-06, 'epoch': 0.74} + 74%|███████▍ | 9030/12188 [19:29:38<6:20:23, 7.23s/it] 74%|███████▍ | 9031/12188 [19:29:44<6:11:05, 7.05s/it] {'loss': 0.3309, 'grad_norm': 0.7061714823905714, 'learning_rate': 1.6597483983506034e-06, 'epoch': 0.74} + 74%|███████▍ | 9031/12188 [19:29:44<6:11:05, 7.05s/it] 74%|███████▍ | 9032/12188 [19:29:53<6:37:45, 7.56s/it] {'loss': 0.3167, 'grad_norm': 0.7025956431357532, 'learning_rate': 1.6587598047188514e-06, 'epoch': 0.74} + 74%|███████▍ | 9032/12188 [19:29:53<6:37:45, 7.56s/it] 74%|███████▍ | 9033/12188 [19:30:00<6:28:10, 7.38s/it] {'loss': 0.2829, 'grad_norm': 0.6700806704633374, 'learning_rate': 1.6577714470401158e-06, 'epoch': 0.74} + 74%|███████▍ | 9033/12188 [19:30:00<6:28:10, 7.38s/it] 74%|███████▍ | 9034/12188 [19:30:08<6:33:07, 7.48s/it] {'loss': 0.3129, 'grad_norm': 0.6570568393094431, 'learning_rate': 1.6567833253841887e-06, 'epoch': 0.74} + 74%|███████▍ | 9034/12188 [19:30:08<6:33:07, 7.48s/it] 74%|███████▍ | 9035/12188 [19:30:15<6:26:52, 7.36s/it] {'loss': 0.2962, 'grad_norm': 0.6427978024680875, 'learning_rate': 1.655795439820852e-06, 'epoch': 0.74} + 74%|███████▍ | 9035/12188 [19:30:15<6:26:52, 7.36s/it] 74%|███████▍ | 9036/12188 [19:30:22<6:30:05, 7.43s/it] {'loss': 0.2743, 'grad_norm': 0.7250316001636591, 'learning_rate': 1.6548077904198701e-06, 'epoch': 0.74} + 74%|███████▍ | 9036/12188 [19:30:22<6:30:05, 7.43s/it] 74%|███████▍ | 9037/12188 [19:30:29<6:22:17, 7.28s/it] {'loss': 0.2799, 'grad_norm': 0.6381439574914632, 'learning_rate': 1.6538203772509859e-06, 'epoch': 0.74} + 74%|███████▍ | 9037/12188 [19:30:29<6:22:17, 7.28s/it] 74%|███████▍ | 9038/12188 [19:30:36<6:21:56, 7.28s/it] {'loss': 0.3099, 'grad_norm': 0.7045658746306751, 'learning_rate': 1.6528332003839325e-06, 'epoch': 0.74} + 74%|███████▍ | 9038/12188 [19:30:36<6:21:56, 7.28s/it] 74%|███████▍ | 9039/12188 [19:30:43<6:13:25, 7.12s/it] {'loss': 0.3358, 'grad_norm': 0.7334610133559927, 'learning_rate': 1.6518462598884188e-06, 'epoch': 0.74} + 74%|███████▍ | 9039/12188 [19:30:43<6:13:25, 7.12s/it] 74%|███████▍ | 9040/12188 [19:30:51<6:18:59, 7.22s/it] {'loss': 0.2804, 'grad_norm': 0.6757987733512806, 'learning_rate': 1.650859555834145e-06, 'epoch': 0.74} + 74%|███████▍ | 9040/12188 [19:30:51<6:18:59, 7.22s/it] 74%|███████▍ | 9041/12188 [19:30:58<6:17:44, 7.20s/it] {'loss': 0.3007, 'grad_norm': 0.6444314322776145, 'learning_rate': 1.6498730882907882e-06, 'epoch': 0.74} + 74%|███████▍ | 9041/12188 [19:30:58<6:17:44, 7.20s/it] 74%|███████▍ | 9042/12188 [19:31:05<6:21:42, 7.28s/it] {'loss': 0.3084, 'grad_norm': 0.6408017705043506, 'learning_rate': 1.6488868573280104e-06, 'epoch': 0.74} + 74%|███████▍ | 9042/12188 [19:31:05<6:21:42, 7.28s/it] 74%|███████▍ | 9043/12188 [19:31:13<6:21:25, 7.28s/it] {'loss': 0.305, 'grad_norm': 0.6873893748127926, 'learning_rate': 1.6479008630154585e-06, 'epoch': 0.74} + 74%|███████▍ | 9043/12188 [19:31:13<6:21:25, 7.28s/it] 74%|███████▍ | 9044/12188 [19:31:20<6:15:38, 7.17s/it] {'loss': 0.309, 'grad_norm': 0.6839377081987794, 'learning_rate': 1.6469151054227638e-06, 'epoch': 0.74} + 74%|███████▍ | 9044/12188 [19:31:20<6:15:38, 7.17s/it] 74%|███████▍ | 9045/12188 [19:31:26<6:10:14, 7.07s/it] {'loss': 0.3058, 'grad_norm': 0.6907260301923959, 'learning_rate': 1.6459295846195355e-06, 'epoch': 0.74} + 74%|███████▍ | 9045/12188 [19:31:26<6:10:14, 7.07s/it] 74%|███████▍ | 9046/12188 [19:31:35<6:41:13, 7.66s/it] {'loss': 0.3124, 'grad_norm': 0.7109450753627912, 'learning_rate': 1.644944300675373e-06, 'epoch': 0.74} + 74%|███████▍ | 9046/12188 [19:31:35<6:41:13, 7.66s/it] 74%|███████▍ | 9047/12188 [19:31:42<6:31:39, 7.48s/it] {'loss': 0.2877, 'grad_norm': 0.8873070016231834, 'learning_rate': 1.6439592536598515e-06, 'epoch': 0.74} + 74%|███████▍ | 9047/12188 [19:31:42<6:31:39, 7.48s/it] 74%|███████▍ | 9048/12188 [19:31:50<6:35:40, 7.56s/it] {'loss': 0.2643, 'grad_norm': 0.7563417544915398, 'learning_rate': 1.642974443642536e-06, 'epoch': 0.74} + 74%|███████▍ | 9048/12188 [19:31:50<6:35:40, 7.56s/it] 74%|███████▍ | 9049/12188 [19:31:57<6:24:56, 7.36s/it] {'loss': 0.3394, 'grad_norm': 0.769967575918045, 'learning_rate': 1.6419898706929731e-06, 'epoch': 0.74} + 74%|███████▍ | 9049/12188 [19:31:57<6:24:56, 7.36s/it] 74%|███████▍ | 9050/12188 [19:32:04<6:18:59, 7.25s/it] {'loss': 0.3303, 'grad_norm': 0.7782230040504208, 'learning_rate': 1.6410055348806898e-06, 'epoch': 0.74} + 74%|███████▍ | 9050/12188 [19:32:04<6:18:59, 7.25s/it] 74%|███████▍ | 9051/12188 [19:32:11<6:13:16, 7.14s/it] {'loss': 0.2923, 'grad_norm': 0.7106882180974036, 'learning_rate': 1.6400214362751997e-06, 'epoch': 0.74} + 74%|███████▍ | 9051/12188 [19:32:11<6:13:16, 7.14s/it] 74%|███████▍ | 9052/12188 [19:32:18<6:07:16, 7.03s/it] {'loss': 0.2873, 'grad_norm': 0.8164908788626132, 'learning_rate': 1.6390375749459963e-06, 'epoch': 0.74} + 74%|███████▍ | 9052/12188 [19:32:18<6:07:16, 7.03s/it] 74%|███████▍ | 9053/12188 [19:32:25<6:14:33, 7.17s/it] {'loss': 0.3028, 'grad_norm': 0.677029858260695, 'learning_rate': 1.638053950962561e-06, 'epoch': 0.74} + 74%|███████▍ | 9053/12188 [19:32:25<6:14:33, 7.17s/it] 74%|███████▍ | 9054/12188 [19:32:32<6:11:14, 7.11s/it] {'loss': 0.3011, 'grad_norm': 0.6931006718509175, 'learning_rate': 1.637070564394353e-06, 'epoch': 0.74} + 74%|███████▍ | 9054/12188 [19:32:32<6:11:14, 7.11s/it] 74%|███████▍ | 9055/12188 [19:32:39<6:12:08, 7.13s/it] {'loss': 0.2591, 'grad_norm': 0.6410349229389035, 'learning_rate': 1.6360874153108203e-06, 'epoch': 0.74} + 74%|███████▍ | 9055/12188 [19:32:39<6:12:08, 7.13s/it] 74%|███████▍ | 9056/12188 [19:32:48<6:32:21, 7.52s/it] {'loss': 0.2898, 'grad_norm': 0.7006120024676324, 'learning_rate': 1.6351045037813895e-06, 'epoch': 0.74} + 74%|███████▍ | 9056/12188 [19:32:48<6:32:21, 7.52s/it] 74%|███████▍ | 9057/12188 [19:32:56<6:43:31, 7.73s/it] {'loss': 0.3135, 'grad_norm': 0.8809873411804241, 'learning_rate': 1.6341218298754713e-06, 'epoch': 0.74} + 74%|███████▍ | 9057/12188 [19:32:56<6:43:31, 7.73s/it] 74%|███████▍ | 9058/12188 [19:33:04<6:43:54, 7.74s/it] {'loss': 0.296, 'grad_norm': 0.6587730460852336, 'learning_rate': 1.6331393936624612e-06, 'epoch': 0.74} + 74%|███████▍ | 9058/12188 [19:33:04<6:43:54, 7.74s/it] 74%|███████▍ | 9059/12188 [19:33:12<6:50:11, 7.87s/it] {'loss': 0.3096, 'grad_norm': 0.7580695283487353, 'learning_rate': 1.6321571952117399e-06, 'epoch': 0.74} + 74%|███████▍ | 9059/12188 [19:33:12<6:50:11, 7.87s/it] 74%|███████▍ | 9060/12188 [19:33:19<6:45:16, 7.77s/it] {'loss': 0.2793, 'grad_norm': 0.625850373632755, 'learning_rate': 1.631175234592665e-06, 'epoch': 0.74} + 74%|███████▍ | 9060/12188 [19:33:20<6:45:16, 7.77s/it] 74%|███████▍ | 9061/12188 [19:33:27<6:38:03, 7.64s/it] {'loss': 0.3116, 'grad_norm': 0.6433131772018245, 'learning_rate': 1.6301935118745826e-06, 'epoch': 0.74} + 74%|███████▍ | 9061/12188 [19:33:27<6:38:03, 7.64s/it] 74%|███████▍ | 9062/12188 [19:33:34<6:27:53, 7.45s/it] {'loss': 0.3238, 'grad_norm': 0.6328049199908392, 'learning_rate': 1.629212027126822e-06, 'epoch': 0.74} + 74%|███████▍ | 9062/12188 [19:33:34<6:27:53, 7.45s/it] 74%|███████▍ | 9063/12188 [19:33:43<6:48:01, 7.83s/it] {'loss': 0.3225, 'grad_norm': 0.6579109952651608, 'learning_rate': 1.628230780418691e-06, 'epoch': 0.74} + 74%|███████▍ | 9063/12188 [19:33:43<6:48:01, 7.83s/it] 74%|███████▍ | 9064/12188 [19:33:50<6:40:11, 7.69s/it] {'loss': 0.2825, 'grad_norm': 0.760977784763091, 'learning_rate': 1.6272497718194863e-06, 'epoch': 0.74} + 74%|███████▍ | 9064/12188 [19:33:50<6:40:11, 7.69s/it] 74%|███████▍ | 9065/12188 [19:33:57<6:37:27, 7.64s/it] {'loss': 0.3084, 'grad_norm': 0.6390701665519067, 'learning_rate': 1.6262690013984827e-06, 'epoch': 0.74} + 74%|███████▍ | 9065/12188 [19:33:57<6:37:27, 7.64s/it] 74%|███████▍ | 9066/12188 [19:34:04<6:22:02, 7.34s/it] {'loss': 0.3215, 'grad_norm': 0.6956008181474991, 'learning_rate': 1.6252884692249416e-06, 'epoch': 0.74} + 74%|███████▍ | 9066/12188 [19:34:04<6:22:02, 7.34s/it] 74%|███████▍ | 9067/12188 [19:34:12<6:26:10, 7.42s/it] {'loss': 0.2684, 'grad_norm': 0.6473628412443632, 'learning_rate': 1.6243081753681088e-06, 'epoch': 0.74} + 74%|███████▍ | 9067/12188 [19:34:12<6:26:10, 7.42s/it] 74%|███████▍ | 9068/12188 [19:34:19<6:21:15, 7.33s/it] {'loss': 0.3276, 'grad_norm': 0.8658996846136411, 'learning_rate': 1.6233281198972095e-06, 'epoch': 0.74} + 74%|███████▍ | 9068/12188 [19:34:19<6:21:15, 7.33s/it] 74%|███████▍ | 9069/12188 [19:34:26<6:20:52, 7.33s/it] {'loss': 0.3135, 'grad_norm': 0.6890209852544428, 'learning_rate': 1.622348302881453e-06, 'epoch': 0.74} + 74%|███████▍ | 9069/12188 [19:34:26<6:20:52, 7.33s/it] 74%|███████▍ | 9070/12188 [19:34:35<6:44:21, 7.78s/it] {'loss': 0.3189, 'grad_norm': 0.7010237341276734, 'learning_rate': 1.6213687243900306e-06, 'epoch': 0.74} + 74%|███████▍ | 9070/12188 [19:34:35<6:44:21, 7.78s/it] 74%|███████▍ | 9071/12188 [19:34:42<6:29:41, 7.50s/it] {'loss': 0.3003, 'grad_norm': 0.7136902587226045, 'learning_rate': 1.6203893844921215e-06, 'epoch': 0.74} + 74%|███████▍ | 9071/12188 [19:34:42<6:29:41, 7.50s/it] 74%|███████▍ | 9072/12188 [19:34:50<6:36:10, 7.63s/it] {'loss': 0.2873, 'grad_norm': 0.7015107801535179, 'learning_rate': 1.6194102832568852e-06, 'epoch': 0.74} + 74%|███████▍ | 9072/12188 [19:34:50<6:36:10, 7.63s/it] 74%|███████▍ | 9073/12188 [19:34:57<6:29:58, 7.51s/it] {'loss': 0.3279, 'grad_norm': 0.7184989613323525, 'learning_rate': 1.6184314207534624e-06, 'epoch': 0.74} + 74%|███████▍ | 9073/12188 [19:34:57<6:29:58, 7.51s/it] 74%|███████▍ | 9074/12188 [19:35:04<6:15:57, 7.24s/it] {'loss': 0.2948, 'grad_norm': 0.6616924754603267, 'learning_rate': 1.6174527970509812e-06, 'epoch': 0.74} + 74%|███████▍ | 9074/12188 [19:35:04<6:15:57, 7.24s/it] 74%|███████▍ | 9075/12188 [19:35:10<6:08:52, 7.11s/it] {'loss': 0.3243, 'grad_norm': 0.7307505813173579, 'learning_rate': 1.6164744122185472e-06, 'epoch': 0.74} + 74%|███████▍ | 9075/12188 [19:35:10<6:08:52, 7.11s/it] 74%|███████▍ | 9076/12188 [19:35:17<6:05:43, 7.05s/it] {'loss': 0.29, 'grad_norm': 0.6836660117735431, 'learning_rate': 1.6154962663252543e-06, 'epoch': 0.74} + 74%|███████▍ | 9076/12188 [19:35:17<6:05:43, 7.05s/it] 74%|███████▍ | 9077/12188 [19:35:27<6:53:18, 7.97s/it] {'loss': 0.3002, 'grad_norm': 0.608680821176075, 'learning_rate': 1.6145183594401793e-06, 'epoch': 0.74} + 74%|███████▍ | 9077/12188 [19:35:27<6:53:18, 7.97s/it] 74%|███████▍ | 9078/12188 [19:35:34<6:34:04, 7.60s/it] {'loss': 0.2685, 'grad_norm': 0.6482706401415432, 'learning_rate': 1.6135406916323764e-06, 'epoch': 0.74} + 74%|███████▍ | 9078/12188 [19:35:34<6:34:04, 7.60s/it] 74%|███████▍ | 9079/12188 [19:35:42<6:42:55, 7.78s/it] {'loss': 0.2868, 'grad_norm': 1.1772271194438342, 'learning_rate': 1.6125632629708892e-06, 'epoch': 0.74} + 74%|███████▍ | 9079/12188 [19:35:42<6:42:55, 7.78s/it] 74%|███████▍ | 9080/12188 [19:35:50<6:36:56, 7.66s/it] {'loss': 0.3254, 'grad_norm': 0.701411424365915, 'learning_rate': 1.611586073524744e-06, 'epoch': 0.74} + 74%|███████▍ | 9080/12188 [19:35:50<6:36:56, 7.66s/it] 75%|███████▍ | 9081/12188 [19:35:57<6:23:04, 7.40s/it] {'loss': 0.2799, 'grad_norm': 0.7361903716794618, 'learning_rate': 1.6106091233629462e-06, 'epoch': 0.75} + 75%|███████▍ | 9081/12188 [19:35:57<6:23:04, 7.40s/it] 75%|███████▍ | 9082/12188 [19:36:03<6:16:12, 7.27s/it] {'loss': 0.3219, 'grad_norm': 0.7315931879518521, 'learning_rate': 1.6096324125544854e-06, 'epoch': 0.75} + 75%|███████▍ | 9082/12188 [19:36:03<6:16:12, 7.27s/it] 75%|███████▍ | 9083/12188 [19:36:10<6:11:25, 7.18s/it] {'loss': 0.284, 'grad_norm': 0.7032596652363772, 'learning_rate': 1.6086559411683377e-06, 'epoch': 0.75} + 75%|███████▍ | 9083/12188 [19:36:10<6:11:25, 7.18s/it] 75%|███████▍ | 9084/12188 [19:36:17<6:08:35, 7.12s/it] {'loss': 0.3033, 'grad_norm': 0.8831812746427092, 'learning_rate': 1.6076797092734575e-06, 'epoch': 0.75} + 75%|███████▍ | 9084/12188 [19:36:17<6:08:35, 7.12s/it] 75%|███████▍ | 9085/12188 [19:36:24<6:04:38, 7.05s/it] {'loss': 0.3371, 'grad_norm': 0.7182723518834118, 'learning_rate': 1.6067037169387873e-06, 'epoch': 0.75} + 75%|███████▍ | 9085/12188 [19:36:24<6:04:38, 7.05s/it] 75%|███████▍ | 9086/12188 [19:36:32<6:17:36, 7.30s/it] {'loss': 0.3004, 'grad_norm': 0.6582303628028963, 'learning_rate': 1.6057279642332474e-06, 'epoch': 0.75} + 75%|███████▍ | 9086/12188 [19:36:32<6:17:36, 7.30s/it] 75%|███████▍ | 9087/12188 [19:36:39<6:15:12, 7.26s/it] {'loss': 0.2881, 'grad_norm': 0.6663364135194446, 'learning_rate': 1.6047524512257473e-06, 'epoch': 0.75} + 75%|███████▍ | 9087/12188 [19:36:39<6:15:12, 7.26s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1348, in _get_item + raise ValueError( +ValueError: Number of image tokens ['data/slider/other_screenshot/original/AudioControlPanel_1739898931.4442017.png'] does not match number of images None +[Try #0] Failed to fetch sample 1863619 in VC:s3://gui-agent/jedi/images/component_v1_130k/component_v1_130k_extracted/. Exception: Number of image tokens ['data/slider/other_screenshot/original/AudioControlPanel_1739898931.4442017.png'] does not match number of images None +Problematic sample: {'image': 'data/slider/other_screenshot/original/AudioControlPanel_1739898931.4442017.png', 'conversations': [], 'image_id': 'data/slider/other_screenshot/original/AudioControlPanel_1739898931.4442017.png'} + 75%|███████▍ | 9088/12188 [19:36:47<6:28:21, 7.52s/it] {'loss': 0.3233, 'grad_norm': 0.6743970683969616, 'learning_rate': 1.6037771779851725e-06, 'epoch': 0.75} + 75%|███████▍ | 9088/12188 [19:36:48<6:28:21, 7.52s/it] 75%|███████▍ | 9089/12188 [19:36:55<6:24:13, 7.44s/it] {'loss': 0.277, 'grad_norm': 0.7505819672733494, 'learning_rate': 1.6028021445803966e-06, 'epoch': 0.75} + 75%|███████▍ | 9089/12188 [19:36:55<6:24:13, 7.44s/it] 75%|███████▍ | 9090/12188 [19:37:02<6:20:55, 7.38s/it] {'loss': 0.3055, 'grad_norm': 0.7318898604431618, 'learning_rate': 1.6018273510802768e-06, 'epoch': 0.75} + 75%|███████▍ | 9090/12188 [19:37:02<6:20:55, 7.38s/it] 75%|███████▍ | 9091/12188 [19:37:10<6:24:46, 7.45s/it] {'loss': 0.3098, 'grad_norm': 0.6778502740054468, 'learning_rate': 1.6008527975536487e-06, 'epoch': 0.75} + 75%|███████▍ | 9091/12188 [19:37:10<6:24:46, 7.45s/it] 75%|███████▍ | 9092/12188 [19:37:17<6:23:38, 7.44s/it] {'loss': 0.3267, 'grad_norm': 0.8894320238228726, 'learning_rate': 1.599878484069336e-06, 'epoch': 0.75} + 75%|███████▍ | 9092/12188 [19:37:17<6:23:38, 7.44s/it] 75%|███████▍ | 9093/12188 [19:37:25<6:27:25, 7.51s/it] {'loss': 0.2935, 'grad_norm': 0.7713967220081328, 'learning_rate': 1.598904410696141e-06, 'epoch': 0.75} + 75%|███████▍ | 9093/12188 [19:37:25<6:27:25, 7.51s/it] 75%|███████▍ | 9094/12188 [19:37:32<6:22:59, 7.43s/it] {'loss': 0.3312, 'grad_norm': 0.7281329513031478, 'learning_rate': 1.597930577502852e-06, 'epoch': 0.75} + 75%|███████▍ | 9094/12188 [19:37:32<6:22:59, 7.43s/it] 75%|███████▍ | 9095/12188 [19:37:40<6:25:05, 7.47s/it] {'loss': 0.3054, 'grad_norm': 0.8239568943234206, 'learning_rate': 1.596956984558241e-06, 'epoch': 0.75} + 75%|███████▍ | 9095/12188 [19:37:40<6:25:05, 7.47s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f7393c84540> +[Try #0] Failed to fetch sample 4744415 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f7393c84540> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Create account'"}, {'from': 'gpt', 'value': '\nclick(x=0.9395, y=0.1175)\n'}]} + 75%|███████▍ | 9096/12188 [19:37:46<6:16:46, 7.31s/it] {'loss': 0.2944, 'grad_norm': 0.734026979538311, 'learning_rate': 1.5959836319310612e-06, 'epoch': 0.75} + 75%|███████▍ | 9096/12188 [19:37:46<6:16:46, 7.31s/it] 75%|███████▍ | 9097/12188 [19:37:54<6:12:45, 7.24s/it] {'loss': 0.2798, 'grad_norm': 0.6846406801193483, 'learning_rate': 1.595010519690046e-06, 'epoch': 0.75} + 75%|███████▍ | 9097/12188 [19:37:54<6:12:45, 7.24s/it] 75%|███████▍ | 9098/12188 [19:38:01<6:09:43, 7.18s/it] {'loss': 0.3423, 'grad_norm': 0.6684459546380533, 'learning_rate': 1.5940376479039195e-06, 'epoch': 0.75} + 75%|███████▍ | 9098/12188 [19:38:01<6:09:43, 7.18s/it] 75%|███████▍ | 9099/12188 [19:38:08<6:15:03, 7.29s/it] {'loss': 0.2736, 'grad_norm': 0.6869583231095938, 'learning_rate': 1.5930650166413803e-06, 'epoch': 0.75} + 75%|███████▍ | 9099/12188 [19:38:08<6:15:03, 7.29s/it] 75%|███████▍ | 9100/12188 [19:38:16<6:20:02, 7.38s/it] {'loss': 0.3064, 'grad_norm': 0.6922256372328327, 'learning_rate': 1.5920926259711173e-06, 'epoch': 0.75} + 75%|███████▍ | 9100/12188 [19:38:16<6:20:02, 7.38s/it] 75%|███████▍ | 9101/12188 [19:38:25<6:47:02, 7.91s/it] {'loss': 0.2969, 'grad_norm': 0.7265339067274329, 'learning_rate': 1.5911204759617965e-06, 'epoch': 0.75} + 75%|███████▍ | 9101/12188 [19:38:25<6:47:02, 7.91s/it] 75%|███████▍ | 9102/12188 [19:38:34<7:07:00, 8.30s/it] {'loss': 0.2853, 'grad_norm': 0.7300266230464632, 'learning_rate': 1.590148566682071e-06, 'epoch': 0.75} + 75%|███████▍ | 9102/12188 [19:38:34<7:07:00, 8.30s/it] 75%|███████▍ | 9103/12188 [19:38:41<6:46:17, 7.90s/it] {'loss': 0.2807, 'grad_norm': 0.680961929903712, 'learning_rate': 1.5891768982005768e-06, 'epoch': 0.75} + 75%|███████▍ | 9103/12188 [19:38:41<6:46:17, 7.90s/it] 75%|███████▍ | 9104/12188 [19:38:48<6:38:55, 7.76s/it] {'loss': 0.3683, 'grad_norm': 0.8295506291640764, 'learning_rate': 1.588205470585929e-06, 'epoch': 0.75} + 75%|███████▍ | 9104/12188 [19:38:48<6:38:55, 7.76s/it] 75%|███████▍ | 9105/12188 [19:38:55<6:19:40, 7.39s/it] {'loss': 0.2941, 'grad_norm': 0.7745336401257691, 'learning_rate': 1.5872342839067305e-06, 'epoch': 0.75} + 75%|███████▍ | 9105/12188 [19:38:55<6:19:40, 7.39s/it] 75%|███████▍ | 9106/12188 [19:39:02<6:19:22, 7.39s/it] {'loss': 0.319, 'grad_norm': 0.6980051574387587, 'learning_rate': 1.5862633382315622e-06, 'epoch': 0.75} + 75%|███████▍ | 9106/12188 [19:39:02<6:19:22, 7.39s/it] 75%|███████▍ | 9107/12188 [19:39:11<6:32:27, 7.64s/it] {'loss': 0.3218, 'grad_norm': 0.7052923924436134, 'learning_rate': 1.5852926336289926e-06, 'epoch': 0.75} + 75%|███████▍ | 9107/12188 [19:39:11<6:32:27, 7.64s/it] 75%|███████▍ | 9108/12188 [19:39:18<6:22:32, 7.45s/it] {'loss': 0.2935, 'grad_norm': 0.7001588659351242, 'learning_rate': 1.5843221701675725e-06, 'epoch': 0.75} + 75%|███████▍ | 9108/12188 [19:39:18<6:22:32, 7.45s/it] 75%|███████▍ | 9109/12188 [19:39:24<6:11:44, 7.24s/it] {'loss': 0.2744, 'grad_norm': 0.8195933065597554, 'learning_rate': 1.5833519479158332e-06, 'epoch': 0.75} + 75%|███████▍ | 9109/12188 [19:39:24<6:11:44, 7.24s/it] 75%|███████▍ | 9110/12188 [19:39:33<6:35:10, 7.70s/it] {'loss': 0.3073, 'grad_norm': 0.8725899815376125, 'learning_rate': 1.5823819669422885e-06, 'epoch': 0.75} + 75%|███████▍ | 9110/12188 [19:39:33<6:35:10, 7.70s/it] 75%|███████▍ | 9111/12188 [19:39:41<6:44:40, 7.89s/it] {'loss': 0.315, 'grad_norm': 0.6630305558588349, 'learning_rate': 1.5814122273154404e-06, 'epoch': 0.75} + 75%|███████▍ | 9111/12188 [19:39:41<6:44:40, 7.89s/it] 75%|███████▍ | 9112/12188 [19:39:49<6:32:27, 7.66s/it] {'loss': 0.2608, 'grad_norm': 0.6857815119340577, 'learning_rate': 1.580442729103766e-06, 'epoch': 0.75} + 75%|███████▍ | 9112/12188 [19:39:49<6:32:27, 7.66s/it] 75%|███████▍ | 9113/12188 [19:39:56<6:27:35, 7.56s/it] {'loss': 0.298, 'grad_norm': 0.6924452157841015, 'learning_rate': 1.5794734723757343e-06, 'epoch': 0.75} + 75%|███████▍ | 9113/12188 [19:39:56<6:27:35, 7.56s/it] 75%|███████▍ | 9114/12188 [19:40:04<6:30:59, 7.63s/it] {'loss': 0.3158, 'grad_norm': 0.6927464430144495, 'learning_rate': 1.5785044571997888e-06, 'epoch': 0.75} + 75%|███████▍ | 9114/12188 [19:40:04<6:30:59, 7.63s/it] 75%|███████▍ | 9115/12188 [19:40:11<6:31:15, 7.64s/it] {'loss': 0.2924, 'grad_norm': 0.7163074354249813, 'learning_rate': 1.5775356836443611e-06, 'epoch': 0.75} + 75%|███████▍ | 9115/12188 [19:40:11<6:31:15, 7.64s/it] 75%|███████▍ | 9116/12188 [19:40:19<6:27:54, 7.58s/it] {'loss': 0.3282, 'grad_norm': 0.7304108684829801, 'learning_rate': 1.5765671517778668e-06, 'epoch': 0.75} + 75%|███████▍ | 9116/12188 [19:40:19<6:27:54, 7.58s/it] 75%|███████▍ | 9117/12188 [19:40:26<6:15:39, 7.34s/it] {'loss': 0.3243, 'grad_norm': 0.708041766507495, 'learning_rate': 1.5755988616686984e-06, 'epoch': 0.75} + 75%|███████▍ | 9117/12188 [19:40:26<6:15:39, 7.34s/it] 75%|███████▍ | 9118/12188 [19:40:32<6:05:12, 7.14s/it] {'loss': 0.3082, 'grad_norm': 0.7034397893539457, 'learning_rate': 1.5746308133852384e-06, 'epoch': 0.75} + 75%|███████▍ | 9118/12188 [19:40:32<6:05:12, 7.14s/it] 75%|███████▍ | 9119/12188 [19:40:39<6:00:07, 7.04s/it] {'loss': 0.3033, 'grad_norm': 0.6719475712949774, 'learning_rate': 1.5736630069958453e-06, 'epoch': 0.75} + 75%|███████▍ | 9119/12188 [19:40:39<6:00:07, 7.04s/it] 75%|███████▍ | 9120/12188 [19:40:46<5:59:01, 7.02s/it] {'loss': 0.3549, 'grad_norm': 0.6573300974690649, 'learning_rate': 1.5726954425688662e-06, 'epoch': 0.75} + 75%|███████▍ | 9120/12188 [19:40:46<5:59:01, 7.02s/it] 75%|███████▍ | 9121/12188 [19:40:54<6:10:23, 7.25s/it] {'loss': 0.2959, 'grad_norm': 0.7059731007495458, 'learning_rate': 1.5717281201726298e-06, 'epoch': 0.75} + 75%|███████▍ | 9121/12188 [19:40:54<6:10:23, 7.25s/it] 75%|███████▍ | 9122/12188 [19:41:00<6:01:06, 7.07s/it] {'loss': 0.2991, 'grad_norm': 0.7273093219079709, 'learning_rate': 1.5707610398754442e-06, 'epoch': 0.75} + 75%|███████▍ | 9122/12188 [19:41:00<6:01:06, 7.07s/it] 75%|███████▍ | 9123/12188 [19:41:08<6:03:36, 7.12s/it] {'loss': 0.3175, 'grad_norm': 0.6614552773000424, 'learning_rate': 1.5697942017456059e-06, 'epoch': 0.75} + 75%|███████▍ | 9123/12188 [19:41:08<6:03:36, 7.12s/it] 75%|███████▍ | 9124/12188 [19:41:14<5:56:20, 6.98s/it] {'loss': 0.2959, 'grad_norm': 0.6813994296428326, 'learning_rate': 1.5688276058513897e-06, 'epoch': 0.75} + 75%|███████▍ | 9124/12188 [19:41:14<5:56:20, 6.98s/it] 75%|███████▍ | 9125/12188 [19:41:21<5:52:39, 6.91s/it] {'loss': 0.2721, 'grad_norm': 0.6850115374335073, 'learning_rate': 1.5678612522610542e-06, 'epoch': 0.75} + 75%|███████▍ | 9125/12188 [19:41:21<5:52:39, 6.91s/it] 75%|███████▍ | 9126/12188 [19:41:28<5:56:49, 6.99s/it] {'loss': 0.2985, 'grad_norm': 0.7565135772858024, 'learning_rate': 1.5668951410428447e-06, 'epoch': 0.75} + 75%|███████▍ | 9126/12188 [19:41:28<5:56:49, 6.99s/it] 75%|███████▍ | 9127/12188 [19:41:36<6:02:05, 7.10s/it] {'loss': 0.2738, 'grad_norm': 0.6455502349957056, 'learning_rate': 1.5659292722649826e-06, 'epoch': 0.75} + 75%|███████▍ | 9127/12188 [19:41:36<6:02:05, 7.10s/it] 75%|███████▍ | 9128/12188 [19:41:42<5:55:54, 6.98s/it] {'loss': 0.352, 'grad_norm': 0.6720204374668529, 'learning_rate': 1.5649636459956796e-06, 'epoch': 0.75} + 75%|███████▍ | 9128/12188 [19:41:42<5:55:54, 6.98s/it] 75%|███████▍ | 9129/12188 [19:41:50<6:02:49, 7.12s/it] {'loss': 0.2921, 'grad_norm': 1.9400662596238671, 'learning_rate': 1.563998262303124e-06, 'epoch': 0.75} + 75%|███████▍ | 9129/12188 [19:41:50<6:02:49, 7.12s/it]W0817 16:52:24.151000 2755 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 3430 closing signal SIGTERM +W0817 16:52:24.173000 2755 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 3431 closing signal SIGTERM +W0817 16:52:24.174000 2755 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 3432 closing signal SIGTERM +W0817 16:52:24.174000 2755 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 3433 closing signal SIGTERM +W0817 16:52:24.175000 2755 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 3434 closing signal SIGTERM +W0817 16:52:24.176000 2755 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 3435 closing signal SIGTERM +W0817 16:52:24.176000 2755 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 3436 closing signal SIGTERM +E0817 16:52:38.230000 2755 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:869] failed (exitcode: -9) local_rank: 7 (pid: 3437) of binary: /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/bin/python +Traceback (most recent call last): + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/bin/torchrun", line 8, in + sys.exit(main()) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", line 355, in wrapper + return f(*args, **kwargs) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py", line 918, in main + run(args) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py", line 909, in run + elastic_launch( + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/launcher/api.py", line 138, in __call__ + return launch_agent(self._config, self._entrypoint, list(args)) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/launcher/api.py", line 269, in launch_agent + raise ChildFailedError( +torch.distributed.elastic.multiprocessing.errors.ChildFailedError: +===================================================== +qwenvl/train/train_qwen.py FAILED +----------------------------------------------------- +Failures: + +----------------------------------------------------- +Root Cause (first observed failure): +[0]: + time : 2025-08-17_16:52:24 + host : HOST-10-140-60-36 + rank : 7 (local_rank: 7) + exitcode : -9 (pid: 3437) + error_file: + traceback : Signal 9 (SIGKILL) received by PID 3437 +===================================================== +W0817 17:00:29.507000 22126 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 22799 closing signal SIGTERM +W0817 17:00:29.532000 22126 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 22800 closing signal SIGTERM +W0817 17:00:29.532000 22126 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 22802 closing signal SIGTERM +W0817 17:00:29.533000 22126 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 22803 closing signal SIGTERM +W0817 17:00:29.533000 22126 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 22804 closing signal SIGTERM +W0817 17:00:29.534000 22126 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 22805 closing signal SIGTERM +W0817 17:00:29.534000 22126 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 22806 closing signal SIGTERM +E0817 17:00:39.368000 22126 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:869] failed (exitcode: -9) local_rank: 2 (pid: 22801) of binary: /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/bin/python +Traceback (most recent call last): + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/bin/torchrun", line 8, in + sys.exit(main()) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", line 355, in wrapper + return f(*args, **kwargs) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py", line 918, in main + run(args) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py", line 909, in run + elastic_launch( + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/launcher/api.py", line 138, in __call__ + return launch_agent(self._config, self._entrypoint, list(args)) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/launcher/api.py", line 269, in launch_agent + raise ChildFailedError( +torch.distributed.elastic.multiprocessing.errors.ChildFailedError: +====================================================== +qwenvl/train/train_qwen.py FAILED +------------------------------------------------------ +Failures: + +------------------------------------------------------ +Root Cause (first observed failure): +[0]: + time : 2025-08-17_17:00:29 + host : HOST-10-140-66-169 + rank : 58 (local_rank: 2) + exitcode : -9 (pid: 22801) + error_file: + traceback : Signal 9 (SIGKILL) received by PID 22801 +====================================================== +[rank47]:[E817 17:02:21.061082429 ProcessGroupNCCL.cpp:629] [Rank 47] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600006 milliseconds before timing out. +[rank47]:[E817 17:02:21.061242449 ProcessGroupNCCL.cpp:2168] [PG ID 1 PG GUID 1 Rank 47] failure detected by watchdog at work sequence id: 5386964 PG status: last enqueued work: 5386966, last completed work: 5386963 +[rank47]:[E817 17:02:21.061257599 ProcessGroupNCCL.cpp:667] Stack trace of the failed collective not found, potentially because FlightRecorder is disabled. You can enable it by setting TORCH_NCCL_TRACE_BUFFER_SIZE to a non-zero value. +[rank43]:[E817 17:02:21.066033419 ProcessGroupNCCL.cpp:629] [Rank 43] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600011 milliseconds before timing out. +[rank43]:[E817 17:02:21.066153853 ProcessGroupNCCL.cpp:2168] [PG ID 1 PG GUID 1 Rank 43] failure detected by watchdog at work sequence id: 5386964 PG status: last enqueued work: 5386966, last completed work: 5386963 +[rank43]:[E817 17:02:21.066166503 ProcessGroupNCCL.cpp:667] Stack trace of the failed collective not found, potentially because FlightRecorder is disabled. You can enable it by setting TORCH_NCCL_TRACE_BUFFER_SIZE to a non-zero value. +[rank40]:[E817 17:02:21.075548151 ProcessGroupNCCL.cpp:629] [Rank 40] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600020 milliseconds before timing out. +[rank40]:[E817 17:02:21.075677945 ProcessGroupNCCL.cpp:2168] [PG ID 1 PG GUID 1 Rank 40] failure detected by watchdog at work sequence id: 5386964 PG status: last enqueued work: 5386966, last completed work: 5386963 +[rank40]:[E817 17:02:21.075692912 ProcessGroupNCCL.cpp:667] Stack trace of the failed collective not found, potentially because FlightRecorder is disabled. You can enable it by setting TORCH_NCCL_TRACE_BUFFER_SIZE to a non-zero value. +[rank10]:[E817 17:02:21.937345434 ProcessGroupNCCL.cpp:629] [Rank 10] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600006 milliseconds before timing out. +[rank55]:[E817 17:02:21.988424922 ProcessGroupNCCL.cpp:629] [Rank 55] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600012 milliseconds before timing out. +[rank27]:[E817 17:02:21.984804006 ProcessGroupNCCL.cpp:629] [Rank 27] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600015 milliseconds before timing out. +[rank31]:[E817 17:02:21.985318736 ProcessGroupNCCL.cpp:629] [Rank 31] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600015 milliseconds before timing out. +[rank38]:[E817 17:02:21.019674952 ProcessGroupNCCL.cpp:629] [Rank 38] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600038 milliseconds before timing out. +667] Stack trace of the failed collective not found, potentially because FlightRecorder is disabled. You can enable it by setting TORCH_NCCL_TRACE_BUFFER_SIZE to a non-zero value. +[rank38]:[E817 17:02:21.019847129 ProcessGroupNCCL.cpp:2168] [PG ID 1 PG GUID 1 Rank 38] failure detected by watchdog at work sequence id: 5386964 PG status: last enqueued work: 5386966, last completed work: 5386963 +[rank38]:[E817 17:02:21.019858335 ProcessGroupNCCL.cpp:[rank41]:[E817 17:02:21.112658024 ProcessGroupNCCL.cpp:2168] [PG ID 1 PG GUID 1 Rank 41] failure detected by watchdog at work sequence id: 5386964 PG status: last enqueued work: 5386966, last completed work: 5386963 +[rank41]:[E817 17:02:21.112668490 ProcessGroupNCCL.cpp:667] Stack trace of the failed collective not found, potentially because FlightRecorder is disabled. You can enable it by setting TORCH_NCCL_TRACE_BUFFER_SIZE to a non-zero value. +[rank37]:[E817 17:02:21.026197254 ProcessGroupNCCL.cpp:629] [Rank 37] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600042 milliseconds before timing out. +[rank37]:[E817 17:02:21.026322390 ProcessGroupNCCL.cpp:2168] [PG ID 1 PG GUID 1 Rank 37] failure detected by watchdog at work sequence id: 5386964 PG status: last enqueued work: 5386966, last completed work: 5386963 +[rank37]:[E817 17:02:21.026332350 ProcessGroupNCCL.cpp:667] Stack trace of the failed collective not found, potentially because FlightRecorder is disabled. You can enable it by setting TORCH_NCCL_TRACE_BUFFER_SIZE to a non-zero value. +[rank36]:[E817 17:02:21.031230191 ProcessGroupNCCL.cpp:629] [Rank 36] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600049 milliseconds before timing out. +[rank36]:[E817 17:02:21.031343563 ProcessGroupNCCL.cpp:2168] [PG ID 1 PG GUID 1 Rank 36] failure detected by watchdog at work sequence id: 5386964 PG status: last enqueued work: 5386966, last completed work: 5386963 +[rank36]:[E817 17:02:21.031353762 ProcessGroupNCCL.cpp:667] Stack trace of the failed collective not found, potentially because FlightRecorder is disabled. You can enable it by setting TORCH_NCCL_TRACE_BUFFER_SIZE to a non-zero value. +[rank32]:[E817 17:02:21.035574593 ProcessGroupNCCL.cpp:629] [Rank 32] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600052 milliseconds before timing out. +[rank13]:[E817 17:02:21.979068765 ProcessGroupNCCL.cpp:629] [Rank 13] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600048 milliseconds before timing out. +667] Stack trace of the failed collective not found, potentially because FlightRecorder is disabled. You can enable it by setting TORCH_NCCL_TRACE_BUFFER_SIZE to a non-zero value. +[rank24]:[E817 17:02:21.015139021 ProcessGroupNCCL.cpp:629] [Rank 24] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600045 milliseconds before timing out. +[rank26]:[E817 17:02:21.016194724 ProcessGroupNCCL.cpp:629] [Rank 26] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600046 milliseconds before timing out. +[rank29]:[E817 17:02:21.017440396 ProcessGroupNCCL.cpp:629] [Rank 29] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600046 milliseconds before timing out. +[rank39]:[E817 17:02:21.048177792 ProcessGroupNCCL.cpp:629] [Rank 39] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600065 milliseconds before timing out. +[rank13]:[E817 17:02:21.988357683 ProcessGroupNCCL.cpp:667] Stack trace of the failed collective not found, potentially because FlightRecorder is disabled. You can enable it by setting TORCH_NCCL_TRACE_BUFFER_SIZE to a non-zero value. +[rank10]:[E817 17:02:21.988421744 ProcessGroupNCCL.cpp:2168] [PG ID 1 PG GUID 1 Rank 10] failure detected by watchdog at work sequence id: 5386964 PG status: last enqueued work: 5386966, last completed work: 5386963 +[rank39]:[E817 17:02:21.048310461 ProcessGroupNCCL.cpp:2168] [PG ID 1 PG GUID 1 Rank 39] failure detected by watchdog at work sequence id: 5386964 PG status: last enqueued work: 5386966, last completed work: 5386963 +[rank39]:[E817 17:02:21.048320512 ProcessGroupNCCL.cpp:667] Stack trace of the failed collective not found, potentially because FlightRecorder is disabled. You can enable it by setting TORCH_NCCL_TRACE_BUFFER_SIZE to a non-zero value. +[rank34]:[E817 17:02:21.049277568 ProcessGroupNCCL.cpp:629] [Rank 34] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600067 milliseconds before timing out. +[rank34]:[E817 17:02:21.049385662 ProcessGroupNCCL.cpp:2168] [PG ID 1 PG GUID 1 Rank 34] failure detected by watchdog at work sequence id: 5386964 PG status: last enqueued work: 5386966, last completed work: 5386963 +[rank34]:[E817 17:02:21.049397651 ProcessGroupNCCL.cpp:667] Stack trace of the failed collective not found, potentially because FlightRecorder is disabled. You can enable it by setting TORCH_NCCL_TRACE_BUFFER_SIZE to a non-zero value. +600000) ran for 600050 milliseconds before timing out. +[rank44]:[E817 17:02:21.140327346 ProcessGroupNCCL.cpp:2168] [PG ID 1 PG GUID 1 Rank 44] failure detected by watchdog at work sequence id: 5386964 PG status: last enqueued work: 5386966, last completed work: 5386963 +[rank44]:[E817 17:02:21.140337781 ProcessGroupNCCL.cpp:667] Stack trace of the failed collective not found, potentially because FlightRecorder is disabled. You can enable it by setting TORCH_NCCL_TRACE_BUFFER_SIZE to a non-zero value. +[rank53]:[E817 17:02:21.034522227 ProcessGroupNCCL.cpp:629] [Rank 53] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600058 milliseconds before timing out. +[rank46]:[E817 17:02:21.148676235 ProcessGroupNCCL.cpp:629] [Rank 46] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600093 milliseconds before timing out. +[rank46]:[E817 17:02:21.148802582 ProcessGroupNCCL.cpp:2168] [PG ID 1 PG GUID 1 Rank 46] failure detected by watchdog at work sequence id: 5386964 PG status: last enqueued work: 5386966, last completed work: 5386963 +[rank46]:[E817 17:02:21.148812392 ProcessGroupNCCL.cpp:667] Stack trace of the failed collective not found, potentially because FlightRecorder is disabled. You can enable it by setting TORCH_NCCL_TRACE_BUFFER_SIZE to a non-zero value. +[rank14]:[E817 17:02:21.004319019 ProcessGroupNCCL.cpp:629] [Rank 14] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600073 milliseconds before timing out. +[rank35]:[E817 17:02:21.067080824 ProcessGroupNCCL.cpp:629] [Rank 35] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600085 milliseconds before timing out. +[rank14]:[E817 17:02:21.004468850 ProcessGroupNCCL.cpp:2168] [PG ID 1 PG GUID 1 Rank 14] failure detected by watchdog at work sequence id: 5386964 PG status: last enqueued work: 5386966, last completed work: 5386963 +[rank14]:[E817 17:02:21.004481211 ProcessGroupNCCL.cpp:667] Stack trace of the failed collective not found, potentially because FlightRecorder is disabled. You can enable it by setting TORCH_NCCL_TRACE_BUFFER_SIZE to a non-zero value. +[rank33]:[E817 17:02:21.067585606 ProcessGroupNCCL.cpp:629] [Rank 33] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600084 milliseconds before timing out. +[rank33]:[E817 17:02:21.067691205 ProcessGroupNCCL.cpp:2168] [PG ID 1 PG GUID 1 Rank 33] failure detected by watchdog at work sequence id: 5386964 PG status: last enqueued work: 5386966, last completed work: 5386963 +[rank33]:[E817 17:02:21.067701832 ProcessGroupNCCL.cpp:667] Stack trace of the failed collective not found, potentially because FlightRecorder is disabled. You can enable it by setting TORCH_NCCL_TRACE_BUFFER_SIZE to a non-zero value. +[rank21]:[E817 17:02:21.001178055 ProcessGroupNCCL.cpp:629] [Rank 21] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600071 milliseconds before timing out. +[rank28]:[E817 17:02:21.053473935 ProcessGroupNCCL.cpp:629] [Rank 28] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600083 milliseconds before timing out. +[rank9]:[E817 17:02:21.025553318 ProcessGroupNCCL.cpp:629] [Rank 9] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600094 milliseconds before timing out. +. +[rank9]:[E817 17:02:21.025699650 ProcessGroupNCCL.cpp:2168] [PG ID 1 PG GUID 1 Rank 9] failure detected by watchdog at work sequence id: 5386964 PG status: last enqueued work: 5386966, last completed work: 5386963 +[rank9]:[E817 17:02:21.025713473 ProcessGroupNCCL.cpp:667] Stack trace of the failed collective not found, potentially because FlightRecorder is disabled. You can enable it by setting TORCH_NCCL_TRACE_BUFFER_SIZE to a non-zero value. +[rank12]:[E817 17:02:21.029077626 ProcessGroupNCCL.cpp:629] [Rank 12] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600097 milliseconds before timing out. +[rank12]:[E817 17:02:21.029229335 ProcessGroupNCCL.cpp:2168] [PG ID 1 PG GUID 1 Rank 12] failure detected by watchdog at work sequence id: 5386964 PG status: last enqueued work: 5386966, last completed work: 5386963 +[rank12]:[E817 17:02:21.029240155 ProcessGroupNCCL.cpp:667] Stack trace of the failed collective not found, potentially because FlightRecorder is disabled. You can enable it by setting TORCH_NCCL_TRACE_BUFFER_SIZE to a non-zero value. +[rank11]:[E817 17:02:21.029492525 ProcessGroupNCCL.cpp:629] [Rank 11] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600099 milliseconds before timing out. +[rank11]:[E817 17:02:21.029614008 ProcessGroupNCCL.cpp:2168] [PG ID 1 PG GUID 1 Rank 11] failure detected by watchdog at work sequence id: 5386964 PG status: last enqueued work: 5386966, last completed work: 5386963 +[rank11]:[E817 17:02:21.029627152 ProcessGroupNCCL.cpp:667] Stack trace of the failed collective not found, potentially because FlightRecorder is disabled. You can enable it by setting TORCH_NCCL_TRACE_BUFFER_SIZE to a non-zero value. +[rank8]:[E817 17:02:21.029881147 ProcessGroupNCCL.cpp:629] [Rank 8] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600099 milliseconds before timing out. +[rank8]:[E817 17:02:21.030033100 ProcessGroupNCCL.cpp:2168] [PG ID 1 PG GUID 1 Rank 8] failure detected by watchdog at work sequence id: 5386964 PG status: last enqueued work: 5386966, last completed work: 5386963 +[rank8]:[E817 17:02:21.030044124 ProcessGroupNCCL.cpp:667] Stack trace of the failed collective not found, potentially because FlightRecorder is disabled. You can enable it by setting TORCH_NCCL_TRACE_BUFFER_SIZE to a non-zero value. +[rank51]:[E817 17:02:21.070351377 ProcessGroupNCCL.cpp:629] [Rank 51] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600094 milliseconds before timing out. +[rank17]:[E817 17:02:21.021440287 ProcessGroupNCCL.cpp:2168] [PG ID 1 PG GUID 1 Rank 17] failure detected by watchdog at work sequence id: 5386964 PG status: last enqueued work: 5386966, last completed work: 5386963 +[rank24]:[E817 17:02:21.067720161 ProcessGroupNCCL.cpp:667] Stack trace of the failed collective not found, potentially because FlightRecorder is disabled. You can enable it by setting TORCH_NCCL_TRACE_BUFFER_SIZE to a non-zero value. +[rank31]:[E817 17:02:21.068384723 ProcessGroupNCCL.cpp:2168] [PG ID 1 PG GUID 1 Rank 31] failure detected by watchdog at work sequence id: 5386964 PG status: last enqueued work: 5386966, last completed work: 5386963 +[rank31]:[E817 17:02:21.068406777 ProcessGroupNCCL.cpp:667] Stack trace of the failed collective not found, potentially because FlightRecorder is disabled. You can enable it by setting TORCH_NCCL_TRACE_BUFFER_SIZE to a non-zero value. +[rank30]:[E817 17:02:21.068536938 ProcessGroupNCCL.cpp:2168] [PG ID 1 PG GUID 1 Rank 30] failure detected by watchdog at work sequence id: 5386964 PG status: last enqueued work: 5386966, last completed work: 5386963 +[rank30]:[E817 17:02:21.068555821 ProcessGroupNCCL.cpp:667] Stack trace of the failed collective not found, potentially because FlightRecorder is disabled. You can enable it by setting TORCH_NCCL_TRACE_BUFFER_SIZE to a non-zero value. +[rank29]:[E817 17:02:21.068836754 ProcessGroupNCCL.cpp:2168] [PG ID 1 PG GUID 1 Rank 29] failure detected by watchdog at work sequence id: 5386964 PG status: last enqueued work: 5386966, last completed work: 5386963 +[rank29]:[E817 17:02:21.068853480 ProcessGroupNCCL.cpp:667] Stack trace of the failed collective not found, potentially because FlightRecorder is disabled. You can enable it by setting TORCH_NCCL_TRACE_BUFFER_SIZE to a non-zero value. +[rank25]:[E817 17:02:21.069437660 ProcessGroupNCCL.cpp:629] [Rank 25] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600098 milliseconds before timing out. +[rank25]:[E817 17:02:21.069554493 ProcessGroupNCCL.cpp:2168] [PG ID 1 PG GUID 1 Rank 25] failure detected by watchdog at work sequence id: 5386964 PG status: last enqueued work: 5386966, last completed work: 5386963 +[rank25]:[E817 17:02:21.069566829 ProcessGroupNCCL.cpp:667] Stack trace of the failed collective not found, potentially because FlightRecorder is disabled. You can enable it by setting TORCH_NCCL_TRACE_BUFFER_SIZE to a non-zero value. +[rank26]:[E817 17:02:21.069658181 ProcessGroupNCCL.cpp:2168] [PG ID 1 PG GUID 1 Rank 26] failure detected by watchdog at work sequence id: 5386964 PG status: last enqueued work: 5386966, last completed work: 5386963 +[rank26]:[E817 17:02:21.069677146 ProcessGroupNCCL.cpp:667] Stack trace of the failed collective not found, potentially because FlightRecorder is disabled. You can enable it by setting TORCH_NCCL_TRACE_BUFFER_SIZE to a non-zero value. +[rank28]:[E817 17:02:21.069859575 ProcessGroupNCCL.cpp:2168] [PG ID 1 PG GUID 1 Rank 28] failure detected by watchdog at work sequence id: 5386964 PG status: last enqueued work: 5386966, last completed work: 5386963 +[rank28]:[E817 17:02:21.069876271 ProcessGroupNCCL.cpp:667] Stack trace of the failed collective not found, potentially because FlightRecorder is disabled. You can enable it by setting TORCH_NCCL_TRACE_BUFFER_SIZE to a non-zero value. +[rank27]:[E817 17:02:21.070992550 ProcessGroupNCCL.cpp:2168] [PG ID 1 PG GUID 1 Rank 27] failure detected by watchdog at work sequence id: 5386964 PG status: last enqueued work: 5386966, last completed work: 5386963 +[rank27]:[E817 17:02:21.071017975 ProcessGroupNCCL.cpp:667] Stack trace of the failed collective not found, potentially because FlightRecorder is disabled. You can enable it by setting TORCH_NCCL_TRACE_BUFFER_SIZE to a non-zero value. +[rank20]:[E817 17:02:21.022395963 ProcessGroupNCCL.cpp:667] Stack trace of the failed collective not found, potentially because FlightRecorder is disabled. You can enable it by setting TORCH_NCCL_TRACE_BUFFER_SIZE to a non-zero value. +[rank16]:[E817 17:02:21.022735427 ProcessGroupNCCL.cpp:2168] [PG ID 1 PG GUID 1 Rank 16] failure detected by watchdog at work sequence id: 5386964 PG status: last enqueued work: 5386966, last completed work: 5386963 +[rank16]:[E817 17:02:21.022755570 ProcessGroupNCCL.cpp:667] Stack trace of the failed collective not found, potentially because FlightRecorder is disabled. You can enable it by setting TORCH_NCCL_TRACE_BUFFER_SIZE to a non-zero value. +[rank18]:[E817 17:02:21.023037598 ProcessGroupNCCL.cpp:2168] [PG ID 1 PG GUID 1 Rank 18] failure detected by watchdog at work sequence id: 5386964 PG status: last enqueued work: 5386966, last completed work: 5386963 +[rank18]:[E817 17:02:21.023053412 ProcessGroupNCCL.cpp:667] Stack trace of the failed collective not found, potentially because FlightRecorder is disabled. You can enable it by setting TORCH_NCCL_TRACE_BUFFER_SIZE to a non-zero value. +[rank23]:[E817 17:02:21.023336922 ProcessGroupNCCL.cpp:2168] [PG ID 1 PG GUID 1 Rank 23] failure detected by watchdog at work sequence id: 5386964 PG status: last enqueued work: 5386966, last completed work: 5386963 +[rank23]:[E817 17:02:21.023355042 ProcessGroupNCCL.cpp:667] Stack trace of the failed collective not found, potentially because FlightRecorder is disabled. You can enable it by setting TORCH_NCCL_TRACE_BUFFER_SIZE to a non-zero value. +[rank19]:[E817 17:02:21.024417202 ProcessGroupNCCL.cpp:2168] [PG ID 1 PG GUID 1 Rank 19] failure detected by watchdog at work sequence id: 5386964 PG status: last enqueued work: 5386966, last completed work: 5386963 +[rank19]:[E817 17:02:21.024431724 ProcessGroupNCCL.cpp:667] Stack trace of the failed collective not found, potentially because FlightRecorder is disabled. You can enable it by setting TORCH_NCCL_TRACE_BUFFER_SIZE to a non-zero value. +[rank54]:[E817 17:02:21.083087640 ProcessGroupNCCL.cpp:2168] [PG ID 1 PG GUID 1 Rank 54] failure detected by watchdog at work sequence id: 5386964 PG status: last enqueued work: 5386966, last completed work: 5386963 +[rank48]:[E817 17:02:21.083098940 ProcessGroupNCCL.cpp:2168] [PG ID 1 PG GUID 1 Rank 48] failure detected by watchdog at work sequence id: 5386964 PG status: last enqueued work: 5386966, last completed work: 5386963 +[rank54]:[E817 17:02:21.083106953 ProcessGroupNCCL.cpp:667] Stack trace of the failed collective not found, potentially because FlightRecorder is disabled. You can enable it by setting TORCH_NCCL_TRACE_BUFFER_SIZE to a non-zero value. +[rank48]:[E817 17:02:21.083113726 ProcessGroupNCCL.cpp:667] Stack trace of the failed collective not found, potentially because FlightRecorder is disabled. You can enable it by setting TORCH_NCCL_TRACE_BUFFER_SIZE to a non-zero value. +[rank53]:[E817 17:02:21.084109703 ProcessGroupNCCL.cpp:2168] [PG ID 1 PG GUID 1 Rank 53] failure detected by watchdog at work sequence id: 5386964 PG status: last enqueued work: 5386966, last completed work: 5386963 +[rank53]:[E817 17:02:21.084124649 ProcessGroupNCCL.cpp:667] Stack trace of the failed collective not found, potentially because FlightRecorder is disabled. You can enable it by setting TORCH_NCCL_TRACE_BUFFER_SIZE to a non-zero value. +[rank52]:[E817 17:02:21.084149854 ProcessGroupNCCL.cpp:2168] [PG ID 1 PG GUID 1 Rank 52] failure detected by watchdog at work sequence id: 5386964 PG status: last enqueued work: 5386966, last completed work: 5386963 +[rank52]:[E817 17:02:21.084159429 ProcessGroupNCCL.cpp:667] Stack trace of the failed collective not found, potentially because FlightRecorder is disabled. You can enable it by setting TORCH_NCCL_TRACE_BUFFER_SIZE to a non-zero value. +[rank51]:[E817 17:02:21.084339693 ProcessGroupNCCL.cpp:2168] [PG ID 1 PG GUID 1 Rank 51] failure detected by watchdog at work sequence id: 5386964 PG status: last enqueued work: 5386966, last completed work: 5386963 +[rank51]:[E817 17:02:21.084351248 ProcessGroupNCCL.cpp:667] Stack trace of the failed collective not found, potentially because FlightRecorder is disabled. You can enable it by setting TORCH_NCCL_TRACE_BUFFER_SIZE to a non-zero value. +[rank50]:[E817 17:02:21.085188521 ProcessGroupNCCL.cpp:2168] [PG ID 1 PG GUID 1 Rank 50] failure detected by watchdog at work sequence id: 5386964 PG status: last enqueued work: 5386966, last completed work: 5386963 +[rank50]:[E817 17:02:21.085202401 ProcessGroupNCCL.cpp:667] Stack trace of the failed collective not found, potentially because FlightRecorder is disabled. You can enable it by setting TORCH_NCCL_TRACE_BUFFER_SIZE to a non-zero value. +[rank49]:[E817 17:02:21.085620706 ProcessGroupNCCL.cpp:2168] [PG ID 1 PG GUID 1 Rank 49] failure detected by watchdog at work sequence id: 5386964 PG status: last enqueued work: 5386966, last completed work: 5386963 +[rank49]:[E817 17:02:21.085641863 ProcessGroupNCCL.cpp:667] Stack trace of the failed collective not found, potentially because FlightRecorder is disabled. You can enable it by setting TORCH_NCCL_TRACE_BUFFER_SIZE to a non-zero value. +[rank55]:[E817 17:02:21.086484771 ProcessGroupNCCL.cpp:2168] [PG ID 1 PG GUID 1 Rank 55] failure detected by watchdog at work sequence id: 5386964 PG status: last enqueued work: 5386966, last completed work: 5386963 +[rank55]:[E817 17:02:21.086502000 ProcessGroupNCCL.cpp:667] Stack trace of the failed collective not found, potentially because FlightRecorder is disabled. You can enable it by setting TORCH_NCCL_TRACE_BUFFER_SIZE to a non-zero value. +[rank45]:[E817 17:02:22.001721564 ProcessGroupNCCL.cpp:681] [Rank 45] Some NCCL operations have failed or timed out. Due to the asynchronous nature of CUDA kernels, subsequent GPU operations might run on corrupted/incomplete data. +[rank45]:[E817 17:02:22.001741663 ProcessGroupNCCL.cpp:695] [Rank 45] To avoid data inconsistency, we are taking the entire process down. +[rank45]:[E817 17:02:22.041885737 ProcessGroupNCCL.cpp:1895] [PG ID 1 PG GUID 1 Rank 45] Process group watchdog thread terminated with exception: [Rank 45] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600049 milliseconds before timing out. +Exception raised from checkTimeout at /pytorch/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp:632 (most recent call first): +frame #0: c10::Error::Error(c10::SourceLocation, std::string) + 0x96 (0x7fea5d4731b6 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libc10.so) +frame #1: c10d::ProcessGroupNCCL::WorkNCCL::checkTimeout(std::optional > >) + 0x2b4 (0x7fea5e7bcc74 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #2: c10d::ProcessGroupNCCL::watchdogHandler() + 0x890 (0x7fea5e7be7d0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #3: c10d::ProcessGroupNCCL::ncclCommWatchdog() + 0x14d (0x7fea5e7bf6ed in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #4: + 0x145c0 (0x7feaa78365c0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch.so) +frame #5: + 0x7dd5 (0x7feab7c6bdd5 in /lib64/libpthread.so.0) +frame #6: clone + 0x6d (0x7feab728bead in /lib64/libc.so.6) + +[rank53]:[E817 17:02:23.098192873 ProcessGroupNCCL.cpp:681] [Rank 53] Some NCCL operations have failed or timed out. Due to the asynchronous nature of CUDA kernels, subsequent GPU operations might run on corrupted/incomplete data. +[rank53]:[E817 17:02:23.098205909 ProcessGroupNCCL.cpp:695] [Rank 53] To avoid data inconsistency, we are taking the entire process down. +[rank53]:[E817 17:02:24.138415343 ProcessGroupNCCL.cpp:1895] [PG ID 1 PG GUID 1 Rank 53] Process group watchdog thread terminated with exception: [Rank 53] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600058 milliseconds before timing out. +Exception raised from checkTimeout at /pytorch/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp:632 (most recent call first): +frame #0: c10::Error::Error(c10::SourceLocation, std::string) + 0x96 (0x7f1d0eb0f1b6 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libc10.so) +frame #1: c10d::ProcessGroupNCCL::WorkNCCL::checkTimeout(std::optional > >) + 0x2b4 (0x7f1d0fe58c74 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #2: c10d::ProcessGroupNCCL::watchdogHandler() + 0x890 (0x7f1d0fe5a7d0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #3: c10d::ProcessGroupNCCL::ncclCommWatchdog() + 0x14d (0x7f1d0fe5b6ed in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #4: + 0x145c0 (0x7f1d58ed25c0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch.so) +frame #5: + 0x7dd5 (0x7f1d69307dd5 in /lib64/libpthread.so.0) +frame #6: clone + 0x6d (0x7f1d68927ead in /lib64/libc.so.6) + +[rank21]:[E817 17:02:24.216847488 ProcessGroupNCCL.cpp:681] [Rank 21] Some NCCL operations have failed or timed out. Due to the asynchronous nature of CUDA kernels, subsequent GPU operations might run on corrupted/incomplete data. +[rank21]:[E817 17:02:24.216859794 ProcessGroupNCCL.cpp:695] [Rank 21] To avoid data inconsistency, we are taking the entire process down. +[rank21]:[E817 17:02:24.256108080 ProcessGroupNCCL.cpp:1895] [PG ID 1 PG GUID 1 Rank 21] Process group watchdog thread terminated with exception: [Rank 21] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600071 milliseconds before timing out. +Exception raised from checkTimeout at /pytorch/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp:632 (most recent call first): +frame #0: c10::Error::Error(c10::SourceLocation, std::string) + 0x96 (0x7f7a6b3891b6 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libc10.so) +frame #1: c10d::ProcessGroupNCCL::WorkNCCL::checkTimeout(std::optional > >) + 0x2b4 (0x7f7a6c6d2c74 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #2: c10d::ProcessGroupNCCL::watchdogHandler() + 0x890 (0x7f7a6c6d47d0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #3: c10d::ProcessGroupNCCL::ncclCommWatchdog() + 0x14d (0x7f7a6c6d56ed in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #4: + 0x145c0 (0x7f7ab574c5c0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch.so) +frame #5: + 0x7dd5 (0x7f7ac5b81dd5 in /lib64/libpthread.so.0) +frame #6: clone + 0x6d (0x7f7ac51a1ead in /lib64/libc.so.6) + +[rank41]:[E817 17:02:24.577989727 ProcessGroupNCCL.cpp:681] [Rank 41] Some NCCL operations have failed or timed out. Due to the asynchronous nature of CUDA kernels, subsequent GPU operations might run on corrupted/incomplete data. +[rank41]:[E817 17:02:24.578017660 ProcessGroupNCCL.cpp:695] [Rank 41] To avoid data inconsistency, we are taking the entire process down. +[rank41]:[E817 17:02:24.579432729 ProcessGroupNCCL.cpp:1895] [PG ID 1 PG GUID 1 Rank 41] Process group watchdog thread terminated with exception: [Rank 41] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600057 milliseconds before timing out. +Exception raised from checkTimeout at /pytorch/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp:632 (most recent call first): +frame #0: c10::Error::Error(c10::SourceLocation, std::string) + 0x96 (0x7ff7573561b6 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libc10.so) +frame #1: c10d::ProcessGroupNCCL::WorkNCCL::checkTimeout(std::optional > >) + 0x2b4 (0x7ff75869fc74 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #2: c10d::ProcessGroupNCCL::watchdogHandler() + 0x890 (0x7ff7586a17d0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #3: c10d::ProcessGroupNCCL::ncclCommWatchdog() + 0x14d (0x7ff7586a26ed in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #4: + 0x145c0 (0x7ff7a17195c0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch.so) +frame #5: + 0x7dd5 (0x7ff7b1b4edd5 in /lib64/libpthread.so.0) +frame #6: clone + 0x6d (0x7ff7b116eead in /lib64/libc.so.6) + +[rank43]:[E817 17:02:24.606008282 ProcessGroupNCCL.cpp:681] [Rank 43] Some NCCL operations have failed or timed out. Due to the asynchronous nature of CUDA kernels, subsequent GPU operations might run on corrupted/incomplete data. +[rank43]:[E817 17:02:24.606027835 ProcessGroupNCCL.cpp:695] [Rank 43] To avoid data inconsistency, we are taking the entire process down. +[rank43]:[E817 17:02:24.607368278 ProcessGroupNCCL.cpp:1895] [PG ID 1 PG GUID 1 Rank 43] Process group watchdog thread terminated with exception: [Rank 43] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600011 milliseconds before timing out. +Exception raised from checkTimeout at /pytorch/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp:632 (most recent call first): +frame #0: c10::Error::Error(c10::SourceLocation, std::string) + 0x96 (0x7f782907e1b6 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libc10.so) +frame #1: c10d::ProcessGroupNCCL::WorkNCCL::checkTimeout(std::optional > >) + 0x2b4 (0x7f782a3c7c74 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #2: c10d::ProcessGroupNCCL::watchdogHandler() + 0x890 (0x7f782a3c97d0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #3: c10d::ProcessGroupNCCL::ncclCommWatchdog() + 0x14d (0x7f782a3ca6ed in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #4: + 0x145c0 (0x7f78734415c0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch.so) +frame #5: + 0x7dd5 (0x7f7883876dd5 in /lib64/libpthread.so.0) +frame #6: clone + 0x6d (0x7f7882e96ead in /lib64/libc.so.6) + +[rank35]:[E817 17:02:24.775608624 ProcessGroupNCCL.cpp:681] [Rank 35] Some NCCL operations have failed or timed out. Due to the asynchronous nature of CUDA kernels, subsequent GPU operations might run on corrupted/incomplete data. +[rank35]:[E817 17:02:24.775621576 ProcessGroupNCCL.cpp:695] [Rank 35] To avoid data inconsistency, we are taking the entire process down. +[rank35]:[E817 17:02:24.776793351 ProcessGroupNCCL.cpp:1895] [PG ID 1 PG GUID 1 Rank 35] Process group watchdog thread terminated with exception: [Rank 35] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600085 milliseconds before timing out. +Exception raised from checkTimeout at /pytorch/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp:632 (most recent call first): +frame #0: c10::Error::Error(c10::SourceLocation, std::string) + 0x96 (0x7f39a557f1b6 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libc10.so) +frame #1: c10d::ProcessGroupNCCL::WorkNCCL::checkTimeout(std::optional > >) + 0x2b4 (0x7f39a68c8c74 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #2: c10d::ProcessGroupNCCL::watchdogHandler() + 0x890 (0x7f39a68ca7d0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #3: c10d::ProcessGroupNCCL::ncclCommWatchdog() + 0x14d (0x7f39a68cb6ed in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #4: + 0x145c0 (0x7f39ef9425c0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch.so) +frame #5: + 0x7dd5 (0x7f39ffd77dd5 in /lib64/libpthread.so.0) +frame #6: clone + 0x6d (0x7f39ff397ead in /lib64/libc.so.6) + +[rank37]:[E817 17:02:24.829915175 ProcessGroupNCCL.cpp:681] [Rank 37] Some NCCL operations have failed or timed out. Due to the asynchronous nature of CUDA kernels, subsequent GPU operations might run on corrupted/incomplete data. +[rank37]:[E817 17:02:24.829926001 ProcessGroupNCCL.cpp:695] [Rank 37] To avoid data inconsistency, we are taking the entire process down. +[rank37]:[E817 17:02:24.831074878 ProcessGroupNCCL.cpp:1895] [PG ID 1 PG GUID 1 Rank 37] Process group watchdog thread terminated with exception: [Rank 37] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600042 milliseconds before timing out. +Exception raised from checkTimeout at /pytorch/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp:632 (most recent call first): +frame #0: c10::Error::Error(c10::SourceLocation, std::string) + 0x96 (0x7f8fe7c341b6 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libc10.so) +frame #1: c10d::ProcessGroupNCCL::WorkNCCL::checkTimeout(std::optional > >) + 0x2b4 (0x7f8fe8f7dc74 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #2: c10d::ProcessGroupNCCL::watchdogHandler() + 0x890 (0x7f8fe8f7f7d0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #3: c10d::ProcessGroupNCCL::ncclCommWatchdog() + 0x14d (0x7f8fe8f806ed in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #4: + 0x145c0 (0x7f9031ff75c0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch.so) +frame #5: + 0x7dd5 (0x7f904242cdd5 in /lib64/libpthread.so.0) +frame #6: clone + 0x6d (0x7f9041a4cead in /lib64/libc.so.6) + +[rank51]:[E817 17:02:24.007486711 ProcessGroupNCCL.cpp:681] [Rank 51] Some NCCL operations have failed or timed out. Due to the asynchronous nature of CUDA kernels, subsequent GPU operations might run on corrupted/incomplete data. +[rank51]:[E817 17:02:24.007505481 ProcessGroupNCCL.cpp:695] [Rank 51] To avoid data inconsistency, we are taking the entire process down. +[rank51]:[E817 17:02:24.008848097 ProcessGroupNCCL.cpp:1895] [PG ID 1 PG GUID 1 Rank 51] Process group watchdog thread terminated with exception: [Rank 51] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600094 milliseconds before timing out. +Exception raised from checkTimeout at /pytorch/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp:632 (most recent call first): +frame #0: c10::Error::Error(c10::SourceLocation, std::string) + 0x96 (0x7fb5acffa1b6 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libc10.so) +frame #1: c10d::ProcessGroupNCCL::WorkNCCL::checkTimeout(std::optional > >) + 0x2b4 (0x7fb5ae343c74 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #2: c10d::ProcessGroupNCCL::watchdogHandler() + 0x890 (0x7fb5ae3457d0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #3: c10d::ProcessGroupNCCL::ncclCommWatchdog() + 0x14d (0x7fb5ae3466ed in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #4: + 0x145c0 (0x7fb5f73bd5c0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch.so) +frame #5: + 0x7dd5 (0x7fb6077f2dd5 in /lib64/libpthread.so.0) +frame #6: clone + 0x6d (0x7fb606e12ead in /lib64/libc.so.6) + +W0817 17:02:24.970000 100739 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 101499 closing signal SIGTERM +W0817 17:02:24.990000 100739 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 101500 closing signal SIGTERM +W0817 17:02:24.991000 100739 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 101501 closing signal SIGTERM +W0817 17:02:24.991000 100739 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 101502 closing signal SIGTERM +W0817 17:02:24.991000 100739 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 101503 closing signal SIGTERM +W0817 17:02:24.992000 100739 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 101505 closing signal SIGTERM +W0817 17:02:24.992000 100739 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 101506 closing signal SIGTERM +[rank29]:[E817 17:02:25.131041213 ProcessGroupNCCL.cpp:681] [Rank 29] Some NCCL operations have failed or timed out. Due to the asynchronous nature of CUDA kernels, subsequent GPU operations might run on corrupted/incomplete data. +[rank29]:[E817 17:02:25.131058969 ProcessGroupNCCL.cpp:695] [Rank 29] To avoid data inconsistency, we are taking the entire process down. +[rank27]:[E817 17:02:25.131073248 ProcessGroupNCCL.cpp:681] [Rank 27] Some NCCL operations have failed or timed out. Due to the asynchronous nature of CUDA kernels, subsequent GPU operations might run on corrupted/incomplete data. +[rank27]:[E817 17:02:25.131085088 ProcessGroupNCCL.cpp:695] [Rank 27] To avoid data inconsistency, we are taking the entire process down. +[rank25]:[E817 17:02:25.131502152 ProcessGroupNCCL.cpp:681] [Rank 25] Some NCCL operations have failed or timed out. Due to the asynchronous nature of CUDA kernels, subsequent GPU operations might run on corrupted/incomplete data. +[rank25]:[E817 17:02:25.131513599 ProcessGroupNCCL.cpp:695] [Rank 25] To avoid data inconsistency, we are taking the entire process down. +[rank27]:[E817 17:02:25.132301349 ProcessGroupNCCL.cpp:1895] [PG ID 1 PG GUID 1 Rank 27] Process group watchdog thread terminated with exception: [Rank 27] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600015 milliseconds before timing out. +Exception raised from checkTimeout at /pytorch/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp:632 (most recent call first): +frame #0: c10::Error::Error(c10::SourceLocation, std::string) + 0x96 (0x7f4b27a8c1b6 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libc10.so) +frame #1: c10d::ProcessGroupNCCL::WorkNCCL::checkTimeout(std::optional > >) + 0x2b4 (0x7f4b28dd5c74 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #2: c10d::ProcessGroupNCCL::watchdogHandler() + 0x890 (0x7f4b28dd77d0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #3: c10d::ProcessGroupNCCL::ncclCommWatchdog() + 0x14d (0x7f4b28dd86ed in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #4: + 0x145c0 (0x7f4b71e4f5c0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch.so) +frame #5: + 0x7dd5 (0x7f4b82284dd5 in /lib64/libpthread.so.0) +frame #6: clone + 0x6d (0x7f4b818a4ead in /lib64/libc.so.6) + +[rank25]:[E817 17:02:25.132713809 ProcessGroupNCCL.cpp:1895] [PG ID 1 PG GUID 1 Rank 25] Process group watchdog thread terminated with exception: [Rank 25] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600098 milliseconds before timing out. +Exception raised from checkTimeout at /pytorch/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp:632 (most recent call first): +frame #0: c10::Error::Error(c10::SourceLocation, std::string) + 0x96 (0x7f3d8beee1b6 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libc10.so) +frame #1: c10d::ProcessGroupNCCL::WorkNCCL::checkTimeout(std::optional > >) + 0x2b4 (0x7f3d8d237c74 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #2: c10d::ProcessGroupNCCL::watchdogHandler() + 0x890 (0x7f3d8d2397d0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #3: c10d::ProcessGroupNCCL::ncclCommWatchdog() + 0x14d (0x7f3d8d23a6ed in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #4: + 0x145c0 (0x7f3dd62b15c0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch.so) +frame #5: + 0x7dd5 (0x7f3de66e6dd5 in /lib64/libpthread.so.0) +frame #6: clone + 0x6d (0x7f3de5d06ead in /lib64/libc.so.6) + +[rank29]:[E817 17:02:25.171294043 ProcessGroupNCCL.cpp:1895] [PG ID 1 PG GUID 1 Rank 29] Process group watchdog thread terminated with exception: [Rank 29] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600046 milliseconds before timing out. +Exception raised from checkTimeout at /pytorch/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp:632 (most recent call first): +frame #0: c10::Error::Error(c10::SourceLocation, std::string) + 0x96 (0x7f64d4ac01b6 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libc10.so) +frame #1: c10d::ProcessGroupNCCL::WorkNCCL::checkTimeout(std::optional > >) + 0x2b4 (0x7f64d5e09c74 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #2: c10d::ProcessGroupNCCL::watchdogHandler() + 0x890 (0x7f64d5e0b7d0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #3: c10d::ProcessGroupNCCL::ncclCommWatchdog() + 0x14d (0x7f64d5e0c6ed in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #4: + 0x145c0 (0x7f651ee835c0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch.so) +frame #5: + 0x7dd5 (0x7f652f2b8dd5 in /lib64/libpthread.so.0) +frame #6: clone + 0x6d (0x7f652e8d8ead in /lib64/libc.so.6) + +[rank11]:[E817 17:02:25.315637533 ProcessGroupNCCL.cpp:681] [Rank 11] Some NCCL operations have failed or timed out. Due to the asynchronous nature of CUDA kernels, subsequent GPU operations might run on corrupted/incomplete data. +[rank11]:[E817 17:02:25.315657724 ProcessGroupNCCL.cpp:695] [Rank 11] To avoid data inconsistency, we are taking the entire process down. +[rank13]:[E817 17:02:25.315947608 ProcessGroupNCCL.cpp:681] [Rank 13] Some NCCL operations have failed or timed out. Due to the asynchronous nature of CUDA kernels, subsequent GPU operations might run on corrupted/incomplete data. +[rank13]:[E817 17:02:25.315963317 ProcessGroupNCCL.cpp:695] [Rank 13] To avoid data inconsistency, we are taking the entire process down. +[rank13]:[E817 17:02:25.317283043 ProcessGroupNCCL.cpp:1895] [PG ID 1 PG GUID 1 Rank 13] Process group watchdog thread terminated with exception: [Rank 13] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600048 milliseconds before timing out. +Exception raised from checkTimeout at /pytorch/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp:632 (most recent call first): +frame #0: c10::Error::Error(c10::SourceLocation, std::string) + 0x96 (0x7fd0a2bcc1b6 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libc10.so) +frame #1: c10d::ProcessGroupNCCL::WorkNCCL::checkTimeout(std::optional > >) + 0x2b4 (0x7fd0a3f15c74 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #2: c10d::ProcessGroupNCCL::watchdogHandler() + 0x890 (0x7fd0a3f177d0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #3: c10d::ProcessGroupNCCL::ncclCommWatchdog() + 0x14d (0x7fd0a3f186ed in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #4: + 0x145c0 (0x7fd0ecf8f5c0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch.so) +frame #5: + 0x7dd5 (0x7fd0fd3c4dd5 in /lib64/libpthread.so.0) +frame #6: clone + 0x6d (0x7fd0fc9e4ead in /lib64/libc.so.6) + +[rank11]:[E817 17:02:25.359343096 ProcessGroupNCCL.cpp:1895] [PG ID 1 PG GUID 1 Rank 11] Process group watchdog thread terminated with exception: [Rank 11] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600099 milliseconds before timing out. +Exception raised from checkTimeout at /pytorch/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp:632 (most recent call first): +frame #0: c10::Error::Error(c10::SourceLocation, std::string) + 0x96 (0x7f76f701e1b6 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libc10.so) +frame #1: c10d::ProcessGroupNCCL::WorkNCCL::checkTimeout(std::optional > >) + 0x2b4 (0x7f76f8367c74 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #2: c10d::ProcessGroupNCCL::watchdogHandler() + 0x890 (0x7f76f83697d0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #3: c10d::ProcessGroupNCCL::ncclCommWatchdog() + 0x14d (0x7f76f836a6ed in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #4: + 0x145c0 (0x7f77413e15c0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch.so) +frame #5: + 0x7dd5 (0x7f7751816dd5 in /lib64/libpthread.so.0) +frame #6: clone + 0x6d (0x7f7750e36ead in /lib64/libc.so.6) + +W0817 17:02:25.490000 69987 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 70662 closing signal SIGTERM +W0817 17:02:25.502000 69987 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 70663 closing signal SIGTERM +W0817 17:02:25.503000 69987 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 70664 closing signal SIGTERM +W0817 17:02:25.503000 69987 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 70665 closing signal SIGTERM +W0817 17:02:25.503000 69987 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 70666 closing signal SIGTERM +W0817 17:02:25.504000 69987 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 70668 closing signal SIGTERM +W0817 17:02:25.504000 69987 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 70669 closing signal SIGTERM +[rank19]:[E817 17:02:25.713772768 ProcessGroupNCCL.cpp:681] [Rank 19] Some NCCL operations have failed or timed out. Due to the asynchronous nature of CUDA kernels, subsequent GPU operations might run on corrupted/incomplete data. +[rank19]:[E817 17:02:25.713790445 ProcessGroupNCCL.cpp:695] [Rank 19] To avoid data inconsistency, we are taking the entire process down. +[rank19]:[E817 17:02:25.715037480 ProcessGroupNCCL.cpp:1895] [PG ID 1 PG GUID 1 Rank 19] Process group watchdog thread terminated with exception: [Rank 19] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600081 milliseconds before timing out. +Exception raised from checkTimeout at /pytorch/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp:632 (most recent call first): +frame #0: c10::Error::Error(c10::SourceLocation, std::string) + 0x96 (0x7faebb49e1b6 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libc10.so) +frame #1: c10d::ProcessGroupNCCL::WorkNCCL::checkTimeout(std::optional > >) + 0x2b4 (0x7faebc7e7c74 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #2: c10d::ProcessGroupNCCL::watchdogHandler() + 0x890 (0x7faebc7e97d0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #3: c10d::ProcessGroupNCCL::ncclCommWatchdog() + 0x14d (0x7faebc7ea6ed in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #4: + 0x145c0 (0x7faf058615c0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch.so) +frame #5: + 0x7dd5 (0x7faf15c96dd5 in /lib64/libpthread.so.0) +frame #6: clone + 0x6d (0x7faf152b6ead in /lib64/libc.so.6) + +[rank33]:[E817 17:02:25.858792212 ProcessGroupNCCL.cpp:681] [Rank 33] Some NCCL operations have failed or timed out. Due to the asynchronous nature of CUDA kernels, subsequent GPU operations might run on corrupted/incomplete data. +[rank33]:[E817 17:02:25.858808947 ProcessGroupNCCL.cpp:695] [Rank 33] To avoid data inconsistency, we are taking the entire process down. +[rank33]:[E817 17:02:25.860029322 ProcessGroupNCCL.cpp:1895] [PG ID 1 PG GUID 1 Rank 33] Process group watchdog thread terminated with exception: [Rank 33] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600084 milliseconds before timing out. +Exception raised from checkTimeout at /pytorch/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp:632 (most recent call first): +frame #0: c10::Error::Error(c10::SourceLocation, std::string) + 0x96 (0x7fbf1f2661b6 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libc10.so) +frame #1: c10d::ProcessGroupNCCL::WorkNCCL::checkTimeout(std::optional > >) + 0x2b4 (0x7fbf205afc74 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #2: c10d::ProcessGroupNCCL::watchdogHandler() + 0x890 (0x7fbf205b17d0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #3: c10d::ProcessGroupNCCL::ncclCommWatchdog() + 0x14d (0x7fbf205b26ed in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #4: + 0x145c0 (0x7fbf696295c0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch.so) +frame #5: + 0x7dd5 (0x7fbf79a5edd5 in /lib64/libpthread.so.0) +frame #6: clone + 0x6d (0x7fbf7907eead in /lib64/libc.so.6) + +W0817 17:02:25.842000 50715 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 51367 closing signal SIGTERM +W0817 17:02:25.857000 50715 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 51368 closing signal SIGTERM +W0817 17:02:25.858000 50715 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 51369 closing signal SIGTERM +W0817 17:02:25.859000 50715 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 51370 closing signal SIGTERM +W0817 17:02:25.859000 50715 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 51371 closing signal SIGTERM +W0817 17:02:25.859000 50715 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 51373 closing signal SIGTERM +W0817 17:02:25.860000 50715 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 51374 closing signal SIGTERM +[rank15]:[E817 17:02:25.040828093 ProcessGroupNCCL.cpp:681] [Rank 15] Some NCCL operations have failed or timed out. Due to the asynchronous nature of CUDA kernels, subsequent GPU operations might run on corrupted/incomplete data. +[rank15]:[E817 17:02:25.040850566 ProcessGroupNCCL.cpp:695] [Rank 15] To avoid data inconsistency, we are taking the entire process down. +[rank15]:[E817 17:02:25.042211594 ProcessGroupNCCL.cpp:1895] [PG ID 1 PG GUID 1 Rank 15] Process group watchdog thread terminated with exception: [Rank 15] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600060 milliseconds before timing out. +Exception raised from checkTimeout at /pytorch/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp:632 (most recent call first): +frame #0: c10::Error::Error(c10::SourceLocation, std::string) + 0x96 (0x7f10567de1b6 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libc10.so) +frame #1: c10d::ProcessGroupNCCL::WorkNCCL::checkTimeout(std::optional > >) + 0x2b4 (0x7f1057b27c74 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #2: c10d::ProcessGroupNCCL::watchdogHandler() + 0x890 (0x7f1057b297d0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #3: c10d::ProcessGroupNCCL::ncclCommWatchdog() + 0x14d (0x7f1057b2a6ed in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #4: + 0x145c0 (0x7f10a0ba15c0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch.so) +frame #5: + 0x7dd5 (0x7f10b0fd6dd5 in /lib64/libpthread.so.0) +frame #6: clone + 0x6d (0x7f10b05f6ead in /lib64/libc.so.6) + +[rank31]:[E817 17:02:25.097490354 ProcessGroupNCCL.cpp:681] [Rank 31] Some NCCL operations have failed or timed out. Due to the asynchronous nature of CUDA kernels, subsequent GPU operations might run on corrupted/incomplete data. +[rank31]:[E817 17:02:25.097507745 ProcessGroupNCCL.cpp:695] [Rank 31] To avoid data inconsistency, we are taking the entire process down. +[rank31]:[E817 17:02:25.098761674 ProcessGroupNCCL.cpp:1895] [PG ID 1 PG GUID 1 Rank 31] Process group watchdog thread terminated with exception: [Rank 31] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600015 milliseconds before timing out. +Exception raised from checkTimeout at /pytorch/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp:632 (most recent call first): +frame #0: c10::Error::Error(c10::SourceLocation, std::string) + 0x96 (0x7f2c90b581b6 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libc10.so) +frame #1: c10d::ProcessGroupNCCL::WorkNCCL::checkTimeout(std::optional > >) + 0x2b4 (0x7f2c91ea1c74 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #2: c10d::ProcessGroupNCCL::watchdogHandler() + 0x890 (0x7f2c91ea37d0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #3: c10d::ProcessGroupNCCL::ncclCommWatchdog() + 0x14d (0x7f2c91ea46ed in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #4: + 0x145c0 (0x7f2cdaf1b5c0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch.so) +frame #5: + 0x7dd5 (0x7f2ceb350dd5 in /lib64/libpthread.so.0) +frame #6: clone + 0x6d (0x7f2cea970ead in /lib64/libc.so.6) + +[rank24]:[E817 17:02:25.110116782 ProcessGroupNCCL.cpp:681] [Rank 24] Some NCCL operations have failed or timed out. Due to the asynchronous nature of CUDA kernels, subsequent GPU operations might run on corrupted/incomplete data. +[rank24]:[E817 17:02:25.110133622 ProcessGroupNCCL.cpp:695] [Rank 24] To avoid data inconsistency, we are taking the entire process down. +[rank24]:[E817 17:02:25.111421992 ProcessGroupNCCL.cpp:1895] [PG ID 1 PG GUID 1 Rank 24] Process group watchdog thread terminated with exception: [Rank 24] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600045 milliseconds before timing out. +Exception raised from checkTimeout at /pytorch/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp:632 (most recent call first): +frame #0: c10::Error::Error(c10::SourceLocation, std::string) + 0x96 (0x7f77909881b6 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libc10.so) +frame #1: c10d::ProcessGroupNCCL::WorkNCCL::checkTimeout(std::optional > >) + 0x2b4 (0x7f7791cd1c74 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #2: c10d::ProcessGroupNCCL::watchdogHandler() + 0x890 (0x7f7791cd37d0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #3: c10d::ProcessGroupNCCL::ncclCommWatchdog() + 0x14d (0x7f7791cd46ed in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #4: + 0x145c0 (0x7f77dad4b5c0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch.so) +frame #5: + 0x7dd5 (0x7f77eb180dd5 in /lib64/libpthread.so.0) +frame #6: clone + 0x6d (0x7f77ea7a0ead in /lib64/libc.so.6) + +[rank26]:[E817 17:02:25.112097849 ProcessGroupNCCL.cpp:681] [Rank 26] Some NCCL operations have failed or timed out. Due to the asynchronous nature of CUDA kernels, subsequent GPU operations might run on corrupted/incomplete data. +[rank26]:[E817 17:02:25.112109458 ProcessGroupNCCL.cpp:695] [Rank 26] To avoid data inconsistency, we are taking the entire process down. +[rank26]:[E817 17:02:26.113346332 ProcessGroupNCCL.cpp:1895] [PG ID 1 PG GUID 1 Rank 26] Process group watchdog thread terminated with exception: [Rank 26] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600046 milliseconds before timing out. +Exception raised from checkTimeout at /pytorch/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp:632 (most recent call first): +frame #0: c10::Error::Error(c10::SourceLocation, std::string) + 0x96 (0x7f52cb17f1b6 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libc10.so) +frame #1: c10d::ProcessGroupNCCL::WorkNCCL::checkTimeout(std::optional > >) + 0x2b4 (0x7f52cc4c8c74 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #2: c10d::ProcessGroupNCCL::watchdogHandler() + 0x890 (0x7f52cc4ca7d0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #3: c10d::ProcessGroupNCCL::ncclCommWatchdog() + 0x14d (0x7f52cc4cb6ed in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #4: + 0x145c0 (0x7f53155425c0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch.so) +frame #5: + 0x7dd5 (0x7f5325977dd5 in /lib64/libpthread.so.0) +frame #6: clone + 0x6d (0x7f5324f97ead in /lib64/libc.so.6) + +[rank28]:[E817 17:02:26.114834752 ProcessGroupNCCL.cpp:681] [Rank 28] Some NCCL operations have failed or timed out. Due to the asynchronous nature of CUDA kernels, subsequent GPU operations might run on corrupted/incomplete data. +[rank28]:[E817 17:02:26.114847454 ProcessGroupNCCL.cpp:695] [Rank 28] To avoid data inconsistency, we are taking the entire process down. +[rank30]:[E817 17:02:26.115842905 ProcessGroupNCCL.cpp:681] [Rank 30] Some NCCL operations have failed or timed out. Due to the asynchronous nature of CUDA kernels, subsequent GPU operations might run on corrupted/incomplete data. +[rank30]:[E817 17:02:26.115855363 ProcessGroupNCCL.cpp:695] [Rank 30] To avoid data inconsistency, we are taking the entire process down. +[rank28]:[E817 17:02:26.116076105 ProcessGroupNCCL.cpp:1895] [PG ID 1 PG GUID 1 Rank 28] Process group watchdog thread terminated with exception: [Rank 28] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600083 milliseconds before timing out. +Exception raised from checkTimeout at /pytorch/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp:632 (most recent call first): +frame #0: c10::Error::Error(c10::SourceLocation, std::string) + 0x96 (0x7f03841fd1b6 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libc10.so) +frame #1: c10d::ProcessGroupNCCL::WorkNCCL::checkTimeout(std::optional > >) + 0x2b4 (0x7f0385546c74 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #2: c10d::ProcessGroupNCCL::watchdogHandler() + 0x890 (0x7f03855487d0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #3: c10d::ProcessGroupNCCL::ncclCommWatchdog() + 0x14d (0x7f03855496ed in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #4: + 0x145c0 (0x7f03ce5c05c0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch.so) +frame #5: + 0x7dd5 (0x7f03de9f5dd5 in /lib64/libpthread.so.0) +frame #6: clone + 0x6d (0x7f03de015ead in /lib64/libc.so.6) + +[rank30]:[E817 17:02:26.117098731 ProcessGroupNCCL.cpp:1895] [PG ID 1 PG GUID 1 Rank 30] Process group watchdog thread terminated with exception: [Rank 30] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600063 milliseconds before timing out. +Exception raised from checkTimeout at /pytorch/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp:632 (most recent call first): +frame #0: c10::Error::Error(c10::SourceLocation, std::string) + 0x96 (0x7f71c51871b6 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libc10.so) +frame #1: c10d::ProcessGroupNCCL::WorkNCCL::checkTimeout(std::optional > >) + 0x2b4 (0x7f71c64d0c74 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #2: c10d::ProcessGroupNCCL::watchdogHandler() + 0x890 (0x7f71c64d27d0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #3: c10d::ProcessGroupNCCL::ncclCommWatchdog() + 0x14d (0x7f71c64d36ed in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #4: + 0x145c0 (0x7f720f54a5c0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch.so) +frame #5: + 0x7dd5 (0x7f721f97fdd5 in /lib64/libpthread.so.0) +frame #6: clone + 0x6d (0x7f721ef9fead in /lib64/libc.so.6) + +[rank39]:[E817 17:02:26.156343017 ProcessGroupNCCL.cpp:681] [Rank 39] Some NCCL operations have failed or timed out. Due to the asynchronous nature of CUDA kernels, subsequent GPU operations might run on corrupted/incomplete data. +[rank39]:[E817 17:02:26.156362833 ProcessGroupNCCL.cpp:695] [Rank 39] To avoid data inconsistency, we are taking the entire process down. +[rank39]:[E817 17:02:26.157621765 ProcessGroupNCCL.cpp:1895] [PG ID 1 PG GUID 1 Rank 39] Process group watchdog thread terminated with exception: [Rank 39] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600065 milliseconds before timing out. +Exception raised from checkTimeout at /pytorch/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp:632 (most recent call first): +frame #0: c10::Error::Error(c10::SourceLocation, std::string) + 0x96 (0x7f3a1d6af1b6 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libc10.so) +frame #1: c10d::ProcessGroupNCCL::WorkNCCL::checkTimeout(std::optional > >) + 0x2b4 (0x7f3a1e9f8c74 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #2: c10d::ProcessGroupNCCL::watchdogHandler() + 0x890 (0x7f3a1e9fa7d0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #3: c10d::ProcessGroupNCCL::ncclCommWatchdog() + 0x14d (0x7f3a1e9fb6ed in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #4: + 0x145c0 (0x7f3a67a725c0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch.so) +frame #5: + 0x7dd5 (0x7f3a77ea7dd5 in /lib64/libpthread.so.0) +frame #6: clone + 0x6d (0x7f3a774c7ead in /lib64/libc.so.6) + +[rank32]:[E817 17:02:26.175515182 ProcessGroupNCCL.cpp:681] [Rank 32] Some NCCL operations have failed or timed out. Due to the asynchronous nature of CUDA kernels, subsequent GPU operations might run on corrupted/incomplete data. +[rank32]:[E817 17:02:26.175528587 ProcessGroupNCCL.cpp:695] [Rank 32] To avoid data inconsistency, we are taking the entire process down. +[rank32]:[E817 17:02:26.176718617 ProcessGroupNCCL.cpp:1895] [PG ID 1 PG GUID 1 Rank 32] Process group watchdog thread terminated with exception: [Rank 32] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600052 milliseconds before timing out. +Exception raised from checkTimeout at /pytorch/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp:632 (most recent call first): +frame #0: c10::Error::Error(c10::SourceLocation, std::string) + 0x96 (0x7fbb0f8e31b6 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libc10.so) +frame #1: c10d::ProcessGroupNCCL::WorkNCCL::checkTimeout(std::optional > >) + 0x2b4 (0x7fbb10c2cc74 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #2: c10d::ProcessGroupNCCL::watchdogHandler() + 0x890 (0x7fbb10c2e7d0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #3: c10d::ProcessGroupNCCL::ncclCommWatchdog() + 0x14d (0x7fbb10c2f6ed in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #4: + 0x145c0 (0x7fbb59ca65c0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch.so) +frame #5: + 0x7dd5 (0x7fbb6a0dbdd5 in /lib64/libpthread.so.0) +frame #6: clone + 0x6d (0x7fbb696fbead in /lib64/libc.so.6) + +[rank36]:[E817 17:02:26.184542720 ProcessGroupNCCL.cpp:681] [Rank 36] Some NCCL operations have failed or timed out. Due to the asynchronous nature of CUDA kernels, subsequent GPU operations might run on corrupted/incomplete data. +[rank36]:[E817 17:02:26.184558815 ProcessGroupNCCL.cpp:695] [Rank 36] To avoid data inconsistency, we are taking the entire process down. +[rank34]:[E817 17:02:26.185229167 ProcessGroupNCCL.cpp:681] [Rank 34] Some NCCL operations have failed or timed out. Due to the asynchronous nature of CUDA kernels, subsequent GPU operations might run on corrupted/incomplete data. +[rank34]:[E817 17:02:26.185240610 ProcessGroupNCCL.cpp:695] [Rank 34] To avoid data inconsistency, we are taking the entire process down. +[rank36]:[E817 17:02:26.185744516 ProcessGroupNCCL.cpp:1895] [PG ID 1 PG GUID 1 Rank 36] Process group watchdog thread terminated with exception: [Rank 36] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600049 milliseconds before timing out. +Exception raised from checkTimeout at /pytorch/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp:632 (most recent call first): +frame #0: c10::Error::Error(c10::SourceLocation, std::string) + 0x96 (0x7f433e1841b6 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libc10.so) +frame #1: c10d::ProcessGroupNCCL::WorkNCCL::checkTimeout(std::optional > >) + 0x2b4 (0x7f433f4cdc74 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #2: c10d::ProcessGroupNCCL::watchdogHandler() + 0x890 (0x7f433f4cf7d0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #3: c10d::ProcessGroupNCCL::ncclCommWatchdog() + 0x14d (0x7f433f4d06ed in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #4: + 0x145c0 (0x7f43885475c0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch.so) +frame #5: + 0x7dd5 (0x7f439897cdd5 in /lib64/libpthread.so.0) +frame #6: clone + 0x6d (0x7f4397f9cead in /lib64/libc.so.6) + +[rank34]:[E817 17:02:26.186395684 ProcessGroupNCCL.cpp:1895] [PG ID 1 PG GUID 1 Rank 34] Process group watchdog thread terminated with exception: [Rank 34] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600067 milliseconds before timing out. +Exception raised from checkTimeout at /pytorch/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp:632 (most recent call first): +frame #0: c10::Error::Error(c10::SourceLocation, std::string) + 0x96 (0x7f82e94901b6 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libc10.so) +frame #1: c10d::ProcessGroupNCCL::WorkNCCL::checkTimeout(std::optional > >) + 0x2b4 (0x7f82ea7d9c74 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #2: c10d::ProcessGroupNCCL::watchdogHandler() + 0x890 (0x7f82ea7db7d0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #3: c10d::ProcessGroupNCCL::ncclCommWatchdog() + 0x14d (0x7f82ea7dc6ed in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #4: + 0x145c0 (0x7f83338535c0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch.so) +frame #5: + 0x7dd5 (0x7f8343c88dd5 in /lib64/libpthread.so.0) +frame #6: clone + 0x6d (0x7f83432a8ead in /lib64/libc.so.6) + +[rank38]:[E817 17:02:26.187085303 ProcessGroupNCCL.cpp:681] [Rank 38] Some NCCL operations have failed or timed out. Due to the asynchronous nature of CUDA kernels, subsequent GPU operations might run on corrupted/incomplete data. +[rank38]:[E817 17:02:26.187095685 ProcessGroupNCCL.cpp:695] [Rank 38] To avoid data inconsistency, we are taking the entire process down. +[rank38]:[E817 17:02:26.188253964 ProcessGroupNCCL.cpp:1895] [PG ID 1 PG GUID 1 Rank 38] Process group watchdog thread terminated with exception: [Rank 38] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600038 milliseconds before timing out. +Exception raised from checkTimeout at /pytorch/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp:632 (most recent call first): +frame #0: c10::Error::Error(c10::SourceLocation, std::string) + 0x96 (0x7f51cef441b6 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libc10.so) +frame #1: c10d::ProcessGroupNCCL::WorkNCCL::checkTimeout(std::optional > >) + 0x2b4 (0x7f51d028dc74 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #2: c10d::ProcessGroupNCCL::watchdogHandler() + 0x890 (0x7f51d028f7d0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #3: c10d::ProcessGroupNCCL::ncclCommWatchdog() + 0x14d (0x7f51d02906ed in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #4: + 0x145c0 (0x7f52193075c0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch.so) +frame #5: + 0x7dd5 (0x7f522973cdd5 in /lib64/libpthread.so.0) +frame #6: clone + 0x6d (0x7f5228d5cead in /lib64/libc.so.6) + +W0817 17:02:26.376000 106705 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 107471 closing signal SIGTERM +W0817 17:02:26.400000 106705 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 107472 closing signal SIGTERM +W0817 17:02:26.401000 106705 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 107473 closing signal SIGTERM +W0817 17:02:26.401000 106705 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 107474 closing signal SIGTERM +W0817 17:02:26.401000 106705 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 107475 closing signal SIGTERM +W0817 17:02:26.401000 106705 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 107477 closing signal SIGTERM +W0817 17:02:26.402000 106705 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 107478 closing signal SIGTERM +[rank9]:[E817 17:02:26.006812526 ProcessGroupNCCL.cpp:681] [Rank 9] Some NCCL operations have failed or timed out. Due to the asynchronous nature of CUDA kernels, subsequent GPU operations might run on corrupted/incomplete data. +[rank9]:[E817 17:02:26.006830762 ProcessGroupNCCL.cpp:695] [Rank 9] To avoid data inconsistency, we are taking the entire process down. +[rank9]:[E817 17:02:26.008165286 ProcessGroupNCCL.cpp:1895] [PG ID 1 PG GUID 1 Rank 9] Process group watchdog thread terminated with exception: [Rank 9] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600094 milliseconds before timing out. +Exception raised from checkTimeout at /pytorch/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp:632 (most recent call first): +frame #0: c10::Error::Error(c10::SourceLocation, std::string) + 0x96 (0x7f685d5df1b6 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libc10.so) +frame #1: c10d::ProcessGroupNCCL::WorkNCCL::checkTimeout(std::optional > >) + 0x2b4 (0x7f685e928c74 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #2: c10d::ProcessGroupNCCL::watchdogHandler() + 0x890 (0x7f685e92a7d0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #3: c10d::ProcessGroupNCCL::ncclCommWatchdog() + 0x14d (0x7f685e92b6ed in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #4: + 0x145c0 (0x7f68a79a25c0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch.so) +frame #5: + 0x7dd5 (0x7f68b7dd7dd5 in /lib64/libpthread.so.0) +frame #6: clone + 0x6d (0x7f68b73f7ead in /lib64/libc.so.6) + +[rank8]:[E817 17:02:26.023364446 ProcessGroupNCCL.cpp:681] [Rank 8] Some NCCL operations have failed or timed out. Due to the asynchronous nature of CUDA kernels, subsequent GPU operations might run on corrupted/incomplete data. +[rank8]:[E817 17:02:26.023378393 ProcessGroupNCCL.cpp:695] [Rank 8] To avoid data inconsistency, we are taking the entire process down. +[rank8]:[E817 17:02:26.024672839 ProcessGroupNCCL.cpp:1895] [PG ID 1 PG GUID 1 Rank 8] Process group watchdog thread terminated with exception: [Rank 8] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600099 milliseconds before timing out. +Exception raised from checkTimeout at /pytorch/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp:632 (most recent call first): +frame #0: c10::Error::Error(c10::SourceLocation, std::string) + 0x96 (0x7f71bd0d51b6 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libc10.so) +frame #1: c10d::ProcessGroupNCCL::WorkNCCL::checkTimeout(std::optional > >) + 0x2b4 (0x7f71be41ec74 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #2: c10d::ProcessGroupNCCL::watchdogHandler() + 0x890 (0x7f71be4207d0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #3: c10d::ProcessGroupNCCL::ncclCommWatchdog() + 0x14d (0x7f71be4216ed in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #4: + 0x145c0 (0x7f72074985c0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch.so) +frame #5: + 0x7dd5 (0x7f72178cddd5 in /lib64/libpthread.so.0) +frame #6: clone + 0x6d (0x7f7216eedead in /lib64/libc.so.6) + +[rank10]:[E817 17:02:26.025513607 ProcessGroupNCCL.cpp:681] [Rank 10] Some NCCL operations have failed or timed out. Due to the asynchronous nature of CUDA kernels, subsequent GPU operations might run on corrupted/incomplete data. +[rank10]:[E817 17:02:26.025526925 ProcessGroupNCCL.cpp:695] [Rank 10] To avoid data inconsistency, we are taking the entire process down. +[rank10]:[E817 17:02:26.026834247 ProcessGroupNCCL.cpp:1895] [PG ID 1 PG GUID 1 Rank 10] Process group watchdog thread terminated with exception: [Rank 10] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600006 milliseconds before timing out. +Exception raised from checkTimeout at /pytorch/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp:632 (most recent call first): +frame #0: c10::Error::Error(c10::SourceLocation, std::string) + 0x96 (0x7f30890ff1b6 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libc10.so) +frame #1: c10d::ProcessGroupNCCL::WorkNCCL::checkTimeout(std::optional > >) + 0x2b4 (0x7f308a448c74 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #2: c10d::ProcessGroupNCCL::watchdogHandler() + 0x890 (0x7f308a44a7d0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #3: c10d::ProcessGroupNCCL::ncclCommWatchdog() + 0x14d (0x7f308a44b6ed in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #4: + 0x145c0 (0x7f30d34c25c0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch.so) +frame #5: + 0x7dd5 (0x7f30e38f7dd5 in /lib64/libpthread.so.0) +frame #6: clone + 0x6d (0x7f30e2f17ead in /lib64/libc.so.6) + +[rank12]:[E817 17:02:26.029057817 ProcessGroupNCCL.cpp:681] [Rank 12] Some NCCL operations have failed or timed out. Due to the asynchronous nature of CUDA kernels, subsequent GPU operations might run on corrupted/incomplete data. +[rank12]:[E817 17:02:26.029074177 ProcessGroupNCCL.cpp:695] [Rank 12] To avoid data inconsistency, we are taking the entire process down. +[rank14]:[E817 17:02:26.030039747 ProcessGroupNCCL.cpp:681] [Rank 14] Some NCCL operations have failed or timed out. Due to the asynchronous nature of CUDA kernels, subsequent GPU operations might run on corrupted/incomplete data. +[rank14]:[E817 17:02:26.030051380 ProcessGroupNCCL.cpp:695] [Rank 14] To avoid data inconsistency, we are taking the entire process down. +[rank12]:[E817 17:02:26.030372521 ProcessGroupNCCL.cpp:1895] [PG ID 1 PG GUID 1 Rank 12] Process group watchdog thread terminated with exception: [Rank 12] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600097 milliseconds before timing out. +Exception raised from checkTimeout at /pytorch/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp:632 (most recent call first): +frame #0: c10::Error::Error(c10::SourceLocation, std::string) + 0x96 (0x7ff0b69011b6 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libc10.so) +frame #1: c10d::ProcessGroupNCCL::WorkNCCL::checkTimeout(std::optional > >) + 0x2b4 (0x7ff0b7c4ac74 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #2: c10d::ProcessGroupNCCL::watchdogHandler() + 0x890 (0x7ff0b7c4c7d0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #3: c10d::ProcessGroupNCCL::ncclCommWatchdog() + 0x14d (0x7ff0b7c4d6ed in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #4: + 0x145c0 (0x7ff100cc45c0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch.so) +frame #5: + 0x7dd5 (0x7ff1110f9dd5 in /lib64/libpthread.so.0) +frame #6: clone + 0x6d (0x7ff110719ead in /lib64/libc.so.6) + +[rank14]:[E817 17:02:26.031341556 ProcessGroupNCCL.cpp:1895] [PG ID 1 PG GUID 1 Rank 14] Process group watchdog thread terminated with exception: [Rank 14] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=5386964, OpType=_REDUCE_SCATTER_BASE, NumelIn=22544384, NumelOut=352256, Timeout(ms)=600000) ran for 600073 milliseconds before timing out. +Exception raised from checkTimeout at /pytorch/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp:632 (most recent call first): +frame #0: c10::Error::Error(c10::SourceLocation, std::string) + 0x96 (0x7f19754ee1b6 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libc10.so) +frame #1: c10d::ProcessGroupNCCL::WorkNCCL::checkTimeout(std::optional > >) + 0x2b4 (0x7f1976837c74 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #2: c10d::ProcessGroupNCCL::watchdogHandler() + 0x890 (0x7f19768397d0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #3: c10d::ProcessGroupNCCL::ncclCommWatchdog() + 0x14d (0x7f197683a6ed in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch_cuda.so) +frame #4: + 0x145c0 (0x7f19bf8b15c0 in /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/lib/libtorch.so) +frame #5: + 0x7dd5 (0x7f19cfce6dd5 in /lib64/libpthread.so.0) +frame #6: clone + 0x6d (0x7f19cf306ead in /lib64/libc.so.6) + +W0817 17:02:27.034000 5963 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 6645 closing signal SIGTERM +W0817 17:02:27.050000 5963 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 6646 closing signal SIGTERM +W0817 17:02:27.051000 5963 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 6647 closing signal SIGTERM +W0817 17:02:27.051000 5963 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 6649 closing signal SIGTERM +W0817 17:02:27.051000 5963 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 6651 closing signal SIGTERM +W0817 17:02:27.052000 5963 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 6652 closing signal SIGTERM +W0817 17:02:27.613000 23464 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 24124 closing signal SIGTERM +W0817 17:02:27.627000 23464 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 24125 closing signal SIGTERM +W0817 17:02:27.628000 23464 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 24126 closing signal SIGTERM +W0817 17:02:27.628000 23464 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 24127 closing signal SIGTERM +W0817 17:02:27.628000 23464 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 24128 closing signal SIGTERM +W0817 17:02:27.628000 23464 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 24130 closing signal SIGTERM +W0817 17:02:27.629000 23464 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 24131 closing signal SIGTERM +E0817 17:02:29.883000 5963 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:869] failed (exitcode: -6) local_rank: 3 (pid: 6648) of binary: /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/bin/python +Traceback (most recent call last): + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/bin/torchrun", line 8, in + sys.exit(main()) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", line 355, in wrapper + return f(*args, **kwargs) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py", line 918, in main + run(args) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py", line 909, in run + elastic_launch( + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/launcher/api.py", line 138, in __call__ + return launch_agent(self._config, self._entrypoint, list(args)) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/launcher/api.py", line 269, in launch_agent + raise ChildFailedError( +torch.distributed.elastic.multiprocessing.errors.ChildFailedError: +===================================================== +qwenvl/train/train_qwen.py FAILED +----------------------------------------------------- +Failures: +[1]: + time : 2025-08-17_17:02:27 + host : HOST-10-140-66-29 + rank : 29 (local_rank: 5) + exitcode : -6 (pid: 6650) + error_file: + traceback : Signal 6 (SIGABRT) received by PID 6650 +----------------------------------------------------- +Root Cause (first observed failure): +[0]: + time : 2025-08-17_17:02:27 + host : HOST-10-140-66-29 + rank : 27 (local_rank: 3) + exitcode : -6 (pid: 6648) + error_file: + traceback : Signal 6 (SIGABRT) received by PID 6648 +===================================================== +E0817 17:02:30.787000 106705 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:869] failed (exitcode: -6) local_rank: 5 (pid: 107476) of binary: /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/bin/python +Traceback (most recent call last): + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/bin/torchrun", line 8, in + sys.exit(main()) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", line 355, in wrapper + return f(*args, **kwargs) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py", line 918, in main + run(args) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py", line 909, in run + elastic_launch( + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/launcher/api.py", line 138, in __call__ + return launch_agent(self._config, self._entrypoint, list(args)) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/launcher/api.py", line 269, in launch_agent + raise ChildFailedError( +torch.distributed.elastic.multiprocessing.errors.ChildFailedError: +======================================================= +qwenvl/train/train_qwen.py FAILED +------------------------------------------------------- +Failures: + +------------------------------------------------------- +Root Cause (first observed failure): +[0]: + time : 2025-08-17_17:02:26 + host : HOST-10-140-66-131 + rank : 37 (local_rank: 5) + exitcode : -6 (pid: 107476) + error_file: + traceback : Signal 6 (SIGABRT) received by PID 107476 +======================================================= +E0817 17:02:32.781000 23464 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:869] failed (exitcode: -6) local_rank: 5 (pid: 24129) of binary: /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/bin/python +Traceback (most recent call last): + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/bin/torchrun", line 8, in + sys.exit(main()) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", line 355, in wrapper + return f(*args, **kwargs) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py", line 918, in main + run(args) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py", line 909, in run + elastic_launch( + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/launcher/api.py", line 138, in __call__ + return launch_agent(self._config, self._entrypoint, list(args)) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/launcher/api.py", line 269, in launch_agent + raise ChildFailedError( +torch.distributed.elastic.multiprocessing.errors.ChildFailedError: +====================================================== +qwenvl/train/train_qwen.py FAILED +------------------------------------------------------ +Failures: + +------------------------------------------------------ +Root Cause (first observed failure): +[0]: + time : 2025-08-17_17:02:27 + host : HOST-10-140-60-116 + rank : 13 (local_rank: 5) + exitcode : -6 (pid: 24129) + error_file: + traceback : Signal 6 (SIGABRT) received by PID 24129 +====================================================== +E0817 17:02:34.579000 69987 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:869] failed (exitcode: -6) local_rank: 5 (pid: 70667) of binary: /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/bin/python +Traceback (most recent call last): + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/bin/torchrun", line 8, in + sys.exit(main()) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", line 355, in wrapper + return f(*args, **kwargs) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py", line 918, in main + run(args) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py", line 909, in run + elastic_launch( + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/launcher/api.py", line 138, in __call__ + return launch_agent(self._config, self._entrypoint, list(args)) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/launcher/api.py", line 269, in launch_agent + raise ChildFailedError( +torch.distributed.elastic.multiprocessing.errors.ChildFailedError: +====================================================== +qwenvl/train/train_qwen.py FAILED +------------------------------------------------------ +Failures: + +------------------------------------------------------ +Root Cause (first observed failure): +[0]: + time : 2025-08-17_17:02:25 + host : HOST-10-140-66-162 + rank : 53 (local_rank: 5) + exitcode : -6 (pid: 70667) + error_file: + traceback : Signal 6 (SIGABRT) received by PID 70667 +====================================================== +E0817 17:02:35.048000 50715 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:869] failed (exitcode: -6) local_rank: 5 (pid: 51372) of binary: /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/bin/python +Traceback (most recent call last): + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/bin/torchrun", line 8, in + sys.exit(main()) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", line 355, in wrapper + return f(*args, **kwargs) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py", line 918, in main + run(args) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py", line 909, in run + elastic_launch( + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/launcher/api.py", line 138, in __call__ + return launch_agent(self._config, self._entrypoint, list(args)) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/launcher/api.py", line 269, in launch_agent + raise ChildFailedError( +torch.distributed.elastic.multiprocessing.errors.ChildFailedError: +====================================================== +qwenvl/train/train_qwen.py FAILED +------------------------------------------------------ +Failures: + +------------------------------------------------------ +Root Cause (first observed failure): +[0]: + time : 2025-08-17_17:02:25 + host : HOST-10-140-66-22 + rank : 21 (local_rank: 5) + exitcode : -6 (pid: 51372) + error_file: + traceback : Signal 6 (SIGABRT) received by PID 51372 +====================================================== +E0817 17:02:36.544000 100739 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py:869] failed (exitcode: -6) local_rank: 5 (pid: 101504) of binary: /mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/bin/python +Traceback (most recent call last): + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/bin/torchrun", line 8, in + sys.exit(main()) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", line 355, in wrapper + return f(*args, **kwargs) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py", line 918, in main + run(args) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py", line 909, in run + elastic_launch( + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/launcher/api.py", line 138, in __call__ + return launch_agent(self._config, self._entrypoint, list(args)) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/launcher/api.py", line 269, in launch_agent + raise ChildFailedError( +torch.distributed.elastic.multiprocessing.errors.ChildFailedError: +======================================================= +qwenvl/train/train_qwen.py FAILED +------------------------------------------------------- +Failures: + +------------------------------------------------------- +Root Cause (first observed failure): +[0]: + time : 2025-08-17_17:02:24 + host : HOST-10-140-66-135 + rank : 45 (local_rank: 5) + exitcode : -6 (pid: 101504) + error_file: + traceback : Signal 6 (SIGABRT) received by PID 101504 +======================================================= +W0817 17:04:33.645000 20387 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 17:04:33.645000 20387 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 17:04:33.645000 20387 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 17:04:33.645000 20387 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +*** +W0817 17:04:38.466000 101834 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 17:04:38.466000 101834 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 17:04:38.466000 101834 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 17:04:38.466000 101834 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 17:04:39.959000 46686 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 17:04:39.959000 46686 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 17:04:39.959000 46686 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 17:04:39.959000 46686 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 17:04:41.719000 112895 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 17:04:41.719000 112895 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 17:04:41.719000 112895 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 17:04:41.719000 112895 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +[2025-08-17 17:05:16,877] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:05:16,877] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:05:16,877] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:05:16,878] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:05:16,878] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:05:16,878] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:05:16,879] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:05:16,879] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:05:16,908] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:05:16,898] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:05:16,898] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:05:16,922] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:05:16,923] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:05:16,923] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:05:16,923] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:05:16,923] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:05:16,923] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:05:16,923] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:05:16,910] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:05:16,910] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:05:16,910] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:05:16,910] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:05:16,911] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:05:16,911] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:05:16,912] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:05:16,912] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:05:16,944] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:05:16,945] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:05:16,934] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:05:16,934] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:05:16,935] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:05:16,961] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:05:16,941] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:05:16,941] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:05:16,941] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:05:16,941] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:05:16,965] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:05:16,965] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:05:16,965] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:05:20,109] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:05:20,119] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:05:20,134] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:05:20,135] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:05:20,135] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:05:20,135] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:05:20,137] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:05:20,137] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:05:22,993] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:05:23,083] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:05:23,083] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:05:23,084] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:05:23,085] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:05:23,085] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:05:23,085] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:05:23,086] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:05:26,154] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:05:26,154] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:05:26,133] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:05:26,133] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:05:26,133] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:05:26,133] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:05:26,133] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:05:26,133] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:05:26,133] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:05:26,154] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:05:26,154] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:05:26,154] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:05:26,154] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:05:26,155] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:05:26,155] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:05:26,141] [INFO] [comm.py:683:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl +[2025-08-17 17:05:26,141] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:05:26,596] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:05:26,601] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:05:26,640] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:05:26,642] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:05:26,646] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:05:26,703] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:05:26,704] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:05:26,709] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:05:26,715] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:05:26,717] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:05:26,717] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:05:26,719] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. + on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:05:26,717] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:05:26,718] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:05:26,718] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:05:26,721] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:05:26,723] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:05:26,725] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:05:26,725] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:05:26,727] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:05:26,771] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:05:26,771] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:05:26,779] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:05:26,785] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:05:26,785] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:05:26,785] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:05:26,786] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:05:26,789] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:05:26,790] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:05:26,774] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:05:26,789] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:05:26,777] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:05:26,779] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:05:26,786] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:05:30,046] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:05:30,046] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:05:30,047] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:05:30,047] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:05:30,048] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:05:30,048] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:05:30,048] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:05:30,049] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:05:30,507] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:05:30,622] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:05:30,622] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:05:30,624] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:05:30,626] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:05:30,626] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:05:30,627] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:05:30,628] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:05:33,288] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:05:33,288] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:05:33,289] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:05:33,289] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:05:33,289] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:05:33,289] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:05:33,290] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:05:33,290] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:05:33,943] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:05:34,056] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:05:34,122] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:05:34,129] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:05:34,138] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:05:34,142] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:05:34,149] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:05:34,152] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. + Loading checkpoint shards: 0%| | 0/2 [00:00 before Client(conf_path) +Rank 0: --> after Client(conf_path) +Rank 0: Loading datasets: /mnt/petrelfs/liuzhaoyang/workspace/GUIAgent/internvl_chat/data/internvl_meta/meta/meta_250816_1.json +Rank 0: Loading guienv +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl with all sampling strategy +Rank 0: Loaded 327972 samples from VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl +Rank 0: Loading omniact +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl with all sampling strategy +Rank 0: Loaded 6720 samples from VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl +Rank 0: Loading ricoig16k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16133 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl +Rank 0: Loading ricosca +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl with all sampling strategy +Rank 0: Loaded 173212 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl +Rank 0: Loading seeclick +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl with all sampling strategy +Rank 0: Loaded 271121 samples from VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl +Rank 0: Loading ui_refexp +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl with all sampling strategy +Rank 0: Loaded 15624 samples from VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl +Rank 0: Loading webui350k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 57389 samples from VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl +Rank 0: Loading widget_captioning +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl with all sampling strategy +Rank 0: Loaded 101426 samples from VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl +Rank 0: Loading aitw-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl +Rank 0: Loading aitw-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl +Rank 0: Loading aitw-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl +Rank 0: Loading amex-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl +Rank 0: Loading amex-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl +Rank 0: Loading amex-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl +Rank 0: Loading android_control +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 149428 samples from VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl +Rank 0: Loading coat +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl with all sampling strategy +Rank 0: Loaded 11833 samples from VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl +Rank 0: Loading guiact-web-multi-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl +Rank 0: Loading guiact-web-multi-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl +Rank 0: Loading guiact-web-multi-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl +Rank 0: Loading guiact-web-single +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl with all sampling strategy +Rank 0: Loaded 67396 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl +Rank 0: Loading guide +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl with all sampling strategy +Rank 0: Loaded 13544 samples from VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl +Rank 0: Loading gui-odyssey-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl +Rank 0: Loading gui-odyssey-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl +Rank 0: Loading gui-odyssey-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl +Rank 0: Loading mind2web-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l1_202507011.jsonl +Rank 0: Loading mind2web-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l2_202507011.jsonl +Rank 0: Loading mind2web-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l3_202507011.jsonl +Rank 0: Loading miniwob-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l1_202507011.jsonl +Rank 0: Loading miniwob-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l2_202507011.jsonl +Rank 0: Loading miniwob-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l3_202507011.jsonl +Rank 0: Loading aguvis_android_control-v2 +Rank 0: Skipping aguvis_android_control-v2 due to repeat_time=0 +Rank 0: Loading aguvis_coat-v2 +Rank 0: Skipping aguvis_coat-v2 due to repeat_time=0 +Rank 0: Loading aguvis_docvqa_grounding +Rank 0: Skipping aguvis_docvqa_grounding due to repeat_time=0 +Rank 0: Loading aguvis_guiact-web-multi +Rank 0: Skipping aguvis_guiact-web-multi due to repeat_time=0 +Rank 0: Loading aguvis_guiact-web-single-v2 +Rank 0: Skipping aguvis_guiact-web-single-v2 due to repeat_time=0 +Rank 0: Loading aguvis_guide_si_10k-v2 +Rank 0: Skipping aguvis_guide_si_10k-v2 due to repeat_time=0 +Rank 0: Loading aguvis_guienv +Rank 0: Skipping aguvis_guienv due to repeat_time=0 +Rank 0: Loading aguvis_mind2web_train_v1.0.1 +Rank 0: Skipping aguvis_mind2web_train_v1.0.1 due to repeat_time=0 +Rank 0: Loading aguvis_omniact +Rank 0: Skipping aguvis_omniact due to repeat_time=0 +Rank 0: Loading aguvis_osatlas_ui_tars_cleaned +Rank 0: Skipping aguvis_osatlas_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading aguvis_ricoig16k +Rank 0: Skipping aguvis_ricoig16k due to repeat_time=0 +Rank 0: Loading aguvis_ricosca +Rank 0: Skipping aguvis_ricosca due to repeat_time=0 +Rank 0: Loading aguvis_seeclick_mi_ui_tars_cleaned +Rank 0: Skipping aguvis_seeclick_mi_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading aguvis_seeclick_ui_tars_cleaned_fixed +Rank 0: Skipping aguvis_seeclick_ui_tars_cleaned_fixed due to repeat_time=0 +Rank 0: Loading aguvis_ui_refexp +Rank 0: Skipping aguvis_ui_refexp due to repeat_time=0 +Rank 0: Loading aguvis_webui350k +Rank 0: Skipping aguvis_webui350k due to repeat_time=0 +Rank 0: Loading aguvis_widget_captioning +Rank 0: Skipping aguvis_widget_captioning due to repeat_time=0 +Rank 0: Loading icon_caption_icon_v0222_description +Rank 0: Skipping icon_caption_icon_v0222_description due to repeat_time=0 +Rank 0: Loading icon_grounding_icon_v0222_grounding +Rank 0: Skipping icon_grounding_icon_v0222_grounding due to repeat_time=0 +Rank 0: Loading refusal_component_final_1.5m +Rank 0: Skipping refusal_component_final_1.5m due to repeat_time=0 +Rank 0: Loading refusal_component_library_snap_icon_data_grounding +Rank 0: Skipping refusal_component_library_snap_icon_data_grounding due to repeat_time=0 +Rank 0: Loading refusal_component_v1_130k +Rank 0: Skipping refusal_component_v1_130k due to repeat_time=0 +Rank 0: Loading refusal_guienv +Rank 0: Skipping refusal_guienv due to repeat_time=0 +Rank 0: Loading refusal_icon_v0222_grounding +Rank 0: Skipping refusal_icon_v0222_grounding due to repeat_time=0 +Rank 0: Loading refusal_osatlas_ui_tars_cleaned +Rank 0: Skipping refusal_osatlas_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading refusal_ricosca +Rank 0: Skipping refusal_ricosca due to repeat_time=0 +Rank 0: Loading refusal_seeclick_mi_ui_tars_cleaned +Rank 0: Skipping refusal_seeclick_mi_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading refusal_seeclick_ui_tars_cleaned_fixed +Rank 0: Skipping refusal_seeclick_ui_tars_cleaned_fixed due to repeat_time=0 +Rank 0: Loading refusal_training_data_icon_grounded_merged +Rank 0: Skipping refusal_training_data_icon_grounded_merged due to repeat_time=0 +Rank 0: Loading component_generated_component_final_1.5m_cleaned_split +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_final_1.5m_cleaned_split_20250713.jsonl with random:10% sampling strategy +Rank 0: Loaded 3987 samples from VC:s3://gui-agent/jedi/annotations_250713/component_final_1.5m_cleaned_split_20250713.jsonl +Rank 0: Loading component_generated_component_library_snap_icon_data_description +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_description_conversations_20250713.jsonl with random:50% sampling strategy +Rank 0: Loaded 11061 samples from VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_description_conversations_20250713.jsonl +Rank 0: Loading component_generated_component_library_snap_icon_data_grounding +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_grounding_conversations_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 4424 samples from VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_grounding_conversations_20250713.jsonl +Rank 0: Loading component_generated_component_v1_130k +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_v1_130k_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 26376 samples from VC:s3://gui-agent/jedi/annotations_250713/component_v1_130k_20250713.jsonl +Rank 0: Loading component_rule-based_doc_data_new +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/doc_data_new_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 3153 samples from VC:s3://gui-agent/jedi/annotations_250713/doc_data_new_20250713.jsonl +Rank 0: Loading component_rule-based_doc_scroll_data_new +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/doc_scroll_data_new_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 603 samples from VC:s3://gui-agent/jedi/annotations_250713/doc_scroll_data_new_20250713.jsonl +Rank 0: Loading component_rule-based_ethercalc_v1 +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/ethercalc_v1_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 2012 samples from VC:s3://gui-agent/jedi/annotations_250713/ethercalc_v1_20250713.jsonl +Rank 0: Loading component_rule-based_slide_v1_17k +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/slide_v1_17k_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 2363 samples from VC:s3://gui-agent/jedi/annotations_250713/slide_v1_17k_20250713.jsonl +Rank 0: Loading icon_caption_ios_app_data +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/ios_app_data_conversations-images_pure_color_background_20250713.jsonl with all sampling strategy +Rank 0: Loaded 49498 samples from VC:s3://gui-agent/jedi/annotations_250713/ios_app_data_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_caption_mac_app_data +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/mac_app_data_conversations-images_pure_color_background_20250713.jsonl with all sampling strategy +Rank 0: Loaded 18083 samples from VC:s3://gui-agent/jedi/annotations_250713/mac_app_data_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_caption_training_data_icon +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_pure_color_background_20250713.jsonl with random:50% sampling strategy +Rank 0: Loaded 75874 samples from VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_grounding_training_data_icon_grounded_merged +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_grounded_merged_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 5466 samples from VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_grounded_merged_20250713.jsonl +Rank 0: Loading layout_layout200k_training_data_qwen25 +Rank 0: Skipping layout_layout200k_training_data_qwen25 due to repeat_time=0 +Rank 0: Loading layout_layout200k_grounding_training_data_qwen25 +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/layout200k_grounding_training_data_qwen25_20250713.jsonl with random:10% sampling strategy +Rank 0: Loaded 158612 samples from VC:s3://gui-agent/jedi/annotations_250713/layout200k_grounding_training_data_qwen25_20250713.jsonl +Rank 0: Loading layout_layout400k_claude_training_data_qwen25_split +Rank 0: Skipping layout_layout400k_claude_training_data_qwen25_split due to repeat_time=0 +Rank 0: Loading layout_layout400k_claude_grounding_training_data_qwen25_split +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/layout400k_claude_grounding_training_data_qwen25_split_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 7540 samples from VC:s3://gui-agent/jedi/annotations_250713/layout400k_claude_grounding_training_data_qwen25_split_20250713.jsonl +Rank 0: Loading layout_os_layout_v1 +Rank 0: Skipping layout_os_layout_v1 due to repeat_time=0 +Rank 0: Loading layout_os_layout_v1_grounding +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/os_layout_v1_grounding_training_data_qwen25_split_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 7857 samples from VC:s3://gui-agent/jedi/annotations_250713/os_layout_v1_grounding_training_data_qwen25_split_20250713.jsonl +Rank 0: Loading mind2web_raw_image +Rank 0: Loading VC:s3://gui-agent/mind2web_train/navigation_20250705.jsonl with all sampling strategy +Rank 0: Loaded 5740 samples from VC:s3://gui-agent/mind2web_train/navigation_20250705.jsonl +Rank 0: Loading ws_android_navigation_20250328 +Rank 0: Skipping ws_android_navigation_20250328 due to repeat_time=0 +Rank 0: Loading ws_android_navigation_20250407 +Rank 0: Skipping ws_android_navigation_20250407 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_w_history_20250328 +Rank 0: Skipping ws_web_navigation_w_history_20250328 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_wo_history_20250328 +Rank 0: Skipping ws_web_navigation_wo_history_20250328 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_20250421 +Rank 0: Skipping ws_web_navigation_20250421 due to repeat_time=0 +Rank 0: Loading ws_ubuntu_navigation_20250328 +Rank 0: Skipping ws_ubuntu_navigation_20250328 due to repeat_time=0 +Rank 0: Loading ws_android_navigation_20250505 +Rank 0: Skipping ws_android_navigation_20250505 due to repeat_time=0 +Rank 0: Loading internal_android_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 48814 samples from VC:s3://gui-agent/data_20250612/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19042 samples from VC:s3://gui-agent/data_20250612/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8363 samples from VC:s3://gui-agent/data_20250612/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 26412 samples from VC:s3://gui-agent/data_20250612/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 57522 samples from VC:s3://gui-agent/data_20250612/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 1342 samples from VC:s3://gui-agent/data_20250624/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 15766 samples from VC:s3://gui-agent/data_20250624/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19280 samples from VC:s3://gui-agent/data_20250630/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 3560 samples from VC:s3://gui-agent/data_20250630/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 9258 samples from VC:s3://gui-agent/data_20250630/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 420 samples from VC:s3://gui-agent/data_20250707/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 8898 samples from VC:s3://gui-agent/data_20250707/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 21026 samples from VC:s3://gui-agent/data_20250707/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 4490 samples from VC:s3://gui-agent/data_20250707/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 22154 samples from VC:s3://gui-agent/data_20250714/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/web/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 11614 samples from VC:s3://gui-agent/data_20250714/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 16767 samples from VC:s3://gui-agent/data_20250714/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 48814 samples from VC:s3://gui-agent/data_20250612/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 11970 samples from VC:s3://gui-agent/data_20250612/android/planning_20250811.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19042 samples from VC:s3://gui-agent/data_20250612/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 4926 samples from VC:s3://gui-agent/data_20250612/mac/planning_20250811.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8363 samples from VC:s3://gui-agent/data_20250612/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3716 samples from VC:s3://gui-agent/data_20250612/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 26412 samples from VC:s3://gui-agent/data_20250612/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 4420 samples from VC:s3://gui-agent/data_20250612/windows/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 115044 samples from VC:s3://gui-agent/data_20250612/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2684 samples from VC:s3://gui-agent/data_20250624/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 15766 samples from VC:s3://gui-agent/data_20250624/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7402 samples from VC:s3://gui-agent/data_20250624/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19280 samples from VC:s3://gui-agent/data_20250630/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3746 samples from VC:s3://gui-agent/data_20250630/android/planning_20250811.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 3560 samples from VC:s3://gui-agent/data_20250630/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 2469 samples from VC:s3://gui-agent/data_20250630/mac/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 18516 samples from VC:s3://gui-agent/data_20250630/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 420 samples from VC:s3://gui-agent/data_20250707/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 243 samples from VC:s3://gui-agent/data_20250707/mac/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 8898 samples from VC:s3://gui-agent/data_20250707/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 1188 samples from VC:s3://gui-agent/data_20250707/windows/planning_20250811.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8979 samples from VC:s3://gui-agent/data_20250707/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 1599 samples from VC:s3://gui-agent/data_20250707/android/planning_20250811.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 21026 samples from VC:s3://gui-agent/data_20250707/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2657 samples from VC:s3://gui-agent/data_20250707/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 22154 samples from VC:s3://gui-agent/data_20250714/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2383 samples from VC:s3://gui-agent/data_20250714/windows/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/web/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 23229 samples from VC:s3://gui-agent/data_20250714/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 16767 samples from VC:s3://gui-agent/data_20250714/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2389 samples from VC:s3://gui-agent/data_20250714/ubuntu/planning_20250811.jsonl +Rank 0: Loading private_aig_share_0815_logo_oral_operation_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_oral_operation_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_oral_operation_d240924_v1.jsonl +Rank 0: Loading private_aig_share_0815_logo_region_caption_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_region_caption_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_region_caption_d240924_v1.jsonl +Rank 0: Loading private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2.jsonl with all sampling strategy +Rank 0: Loaded 20293 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2.jsonl +Rank 0: Loading private_ui_phone_comment_20240606_json_d20241023_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_comment_20240606_json_d20241023_v2.jsonl with all sampling strategy +Rank 0: Loaded 1055 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_comment_20240606_json_d20241023_v2.jsonl +Rank 0: Loading private_ui_internal_aig_json_d241126 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_json_d241126.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 6837 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_json_d241126.jsonl +Rank 0: Loading private_ui_internal_aig_xml_d241126 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_xml_d241126.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 6873 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_xml_d241126.jsonl +Rank 0: Loading OS_Altas_androidworld_grounding_d241120_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/OS_Altas_androidworld_grounding_d241120_v1.jsonl with all sampling strategy +Rank 0: Loaded 89860 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/OS_Altas_androidworld_grounding_d241120_v1.jsonl +Rank 0: Loading private_ui_aig_share_long_caption_20240604_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_long_caption_20240604_v1.jsonl with repeat:4 sampling strategy +Rank 0: Loaded 3156 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_long_caption_20240604_v1.jsonl +Rank 0: Loading aw_1218_grounding +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/grounding_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/grounding_new.jsonl +Rank 0: Loading aw_1218_regioncaption +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/regioncaption_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/regioncaption_new.jsonl +Rank 0: Loading aw_1218_oral_operation +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/oral_operation_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/oral_operation_new.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600.jsonl with all sampling strategy +Rank 0: Loaded 6600 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1.jsonl +Rank 0: Loading private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607.jsonl with all sampling strategy +Rank 0: Loaded 24620 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607.jsonl +Rank 0: Loading private_ui_phone_2403_long_caption_d20240604_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d20240604_v2.jsonl with all sampling strategy +Rank 0: Loaded 17196 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d20240604_v2.jsonl +Rank 0: Loading private_ui_phone_2403_long_caption_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 5998 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d240430_v1.jsonl +Rank 0: Loading private_ui_phone_2403_ocr_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ocr_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 31276 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ocr_d240430_v1.jsonl +Rank 0: Loading screen_qa_with_bbox_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_with_bbox_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 62401 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_with_bbox_d240430_v1.jsonl +Rank 0: Loading screenai_layout_20240604_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screenai_layout_20240604_v1.jsonl with all sampling strategy +Rank 0: Loaded 22076 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screenai_layout_20240604_v1.jsonl +Rank 0: Loading amex_grounding_d240813_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/amex_grounding_d240813_v1.jsonl with all sampling strategy +Rank 0: Loaded 102007 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/amex_grounding_d240813_v1.jsonl +Rank 0: Loading guicourse_guienv_text_grounding_1_d240815_v3 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_1_d240815_v3.jsonl with all sampling strategy +Rank 0: Loaded 63581 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_1_d240815_v3.jsonl +Rank 0: Loading guicourse_guienv_text_grounding_2_d240815_v3 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_2_d240815_v3.jsonl with all sampling strategy +Rank 0: Loaded 6852 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_2_d240815_v3.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1.jsonl +Rank 0: Loading screen_qa_short_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_short_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 27880 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_short_d240430_v1.jsonl +Rank 0: Loading private_aig_share_0815_logo_grounding_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_grounding_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_grounding_d240924_v1.jsonl +Rank 0: Loading private_schedual_extract_20240520_v2_r464_reprompt_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_schedual_extract_20240520_v2_r464_reprompt_d240607.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 928 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_schedual_extract_20240520_v2_r464_reprompt_d240607.jsonl +Rank 0: Loading private_ui2json_app_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_app_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2488 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_app_d20240822_v1.jsonl +Rank 0: Loading private_ui2json_os_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_os_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 1242 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_os_d20240822_v1.jsonl +Rank 0: Loading private_ui2json_web_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_web_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2360 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_web_d20240822_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607.jsonl with all sampling strategy +Rank 0: Loaded 3791 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607.jsonl +Rank 0: Loading private_ui_aig_share_2405_marker_recognition_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_marker_recognition_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5179 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_marker_recognition_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_ocr_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_ocr_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5090 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_ocr_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_operation_oral_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_operation_oral_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5070 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_operation_oral_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5248 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2408_region_caption_d240903_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2408_region_caption_d240903_v1.jsonl with all sampling strategy +Rank 0: Loaded 5854 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2408_region_caption_d240903_v1.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1.jsonl +Rank 0: Loading uground_web_direct_150k_description_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_direct_150k_description_filtered_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 74208 samples from VC:s3://gui/new_annotations/uground/web_direct_150k_description_filtered_202507011.jsonl +Rank 0: Loading uground_web_direct_258k_function_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_direct_258k_function_filtered_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 124132 samples from VC:s3://gui/new_annotations/uground/web_direct_258k_function_filtered_202507011.jsonl +Rank 0: Loading uground_web_hybrid_773k_max_25qa_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_hybrid_773k_max_25qa_filtered_new_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 621928 samples from VC:s3://gui/new_annotations/uground/web_hybrid_773k_max_25qa_filtered_new_202507011.jsonl +Rank 0: Loading altas_windows +Rank 0: Loading VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_windows_splited_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 537672 samples from VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_windows_splited_202507011.jsonl +Rank 0: Loading altas_linux +Rank 0: Loading VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_linux_splited_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 21578 samples from VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_linux_splited_202507011.jsonl +Rank 0: Loading atlas_macos +Rank 0: Loading VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_macos_splited_202507011.jsonl with random:20% sampling strategy +Rank 0: Loaded 3680 samples from VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_macos_splited_202507011.jsonl +Rank 0: Loading android_action_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/android/filter_action_grounding_20250405_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 5621 samples from VC:s3://gui/data_20250328/android/filter_action_grounding_20250405_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250328 +Rank 0: Loading VC:s3://gui-agent/data_20250328/windows/action_grounding_20250409_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 11980 samples from VC:s3://gui-agent/data_20250328/windows/action_grounding_20250409_202507011_20250722.jsonl +Rank 0: Loading web_action_grounding_20250328 +Rank 0: Loading VC:s3://gui-agent/data_20250328/web_25k/action_grounding_20250404_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 9459 samples from VC:s3://gui-agent/data_20250328/web_25k/action_grounding_20250404_202507011.jsonl +Rank 0: Loading ubuntu_action_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/ubuntu/action_grounding_20250407_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 328 samples from VC:s3://gui/data_20250310/ubuntu/action_grounding_20250407_202507011.jsonl +Rank 0: Loading ubuntu_action_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/ubuntu/action_grounding_20250407_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 54 samples from VC:s3://gui/data_20250317/ubuntu/action_grounding_20250407_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/action_grounding_20250421_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 480 samples from VC:s3://gui/data_20250317/windows/action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/crop_action_grounding_20250421_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 240 samples from VC:s3://gui/data_20250317/windows/crop_action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/action_grounding_20250421_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 944 samples from VC:s3://gui/data_20250310/windows/action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/crop_action_grounding_20250421_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 472 samples from VC:s3://gui/data_20250310/windows/crop_action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading mac_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/mac/action_grounding_20250410_202507011.jsonl with all sampling strategy +Rank 0: Loaded 1578 samples from VC:s3://gui-agent/data_20250407/mac/action_grounding_20250410_202507011.jsonl +Rank 0: Loading iphone_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/iphone/white/action_grounding_20250410_202507011.jsonl with all sampling strategy +Rank 0: Loaded 20394 samples from VC:s3://gui-agent/data_20250407/iphone/white/action_grounding_20250410_202507011.jsonl +Rank 0: Loading web_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/web/action_grounding_20250414_202507011.jsonl with random:20% sampling strategy +Rank 0: Loaded 14285 samples from VC:s3://gui-agent/data_20250407/web/action_grounding_20250414_202507011.jsonl +Rank 0: Loading android_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/android/action_grounding_20250410_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 3590 samples from VC:s3://gui-agent/data_20250407/android/action_grounding_20250410_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/action_grounding_20250416_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 21422 samples from VC:s3://gui-agent/data_20250407/windows/action_grounding_20250416_202507011_20250722.jsonl +Rank 0: Loading windows_human_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/human_action_grounding_20250416_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 100 samples from VC:s3://gui-agent/data_20250407/windows/human_action_grounding_20250416_202507011_20250722.jsonl +Rank 0: Loading windows_aug_cropping_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/sub_action_grounding_20250421_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 7675 samples from VC:s3://gui-agent/data_20250407/windows/sub_action_grounding_20250421_202507011.jsonl +Rank 0: Loading iphone_action_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/action_grounding_20250417_202507011.jsonl with all sampling strategy +Rank 0: Loaded 20116 samples from VC:s3://gui-agent/data_20250414/iphone/action_grounding_20250417_202507011.jsonl +Rank 0: Loading iphone_human_action_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/human_action_grounding_20250421_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2520 samples from VC:s3://gui-agent/data_20250414/iphone/human_action_grounding_20250421_202507011.jsonl +Rank 0: Loading mac_human_action_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/mac/human_action_grounding_20250418_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7814 samples from VC:s3://gui-agent/data_20250414/mac/human_action_grounding_20250418_202507011.jsonl +Rank 0: Loading android_action_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/Android/action_grounding_20250429_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 17838 samples from VC:s3://gui-agent/data_20250421/Android/action_grounding_20250429_202507011.jsonl +Rank 0: Loading android_action_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/Android/action_grounding_20250429_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 9008 samples from VC:s3://gui-agent/data_20250428/Android/action_grounding_20250429_202507011.jsonl +Rank 0: Loading web_canvas_action_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/web_canvas/action_grounding_20250429_202507011.jsonl with random:20% sampling strategy +Rank 0: Loaded 624 samples from VC:s3://gui-agent/data_20250428/web_canvas/action_grounding_20250429_202507011.jsonl +Rank 0: Loading web_action_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/web/action_grounding_20250505_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 100652 samples from VC:s3://gui-agent/data_20250421/web/action_grounding_20250505_202507011.jsonl +Rank 0: Loading ubuntu_action_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/ubuntu/action_grounding_20250505_202507011.jsonl with all sampling strategy +Rank 0: Loaded 28346 samples from VC:s3://gui-agent/data_20250428/ubuntu/action_grounding_20250505_202507011.jsonl +Rank 0: Loading android_action_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/android/action_grounding_20250506_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 4907 samples from VC:s3://gui-agent/data_20250505/android/action_grounding_20250506_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/action_grounding_20250508_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 2635 samples from VC:s3://gui-agent/data_20250505/windows/action_grounding_20250508_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/crop_action_grounding_20250508_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 5234 samples from VC:s3://gui-agent/data_20250505/windows/crop_action_grounding_20250508_202507011_20250722.jsonl +Rank 0: Loading ubuntu_action_grounding_20250508 +Rank 0: Loading VC:s3://gui-agent/data_20250508/ubuntu/action_grounding_20250509_202507011.jsonl with all sampling strategy +Rank 0: Loaded 3404 samples from VC:s3://gui-agent/data_20250508/ubuntu/action_grounding_20250509_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250510_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250510_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250510_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250510_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250526_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250526_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250526_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250526_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250526_3 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250527_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250527_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_3 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250527_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250527_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250529_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 17050 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250529_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250529_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 17050 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250529_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_1 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250510_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 2855 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250510_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_2 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250526_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 655 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250526_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_3 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250527_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 833 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250527_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_4 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250529_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 2643 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250529_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_1 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250510_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250510_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_2 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250526_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250526_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_3 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250527_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250527_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_4 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250529_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5286 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250529_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_5 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250510_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250510_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_6 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250526_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250526_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_7 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250527_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250527_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_8 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250529_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5286 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250529_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/action_grounding_20250619_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3130 samples from VC:s3://gui-agent/data_20250623/windows/action_grounding_20250619_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250619_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 3130 samples from VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250619_202507011_20250722.jsonl +Rank 0: Loading windows_hover_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/action_grounding_20250620_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 3333 samples from VC:s3://gui-agent/data_20250623/windows/action_grounding_20250620_202507011_20250722.jsonl +Rank 0: Loading windows_crop_hover_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250620_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 3333 samples from VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250620_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_1 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 833 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_2 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1666 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_3 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1666 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_pure_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_4 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 358 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_5 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 782 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_6 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 782 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_pure_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/action_grounding_20250627_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7946 samples from VC:s3://gui-agent/data_20250630/windows/action_grounding_20250627_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/crop_action_grounding_20250627_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 3973 samples from VC:s3://gui-agent/data_20250630/windows/crop_action_grounding_20250627_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_1 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 993 samples from VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1986 samples from VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1986 samples from VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_pure_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/action_grounding_20250630_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 1990 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/action_grounding_20250630_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_action_grounding_20250630_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1990 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_action_grounding_20250630_202507011_20250722.jsonl +Rank 0: Loading windows_human_action_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/action_grounding_20250703_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 5040 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/action_grounding_20250703_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_action_grounding_20250703_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 5040 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_action_grounding_20250703_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_4 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 630 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_5 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1260 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_6 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1260 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_pure_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_7 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 249 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_8 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 498 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_9 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 498 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_pure_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/action_grounding_20250708_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2538 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/action_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_action_grounding_20250708_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1269 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_action_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250707_1 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_action_grounding_20250708.jsonl with random:25% sampling strategy +Rank 0: Loaded 172 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_action_grounding_20250708.jsonl +Rank 0: Loading windows_aug_action_grounding_20250707_2 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_action_grounding_20250708.jsonl with random:25% sampling strategy +Rank 0: Loaded 634 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_action_grounding_20250708.jsonl +Rank 0: Loading windows_aug_action_grounding_20250707_3 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_action_grounding_20250708.jsonl with random:25% sampling strategy +Rank 0: Loaded 634 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_action_grounding_20250708.jsonl +Rank 0: Loading windows_human_action_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/action_grounding_20250717_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2832 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/action_grounding_20250717_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_action_grounding_20250717_20250722.jsonl with all sampling strategy +Rank 0: Loaded 2832 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_action_grounding_20250717_20250722.jsonl +Rank 0: Loading android_ocr_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/android/text_ocr_20250409.jsonl with all sampling strategy +Rank 0: Loaded 29878 samples from VC:s3://gui/data_20250328/android/text_ocr_20250409.jsonl +Rank 0: Loading mac_orc_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/mac/element_ocr_20250328.jsonl with all sampling strategy +Rank 0: Loaded 4393 samples from VC:s3://gui/data_20250328/mac/element_ocr_20250328.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/ubuntu/internvl_grounding_20250407.jsonl with random:80% sampling strategy +Rank 0: Loaded 254 samples from VC:s3://gui/data_20250310/ubuntu/internvl_grounding_20250407.jsonl +Rank 0: Loading windows_click_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/internvl_grounding_function_20250421.jsonl with random:80% sampling strategy +Rank 0: Loaded 1802 samples from VC:s3://gui/data_20250310/windows/internvl_grounding_function_20250421.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/ubuntu/internvl_grounding_20250407.jsonl with random:80% sampling strategy +Rank 0: Loaded 53 samples from VC:s3://gui/data_20250317/ubuntu/internvl_grounding_20250407.jsonl +Rank 0: Loading windows_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/internvl_grounding_20250421_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 6578 samples from VC:s3://gui/data_20250317/windows/internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/crop_internvl_grounding_20250421_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 3287 samples from VC:s3://gui/data_20250317/windows/crop_internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading windows_click_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/internvl_grounding_function_20250421_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 542 samples from VC:s3://gui/data_20250317/windows/internvl_grounding_function_20250421_20250722.jsonl +Rank 0: Loading windows_crop_click_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/crop_internvl_grounding_function_20250421_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 270 samples from VC:s3://gui/data_20250317/windows/crop_internvl_grounding_function_20250421_20250722.jsonl +Rank 0: Loading windows_internvl_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/internvl_grounding_20250421_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 10908 samples from VC:s3://gui/data_20250310/windows/internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/crop_internvl_grounding_20250421_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 5453 samples from VC:s3://gui/data_20250310/windows/crop_internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading android_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/android/internvl_grounding_20250409.jsonl with random:80% sampling strategy +Rank 0: Loaded 13950 samples from VC:s3://gui/data_20250328/android/internvl_grounding_20250409.jsonl +Rank 0: Loading windows_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui-agent/data_20250328/windows/internvl_grounding_20250425_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 12390 samples from VC:s3://gui-agent/data_20250328/windows/internvl_grounding_20250425_20250722.jsonl +Rank 0: Loading web_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/web_25k/internvl_grounding_20250409.jsonl with random:80% sampling strategy +Rank 0: Loaded 13402 samples from VC:s3://gui/data_20250328/web_25k/internvl_grounding_20250409.jsonl +Rank 0: Loading icon_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/icon_canva/icon_anno_20250328.jsonl with all sampling strategy +Rank 0: Loaded 81303 samples from VC:s3://gui/data_20250328/icon_canva/icon_anno_20250328.jsonl +Rank 0: Loading mac_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/mac/internvl_grounding_20250410.jsonl with all sampling strategy +Rank 0: Loaded 1251 samples from VC:s3://gui-agent/data_20250407/mac/internvl_grounding_20250410.jsonl +Rank 0: Loading iphone_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/iphone/white/internvl_grounding_20250410.jsonl with all sampling strategy +Rank 0: Loaded 27849 samples from VC:s3://gui-agent/data_20250407/iphone/white/internvl_grounding_20250410.jsonl +Rank 0: Loading web_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/web/internvl_grounding_20250414.jsonl with random:80% sampling strategy +Rank 0: Loaded 51607 samples from VC:s3://gui-agent/data_20250407/web/internvl_grounding_20250414.jsonl +Rank 0: Loading android_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/android/internvl_grounding_20250410.jsonl with random:80% sampling strategy +Rank 0: Loaded 6281 samples from VC:s3://gui-agent/data_20250407/android/internvl_grounding_20250410.jsonl +Rank 0: Loading windows_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/internvl_grounding_20250416_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 25961 samples from VC:s3://gui-agent/data_20250407/windows/internvl_grounding_20250416_20250722.jsonl +Rank 0: Loading windows_cropping_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/sub_internvl_grounding_20250421.jsonl with random:40% sampling strategy +Rank 0: Loaded 9763 samples from VC:s3://gui-agent/data_20250407/windows/sub_internvl_grounding_20250421.jsonl +Rank 0: Loading iphone_internvl_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/internvl_grounding_20250417.jsonl with all sampling strategy +Rank 0: Loaded 16896 samples from VC:s3://gui-agent/data_20250414/iphone/internvl_grounding_20250417.jsonl +Rank 0: Loading iphone_human_internvl_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/human_internvl_grounding_20250421.jsonl with all sampling strategy +Rank 0: Loaded 927 samples from VC:s3://gui-agent/data_20250414/iphone/human_internvl_grounding_20250421.jsonl +Rank 0: Loading mac_human_internvl_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/mac/human_internvl_grounding_20250418.jsonl with all sampling strategy +Rank 0: Loaded 3051 samples from VC:s3://gui-agent/data_20250414/mac/human_internvl_grounding_20250418.jsonl +Rank 0: Loading android_internvl_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/Android/internvl_grounding_20250429.jsonl with random:80% sampling strategy +Rank 0: Loaded 25217 samples from VC:s3://gui-agent/data_20250421/Android/internvl_grounding_20250429.jsonl +Rank 0: Loading android_internvl_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/Android/internvl_grounding_20250429.jsonl with random:80% sampling strategy +Rank 0: Loaded 12677 samples from VC:s3://gui-agent/data_20250428/Android/internvl_grounding_20250429.jsonl +Rank 0: Loading web_canvas_internvl_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/web_canvas/internvl_grounding_20250429.jsonl with random:40% sampling strategy +Rank 0: Loaded 1566 samples from VC:s3://gui-agent/data_20250428/web_canvas/internvl_grounding_20250429.jsonl +Rank 0: Loading web_internvl_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/web/internvl_grounding_20250505.jsonl with random:80% sampling strategy +Rank 0: Loaded 174170 samples from VC:s3://gui-agent/data_20250421/web/internvl_grounding_20250505.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/ubuntu/internvl_grounding_20250505.jsonl with random:80% sampling strategy +Rank 0: Loaded 24862 samples from VC:s3://gui-agent/data_20250428/ubuntu/internvl_grounding_20250505.jsonl +Rank 0: Loading android_internvl_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/android/internvl_grounding_20250506.jsonl with random:80% sampling strategy +Rank 0: Loaded 9142 samples from VC:s3://gui-agent/data_20250505/android/internvl_grounding_20250506.jsonl +Rank 0: Loading windows_internvl_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/internvl_grounding_20250508_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 4229 samples from VC:s3://gui-agent/data_20250505/windows/internvl_grounding_20250508_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/crop_internvl_grounding_20250508_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 4200 samples from VC:s3://gui-agent/data_20250505/windows/crop_internvl_grounding_20250508_20250722.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250508 +Rank 0: Loading VC:s3://gui-agent/data_20250508/ubuntu/internvl_grounding_20250509.jsonl with random:80% sampling strategy +Rank 0: Loaded 2868 samples from VC:s3://gui-agent/data_20250508/ubuntu/internvl_grounding_20250509.jsonl +Rank 0: Loading windows_internvl_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250510_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 9494 samples from VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250510_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250510_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 9494 samples from VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250510_20250722.jsonl +Rank 0: Loading windows_internvl_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250526_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 2270 samples from VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250526_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250526_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 2270 samples from VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250526_20250722.jsonl +Rank 0: Loading windows_internvl_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250529_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 28466 samples from VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250529_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250529_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 28466 samples from VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250529_20250722.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250619_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1620 samples from VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250619_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250619_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1620 samples from VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250619_20250722.jsonl +Rank 0: Loading windows_hover_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250620_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1714 samples from VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250620_20250722.jsonl +Rank 0: Loading windows_crop_hover_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250620_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 1714 samples from VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250620_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_1 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 1372 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_2 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2743 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_3 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2743 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_pure_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_4 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 590 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_5 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 1296 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_6 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 1296 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_pure_paste.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/internvl_grounding_20250627_20250722.jsonl with all sampling strategy +Rank 0: Loaded 4230 samples from VC:s3://gui-agent/data_20250630/windows/internvl_grounding_20250627_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/crop_internvl_grounding_20250627_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 4230 samples from VC:s3://gui-agent/data_20250630/windows/crop_internvl_grounding_20250627_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_1 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 1692 samples from VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 3384 samples from VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 3384 samples from VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_pure_paste.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/internvl_grounding_20250630_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1077 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/internvl_grounding_20250630_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_internvl_grounding_20250630_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 862 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_internvl_grounding_20250630_20250722.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/internvl_grounding_20250703_20250722.jsonl with all sampling strategy +Rank 0: Loaded 2676 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/internvl_grounding_20250703_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_internvl_grounding_20250703_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 2676 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_internvl_grounding_20250703_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_4 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 1070 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_5 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2141 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_6 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2141 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_pure_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_7 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 431 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_8 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 862 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_9 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 862 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_pure_paste.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/internvl_grounding_20250708_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1344 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/internvl_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_internvl_grounding_20250708_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1344 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_internvl_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250707_1 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_internvl_grounding_20250708.jsonl with random:40% sampling strategy +Rank 0: Loaded 292 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_internvl_grounding_20250708.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250707_2 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_internvl_grounding_20250708.jsonl with random:40% sampling strategy +Rank 0: Loaded 1075 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_internvl_grounding_20250708.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250707_3 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_internvl_grounding_20250708.jsonl with random:40% sampling strategy +Rank 0: Loaded 1075 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_internvl_grounding_20250708.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/internvl_grounding_20250717_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1509 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/internvl_grounding_20250717_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_internvl_grounding_20250717_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 1207 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_internvl_grounding_20250717_20250722.jsonl +Rank 0: Loading uibert_train_ground_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/uibert_train_ground_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 4646 samples from VC:s3://gui/new_annotations/gui_data_grounding/uibert_train_ground_d240430_v1.jsonl +Rank 0: Loading openapp_taperception_grounding_d240815_v2 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/openapp_taperception_grounding_d240815_v2.jsonl with all sampling strategy +Rank 0: Loaded 2500 samples from VC:s3://gui/new_annotations/gui_data_grounding/openapp_taperception_grounding_d240815_v2.jsonl +Rank 0: Loading openapp_widget_grounding_d240815_v2 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/openapp_widget_grounding_d240815_v2.jsonl with all sampling strategy +Rank 0: Loaded 14878 samples from VC:s3://gui/new_annotations/gui_data_grounding/openapp_widget_grounding_d240815_v2.jsonl +Rank 0: Loading openapp_mug_grounding_d240812 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/openapp_mug_grounding_d240812.jsonl with all sampling strategy +Rank 0: Loaded 26090 samples from VC:s3://gui/new_annotations/gui_data_grounding/openapp_mug_grounding_d240812.jsonl +Rank 0: Loading private_ui_phone_2403_ground_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/private_ui_phone_2403_ground_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 24798 samples from VC:s3://gui/new_annotations/gui_data_grounding/private_ui_phone_2403_ground_d240430_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_ground_d240521_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2405_ground_d240521_v1.jsonl with all sampling strategy +Rank 0: Loaded 5008 samples from VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2405_ground_d240521_v1.jsonl +Rank 0: Loading private_ui_aig_share_2406_ground_d240612_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2406_ground_d240612_v1.jsonl with all sampling strategy +Rank 0: Loaded 7903 samples from VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2406_ground_d240612_v1.jsonl +Rank 0: Loading windows_pc_agent_e_planning_cot +Rank 0: Loading VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 55564 samples from VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data.jsonl +Rank 0: Loading windows_pc_agent_e_navigation +Rank 0: Loading VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data_without_think.jsonl with all sampling strategy +Rank 0: Loaded 27782 samples from VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data_without_think.jsonl +Rank 0: Loading os_genesis_ac_training_data +Rank 0: Skipping os_genesis_ac_training_data due to repeat_time=0 +Rank 0: Loading os_genesis_aw_training_data +Rank 0: Skipping os_genesis_aw_training_data due to repeat_time=0 +Rank 0: Loading os_genesis_web_training +Rank 0: Skipping os_genesis_web_training due to repeat_time=0 +Rank 0: Loading gui_odyssey_plus_1 +Rank 0: Skipping gui_odyssey_plus_1 due to repeat_time=0 +Rank 0: Loading gui_odyssey_plus_2 +Rank 0: Skipping gui_odyssey_plus_2 due to repeat_time=0 +Rank 0: Loading gui_odyssey_plus_custom_3 +Rank 0: Skipping gui_odyssey_plus_custom_3 due to repeat_time=0 +Rank 0: Loading mm_gui_mid +Rank 0: Skipping mm_gui_mid due to repeat_time=0 +Rank 0: Loading text_gui_mid +Rank 0: Skipping text_gui_mid due to repeat_time=0 +Rank 0: Loading gui_mid_trajectory +Rank 0: Skipping gui_mid_trajectory due to repeat_time=0 +Rank 0: Loading ubuntu_rag +Rank 0: Loading VC:s3://gui-agent/cua_text_rag/ubuntu_rag.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7024 samples from VC:s3://gui-agent/cua_text_rag/ubuntu_rag.jsonl +Rank 0: Loading windows_rag +Rank 0: Loading VC:s3://gui-agent/cua_text_rag/windows_rag.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3144 samples from VC:s3://gui-agent/cua_text_rag/windows_rag.jsonl +Rank 0: Total training samples: 6240940 +Rank 0: Formatting inputs...Skip in lazy mode +Detected kernel version 3.10.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher. +Rank 0: Length of multimodal samples: 6230528, pure textual samples: 9984 +Parameter Offload: Total persistent parameters: 755712 in 408 params +W0817 17:11:49.987000 12641 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 17:11:49.987000 12641 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 17:11:49.987000 12641 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 17:11:49.987000 12641 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 17:11:50.636000 60922 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 17:11:50.636000 60922 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 17:11:50.636000 60922 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 17:11:50.636000 60922 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 17:11:50.730000 127312 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 17:11:50.730000 127312 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 17:11:50.730000 127312 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 17:11:50.730000 127312 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 17:11:50.817000 115114 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 17:11:50.817000 115114 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 17:11:50.817000 115114 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 17:11:50.817000 115114 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 17:11:50.921000 5386 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 17:11:50.921000 5386 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 17:11:50.921000 5386 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 17:11:50.921000 5386 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 17:11:51.114000 39263 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 17:11:51.114000 39263 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 17:11:51.114000 39263 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 17:11:51.114000 39263 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 17:11:51.194000 119265 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 17:11:51.194000 119265 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 17:11:51.194000 119265 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 17:11:51.194000 119265 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 17:14:26.592000 98653 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 17:14:26.592000 98653 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 17:14:26.592000 98653 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 17:14:26.592000 98653 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 17:14:26.858000 20137 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 17:14:26.858000 20137 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 17:14:26.858000 20137 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 17:14:26.858000 20137 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 17:14:27.053000 68345 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 17:14:27.053000 68345 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 17:14:27.053000 68345 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 17:14:27.053000 68345 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 17:14:27.228000 118992 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 17:14:27.228000 118992 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 17:14:27.228000 118992 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 17:14:27.228000 118992 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 17:14:27.645000 43041 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 17:14:27.645000 43041 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 17:14:27.645000 43041 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 17:14:27.645000 43041 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 17:14:28.213000 9483 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 17:14:28.213000 9483 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 17:14:28.213000 9483 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 17:14:28.213000 9483 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 17:14:29.054000 129497 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 17:14:29.054000 129497 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 17:14:29.054000 129497 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 17:14:29.054000 129497 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +[2025-08-17 17:14:40,658] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:14:40,688] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:14:40,697] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:14:40,697] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:14:40,704] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:14:40,704] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:14:40,709] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:14:40,710] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:14:40,720] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:14:40,714] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:14:40,714] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:14:40,714] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:14:40,730] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:14:40,721] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:14:40,721] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:14:40,721] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:14:40,721] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:14:40,721] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:14:40,715] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:14:40,716] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:14:40,716] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:14:40,720] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:14:40,731] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:14:40,731] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:14:40,720] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:14:40,722] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:14:40,722] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:14:40,724] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:14:40,724] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:14:40,746] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:14:40,742] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:14:40,743] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:14:40,743] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:14:40,760] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:14:40,760] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:14:40,765] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:14:40,765] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:14:40,765] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:14:40,766] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:14:43,976] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:14:43,976] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:14:43,976] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:14:43,977] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:14:43,978] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:14:43,978] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:14:43,978] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:14:43,993] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:14:43,976] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:14:43,977] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:14:43,977] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:14:43,978] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:14:43,978] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:14:43,978] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:14:43,979] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:14:43,986] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:14:43,986] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:14:43,986] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:14:43,986] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:14:43,986] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:14:43,986] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:14:43,993] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:14:43,994] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:14:43,994] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:14:43,994] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:14:43,994] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:14:43,994] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:14:43,994] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:14:44,401] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:14:44,416] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:14:44,470] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:14:44,528] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:14:44,536] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:14:44,542] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:14:44,538] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:14:44,538] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:14:44,538] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:14:44,540] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:14:44,540] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:14:44,541] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:14:44,547] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:14:44,547] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:14:44,549] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +ce None world_size = 64 +[2025-08-17 17:14:44,549] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:14:44,549] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +ce None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:14:44,583] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:14:44,609] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:14:44,619] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:14:44,621] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:14:44,607] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +22] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:14:44,622] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:14:44,622] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:14:44,635] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:14:44,617] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +39] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:14:44,639] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:14:44,639] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:14:44,640] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU af[2025-08-17 17:14:44,623] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +alized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:14:44,623] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:14:44,623] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:14:44,624] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:14:44,870] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:14:44,873] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:14:44,885] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:14:44,890] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:14:44,890] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:14:44,909] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:14:44,909] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:14:44,912] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:14:47,993] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:14:47,993] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:14:47,993] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:14:47,993] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:14:47,993] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:14:47,993] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:14:47,997] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:14:47,997] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:14:47,997] [INFO] [comm.py:683:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl +[2025-08-17 17:14:48,462] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:14:48,596] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:14:48,598] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:14:48,598] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:14:48,599] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:14:48,600] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:14:48,603] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:14:48,608] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:14:50,035] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:14:50,035] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:14:50,037] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:14:50,038] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:14:50,039] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:14:50,042] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:14:50,043] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:14:50,048] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:14:50,594] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:14:50,804] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:14:50,808] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:14:50,811] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:14:50,814] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:14:50,816] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:14:50,820] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:14:50,823] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. + Loading checkpoint shards: 0%| | 0/2 [00:00 before Client(conf_path) +W0817 17:16:50.613000 8651 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 17:16:50.613000 8651 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 17:16:50.613000 8651 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 17:16:50.613000 8651 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +*** +W0817 17:16:50.682000 51430 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 17:16:50.682000 51430 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 17:16:50.682000 51430 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 17:16:50.682000 51430 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 17:16:50.806000 127237 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 17:16:50.806000 127237 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 17:16:50.806000 127237 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 17:16:50.806000 127237 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 17:16:51.586000 19534 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 17:16:51.586000 19534 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 17:16:51.586000 19534 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 17:16:51.586000 19534 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 17:16:51.600000 103392 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 17:16:51.600000 103392 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 17:16:51.600000 103392 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 17:16:51.600000 103392 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 17:16:52.386000 3488 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 17:16:52.386000 3488 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 17:16:52.386000 3488 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 17:16:52.386000 3488 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +[2025-08-17 17:17:03,993] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:17:04,000] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:17:04,041] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:17:04,039] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:17:04,037] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:17:04,038] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:17:04,039] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:17:04,040] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:17:04,040] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:17:04,045] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:17:04,068] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:17:04,060] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:17:04,061] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:17:04,061] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:17:04,063] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:17:04,063] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:17:04,064] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:17:04,064] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:17:04,075] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:17:04,066] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:17:04,075] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:17:04,069] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:17:04,083] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:17:04,070] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:17:04,070] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:17:04,070] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:17:04,092] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:17:04,070] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:17:04,070] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:17:04,070] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:17:04,073] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:17:04,073] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:17:04,092] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:17:04,102] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:17:04,093] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:17:04,102] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:17:04,102] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:17:04,102] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:17:07,095] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:17:07,106] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:17:07,098] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:17:07,106] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:17:07,106] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:17:07,102] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:17:07,102] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:17:07,104] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:17:07,104] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:17:07,104] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:17:07,101] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:17:07,101] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:17:07,103] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:17:07,104] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:17:07,110] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:17:07,115] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:17:07,115] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:17:07,115] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:17:07,117] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:17:07,122] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:17:07,122] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:17:07,131] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:17:07,131] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:17:07,134] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:17:07,134] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:17:07,134] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:17:07,134] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:17:07,135] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:17:07,135] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:17:07,491] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:17:07,491] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:17:07,493] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:17:07,496] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:17:07,528] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:17:07,537] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +ter initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:17:07,537] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:17:07,538] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:17:07,538] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:17:07,552] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:17:07,599] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:17:07,612] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:17:07,651] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:17:07,662] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:17:07,662] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:17:07,668] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:17:07,671] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:17:07,672] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:17:07,673] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:17:07,666] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:17:07,670] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:17:07,670] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:17:07,670] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:17:07,670] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:17:07,674] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:17:07,691] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:17:07,692] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:17:07,692] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:17:07,692] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:17:07,695] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:17:07,748] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:17:07,748] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:17:07,750] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:17:07,750] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:17:07,760] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:17:07,754] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:17:07,754] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. + on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:17:07,766] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:17:07,767] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:17:08,656] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:17:08,658] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:17:08,670] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:17:08,671] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:17:08,672] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:17:08,672] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:17:08,672] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:17:08,672] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:17:12,214] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:17:12,214] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:17:12,214] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:17:12,215] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:17:12,215] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:17:12,215] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:17:12,215] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:17:12,216] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:17:12,216] [INFO] [comm.py:683:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl +[2025-08-17 17:17:12,565] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:17:12,567] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:17:12,568] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:17:12,572] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:17:12,576] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:17:12,580] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:17:12,584] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:17:12,585] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:17:12,687] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:17:12,833] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:17:12,833] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:17:12,835] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:17:12,836] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:17:12,837] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:17:12,837] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:17:12,837] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:17:13,130] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:17:13,346] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:17:13,366] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:17:13,369] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:17:13,374] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:17:13,377] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:17:13,378] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:17:13,379] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. + Loading checkpoint shards: 0%| | 0/2 [00:00 before Client(conf_path) +Rank 0: --> after Client(conf_path) +Rank 0: Loading datasets: /mnt/petrelfs/liuzhaoyang/workspace/GUIAgent/internvl_chat/data/internvl_meta/meta/meta_250816_1.json +Rank 0: Loading guienv +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl with all sampling strategy +Rank 0: Loaded 327972 samples from VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl +Rank 0: Loading omniact +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl with all sampling strategy +Rank 0: Loaded 6720 samples from VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl +Rank 0: Loading ricoig16k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16133 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl +Rank 0: Loading ricosca +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl with all sampling strategy +Rank 0: Loaded 173212 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl +Rank 0: Loading seeclick +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl with all sampling strategy +Rank 0: Loaded 271121 samples from VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl +Rank 0: Loading ui_refexp +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl with all sampling strategy +Rank 0: Loaded 15624 samples from VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl +Rank 0: Loading webui350k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 57389 samples from VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl +Rank 0: Loading widget_captioning +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl with all sampling strategy +Rank 0: Loaded 101426 samples from VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl +Rank 0: Loading aitw-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl +Rank 0: Loading aitw-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl +Rank 0: Loading aitw-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl +Rank 0: Loading amex-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl +Rank 0: Loading amex-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl +Rank 0: Loading amex-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl +Rank 0: Loading android_control +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 149428 samples from VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl +Rank 0: Loading coat +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl with all sampling strategy +Rank 0: Loaded 11833 samples from VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl +Rank 0: Loading guiact-web-multi-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl +Rank 0: Loading guiact-web-multi-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl +Rank 0: Loading guiact-web-multi-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl +Rank 0: Loading guiact-web-single +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl with all sampling strategy +Rank 0: Loaded 67396 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl +Rank 0: Loading guide +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl with all sampling strategy +Rank 0: Loaded 13544 samples from VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl +Rank 0: Loading gui-odyssey-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl +Rank 0: Loading gui-odyssey-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl +Rank 0: Loading gui-odyssey-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl +Rank 0: Loading mind2web-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l1_202507011.jsonl +Rank 0: Loading mind2web-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l2_202507011.jsonl +Rank 0: Loading mind2web-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l3_202507011.jsonl +Rank 0: Loading miniwob-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l1_202507011.jsonl +Rank 0: Loading miniwob-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l2_202507011.jsonl +Rank 0: Loading miniwob-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l3_202507011.jsonl +Rank 0: Loading aguvis_android_control-v2 +Rank 0: Skipping aguvis_android_control-v2 due to repeat_time=0 +Rank 0: Loading aguvis_coat-v2 +Rank 0: Skipping aguvis_coat-v2 due to repeat_time=0 +Rank 0: Loading aguvis_docvqa_grounding +Rank 0: Skipping aguvis_docvqa_grounding due to repeat_time=0 +Rank 0: Loading aguvis_guiact-web-multi +Rank 0: Skipping aguvis_guiact-web-multi due to repeat_time=0 +Rank 0: Loading aguvis_guiact-web-single-v2 +Rank 0: Skipping aguvis_guiact-web-single-v2 due to repeat_time=0 +Rank 0: Loading aguvis_guide_si_10k-v2 +Rank 0: Skipping aguvis_guide_si_10k-v2 due to repeat_time=0 +Rank 0: Loading aguvis_guienv +Rank 0: Skipping aguvis_guienv due to repeat_time=0 +Rank 0: Loading aguvis_mind2web_train_v1.0.1 +Rank 0: Skipping aguvis_mind2web_train_v1.0.1 due to repeat_time=0 +Rank 0: Loading aguvis_omniact +Rank 0: Skipping aguvis_omniact due to repeat_time=0 +Rank 0: Loading aguvis_osatlas_ui_tars_cleaned +Rank 0: Skipping aguvis_osatlas_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading aguvis_ricoig16k +Rank 0: Skipping aguvis_ricoig16k due to repeat_time=0 +Rank 0: Loading aguvis_ricosca +Rank 0: Skipping aguvis_ricosca due to repeat_time=0 +Rank 0: Loading aguvis_seeclick_mi_ui_tars_cleaned +Rank 0: Skipping aguvis_seeclick_mi_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading aguvis_seeclick_ui_tars_cleaned_fixed +Rank 0: Skipping aguvis_seeclick_ui_tars_cleaned_fixed due to repeat_time=0 +Rank 0: Loading aguvis_ui_refexp +Rank 0: Skipping aguvis_ui_refexp due to repeat_time=0 +Rank 0: Loading aguvis_webui350k +Rank 0: Skipping aguvis_webui350k due to repeat_time=0 +Rank 0: Loading aguvis_widget_captioning +Rank 0: Skipping aguvis_widget_captioning due to repeat_time=0 +Rank 0: Loading icon_caption_icon_v0222_description +Rank 0: Skipping icon_caption_icon_v0222_description due to repeat_time=0 +Rank 0: Loading icon_grounding_icon_v0222_grounding +Rank 0: Skipping icon_grounding_icon_v0222_grounding due to repeat_time=0 +Rank 0: Loading refusal_component_final_1.5m +Rank 0: Skipping refusal_component_final_1.5m due to repeat_time=0 +Rank 0: Loading refusal_component_library_snap_icon_data_grounding +Rank 0: Skipping refusal_component_library_snap_icon_data_grounding due to repeat_time=0 +Rank 0: Loading refusal_component_v1_130k +Rank 0: Skipping refusal_component_v1_130k due to repeat_time=0 +Rank 0: Loading refusal_guienv +Rank 0: Skipping refusal_guienv due to repeat_time=0 +Rank 0: Loading refusal_icon_v0222_grounding +Rank 0: Skipping refusal_icon_v0222_grounding due to repeat_time=0 +Rank 0: Loading refusal_osatlas_ui_tars_cleaned +Rank 0: Skipping refusal_osatlas_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading refusal_ricosca +Rank 0: Skipping refusal_ricosca due to repeat_time=0 +Rank 0: Loading refusal_seeclick_mi_ui_tars_cleaned +Rank 0: Skipping refusal_seeclick_mi_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading refusal_seeclick_ui_tars_cleaned_fixed +Rank 0: Skipping refusal_seeclick_ui_tars_cleaned_fixed due to repeat_time=0 +Rank 0: Loading refusal_training_data_icon_grounded_merged +Rank 0: Skipping refusal_training_data_icon_grounded_merged due to repeat_time=0 +Rank 0: Loading component_generated_component_final_1.5m_cleaned_split +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_final_1.5m_cleaned_split_20250713.jsonl with random:10% sampling strategy +Rank 0: Loaded 3987 samples from VC:s3://gui-agent/jedi/annotations_250713/component_final_1.5m_cleaned_split_20250713.jsonl +Rank 0: Loading component_generated_component_library_snap_icon_data_description +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_description_conversations_20250713.jsonl with random:50% sampling strategy +Rank 0: Loaded 11061 samples from VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_description_conversations_20250713.jsonl +Rank 0: Loading component_generated_component_library_snap_icon_data_grounding +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_grounding_conversations_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 4424 samples from VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_grounding_conversations_20250713.jsonl +Rank 0: Loading component_generated_component_v1_130k +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_v1_130k_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 26376 samples from VC:s3://gui-agent/jedi/annotations_250713/component_v1_130k_20250713.jsonl +Rank 0: Loading component_rule-based_doc_data_new +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/doc_data_new_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 3153 samples from VC:s3://gui-agent/jedi/annotations_250713/doc_data_new_20250713.jsonl +Rank 0: Loading component_rule-based_doc_scroll_data_new +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/doc_scroll_data_new_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 603 samples from VC:s3://gui-agent/jedi/annotations_250713/doc_scroll_data_new_20250713.jsonl +Rank 0: Loading component_rule-based_ethercalc_v1 +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/ethercalc_v1_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 2012 samples from VC:s3://gui-agent/jedi/annotations_250713/ethercalc_v1_20250713.jsonl +Rank 0: Loading component_rule-based_slide_v1_17k +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/slide_v1_17k_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 2363 samples from VC:s3://gui-agent/jedi/annotations_250713/slide_v1_17k_20250713.jsonl +Rank 0: Loading icon_caption_ios_app_data +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/ios_app_data_conversations-images_pure_color_background_20250713.jsonl with all sampling strategy +Rank 0: Loaded 49498 samples from VC:s3://gui-agent/jedi/annotations_250713/ios_app_data_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_caption_mac_app_data +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/mac_app_data_conversations-images_pure_color_background_20250713.jsonl with all sampling strategy +Rank 0: Loaded 18083 samples from VC:s3://gui-agent/jedi/annotations_250713/mac_app_data_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_caption_training_data_icon +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_pure_color_background_20250713.jsonl with random:50% sampling strategy +Rank 0: Loaded 75874 samples from VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_grounding_training_data_icon_grounded_merged +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_grounded_merged_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 5466 samples from VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_grounded_merged_20250713.jsonl +Rank 0: Loading layout_layout200k_training_data_qwen25 +Rank 0: Skipping layout_layout200k_training_data_qwen25 due to repeat_time=0 +Rank 0: Loading layout_layout200k_grounding_training_data_qwen25 +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/layout200k_grounding_training_data_qwen25_20250713.jsonl with random:10% sampling strategy +Rank 0: Loaded 158612 samples from VC:s3://gui-agent/jedi/annotations_250713/layout200k_grounding_training_data_qwen25_20250713.jsonl +Rank 0: Loading layout_layout400k_claude_training_data_qwen25_split +Rank 0: Skipping layout_layout400k_claude_training_data_qwen25_split due to repeat_time=0 +Rank 0: Loading layout_layout400k_claude_grounding_training_data_qwen25_split +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/layout400k_claude_grounding_training_data_qwen25_split_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 7540 samples from VC:s3://gui-agent/jedi/annotations_250713/layout400k_claude_grounding_training_data_qwen25_split_20250713.jsonl +Rank 0: Loading layout_os_layout_v1 +Rank 0: Skipping layout_os_layout_v1 due to repeat_time=0 +Rank 0: Loading layout_os_layout_v1_grounding +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/os_layout_v1_grounding_training_data_qwen25_split_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 7857 samples from VC:s3://gui-agent/jedi/annotations_250713/os_layout_v1_grounding_training_data_qwen25_split_20250713.jsonl +Rank 0: Loading mind2web_raw_image +Rank 0: Loading VC:s3://gui-agent/mind2web_train/navigation_20250705.jsonl with all sampling strategy +Rank 0: Loaded 5740 samples from VC:s3://gui-agent/mind2web_train/navigation_20250705.jsonl +Rank 0: Loading ws_android_navigation_20250328 +Rank 0: Skipping ws_android_navigation_20250328 due to repeat_time=0 +Rank 0: Loading ws_android_navigation_20250407 +Rank 0: Skipping ws_android_navigation_20250407 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_w_history_20250328 +Rank 0: Skipping ws_web_navigation_w_history_20250328 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_wo_history_20250328 +Rank 0: Skipping ws_web_navigation_wo_history_20250328 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_20250421 +Rank 0: Skipping ws_web_navigation_20250421 due to repeat_time=0 +Rank 0: Loading ws_ubuntu_navigation_20250328 +Rank 0: Skipping ws_ubuntu_navigation_20250328 due to repeat_time=0 +Rank 0: Loading ws_android_navigation_20250505 +Rank 0: Skipping ws_android_navigation_20250505 due to repeat_time=0 +Rank 0: Loading internal_android_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 48814 samples from VC:s3://gui-agent/data_20250612/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19042 samples from VC:s3://gui-agent/data_20250612/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8363 samples from VC:s3://gui-agent/data_20250612/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 26412 samples from VC:s3://gui-agent/data_20250612/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 57522 samples from VC:s3://gui-agent/data_20250612/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 1342 samples from VC:s3://gui-agent/data_20250624/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 15766 samples from VC:s3://gui-agent/data_20250624/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19280 samples from VC:s3://gui-agent/data_20250630/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 3560 samples from VC:s3://gui-agent/data_20250630/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 9258 samples from VC:s3://gui-agent/data_20250630/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 420 samples from VC:s3://gui-agent/data_20250707/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 8898 samples from VC:s3://gui-agent/data_20250707/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 21026 samples from VC:s3://gui-agent/data_20250707/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 4490 samples from VC:s3://gui-agent/data_20250707/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 22154 samples from VC:s3://gui-agent/data_20250714/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/web/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 11614 samples from VC:s3://gui-agent/data_20250714/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 16767 samples from VC:s3://gui-agent/data_20250714/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 48814 samples from VC:s3://gui-agent/data_20250612/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 11970 samples from VC:s3://gui-agent/data_20250612/android/planning_20250811.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19042 samples from VC:s3://gui-agent/data_20250612/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 4926 samples from VC:s3://gui-agent/data_20250612/mac/planning_20250811.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8363 samples from VC:s3://gui-agent/data_20250612/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3716 samples from VC:s3://gui-agent/data_20250612/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 26412 samples from VC:s3://gui-agent/data_20250612/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 4420 samples from VC:s3://gui-agent/data_20250612/windows/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 115044 samples from VC:s3://gui-agent/data_20250612/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2684 samples from VC:s3://gui-agent/data_20250624/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 15766 samples from VC:s3://gui-agent/data_20250624/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7402 samples from VC:s3://gui-agent/data_20250624/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19280 samples from VC:s3://gui-agent/data_20250630/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3746 samples from VC:s3://gui-agent/data_20250630/android/planning_20250811.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 3560 samples from VC:s3://gui-agent/data_20250630/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 2469 samples from VC:s3://gui-agent/data_20250630/mac/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 18516 samples from VC:s3://gui-agent/data_20250630/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 420 samples from VC:s3://gui-agent/data_20250707/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 243 samples from VC:s3://gui-agent/data_20250707/mac/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 8898 samples from VC:s3://gui-agent/data_20250707/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 1188 samples from VC:s3://gui-agent/data_20250707/windows/planning_20250811.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8979 samples from VC:s3://gui-agent/data_20250707/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 1599 samples from VC:s3://gui-agent/data_20250707/android/planning_20250811.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 21026 samples from VC:s3://gui-agent/data_20250707/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2657 samples from VC:s3://gui-agent/data_20250707/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 22154 samples from VC:s3://gui-agent/data_20250714/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2383 samples from VC:s3://gui-agent/data_20250714/windows/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/web/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 23229 samples from VC:s3://gui-agent/data_20250714/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 16767 samples from VC:s3://gui-agent/data_20250714/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2389 samples from VC:s3://gui-agent/data_20250714/ubuntu/planning_20250811.jsonl +Rank 0: Loading private_aig_share_0815_logo_oral_operation_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_oral_operation_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_oral_operation_d240924_v1.jsonl +Rank 0: Loading private_aig_share_0815_logo_region_caption_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_region_caption_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_region_caption_d240924_v1.jsonl +Rank 0: Loading private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2.jsonl with all sampling strategy +Rank 0: Loaded 20293 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2.jsonl +Rank 0: Loading private_ui_phone_comment_20240606_json_d20241023_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_comment_20240606_json_d20241023_v2.jsonl with all sampling strategy +Rank 0: Loaded 1055 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_comment_20240606_json_d20241023_v2.jsonl +Rank 0: Loading private_ui_internal_aig_json_d241126 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_json_d241126.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 6837 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_json_d241126.jsonl +Rank 0: Loading private_ui_internal_aig_xml_d241126 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_xml_d241126.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 6873 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_xml_d241126.jsonl +Rank 0: Loading OS_Altas_androidworld_grounding_d241120_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/OS_Altas_androidworld_grounding_d241120_v1.jsonl with all sampling strategy +Rank 0: Loaded 89860 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/OS_Altas_androidworld_grounding_d241120_v1.jsonl +Rank 0: Loading private_ui_aig_share_long_caption_20240604_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_long_caption_20240604_v1.jsonl with repeat:4 sampling strategy +Rank 0: Loaded 3156 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_long_caption_20240604_v1.jsonl +Rank 0: Loading aw_1218_grounding +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/grounding_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/grounding_new.jsonl +Rank 0: Loading aw_1218_regioncaption +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/regioncaption_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/regioncaption_new.jsonl +Rank 0: Loading aw_1218_oral_operation +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/oral_operation_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/oral_operation_new.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600.jsonl with all sampling strategy +Rank 0: Loaded 6600 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1.jsonl +Rank 0: Loading private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607.jsonl with all sampling strategy +Rank 0: Loaded 24620 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607.jsonl +Rank 0: Loading private_ui_phone_2403_long_caption_d20240604_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d20240604_v2.jsonl with all sampling strategy +Rank 0: Loaded 17196 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d20240604_v2.jsonl +Rank 0: Loading private_ui_phone_2403_long_caption_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 5998 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d240430_v1.jsonl +Rank 0: Loading private_ui_phone_2403_ocr_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ocr_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 31276 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ocr_d240430_v1.jsonl +Rank 0: Loading screen_qa_with_bbox_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_with_bbox_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 62401 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_with_bbox_d240430_v1.jsonl +Rank 0: Loading screenai_layout_20240604_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screenai_layout_20240604_v1.jsonl with all sampling strategy +Rank 0: Loaded 22076 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screenai_layout_20240604_v1.jsonl +Rank 0: Loading amex_grounding_d240813_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/amex_grounding_d240813_v1.jsonl with all sampling strategy +Rank 0: Loaded 102007 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/amex_grounding_d240813_v1.jsonl +Rank 0: Loading guicourse_guienv_text_grounding_1_d240815_v3 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_1_d240815_v3.jsonl with all sampling strategy +Rank 0: Loaded 63581 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_1_d240815_v3.jsonl +Rank 0: Loading guicourse_guienv_text_grounding_2_d240815_v3 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_2_d240815_v3.jsonl with all sampling strategy +Rank 0: Loaded 6852 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_2_d240815_v3.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1.jsonl +Rank 0: Loading screen_qa_short_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_short_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 27880 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_short_d240430_v1.jsonl +Rank 0: Loading private_aig_share_0815_logo_grounding_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_grounding_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_grounding_d240924_v1.jsonl +Rank 0: Loading private_schedual_extract_20240520_v2_r464_reprompt_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_schedual_extract_20240520_v2_r464_reprompt_d240607.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 928 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_schedual_extract_20240520_v2_r464_reprompt_d240607.jsonl +Rank 0: Loading private_ui2json_app_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_app_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2488 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_app_d20240822_v1.jsonl +Rank 0: Loading private_ui2json_os_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_os_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 1242 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_os_d20240822_v1.jsonl +Rank 0: Loading private_ui2json_web_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_web_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2360 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_web_d20240822_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607.jsonl with all sampling strategy +Rank 0: Loaded 3791 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607.jsonl +Rank 0: Loading private_ui_aig_share_2405_marker_recognition_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_marker_recognition_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5179 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_marker_recognition_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_ocr_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_ocr_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5090 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_ocr_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_operation_oral_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_operation_oral_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5070 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_operation_oral_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5248 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2408_region_caption_d240903_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2408_region_caption_d240903_v1.jsonl with all sampling strategy +Rank 0: Loaded 5854 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2408_region_caption_d240903_v1.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1.jsonl +Rank 0: Loading uground_web_direct_150k_description_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_direct_150k_description_filtered_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 74208 samples from VC:s3://gui/new_annotations/uground/web_direct_150k_description_filtered_202507011.jsonl +Rank 0: Loading uground_web_direct_258k_function_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_direct_258k_function_filtered_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 124132 samples from VC:s3://gui/new_annotations/uground/web_direct_258k_function_filtered_202507011.jsonl +Rank 0: Loading uground_web_hybrid_773k_max_25qa_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_hybrid_773k_max_25qa_filtered_new_202507011.jsonl with random:50% sampling strategy +W0817 17:20:04.656000 99944 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 17:20:04.656000 99944 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 17:20:04.656000 99944 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 17:20:04.656000 99944 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +*** +W0817 17:20:21.043000 21769 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 17:20:21.043000 21769 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 17:20:21.043000 21769 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 17:20:21.043000 21769 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 17:20:37.352000 116958 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 17:20:37.352000 116958 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 17:20:37.352000 116958 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 17:20:37.352000 116958 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +[2025-08-17 17:21:13,552] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:13,564] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:13,564] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:13,566] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:13,553] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:13,553] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:13,553] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:13,553] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:13,577] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:13,568] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:13,579] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:13,568] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:13,562] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:13,563] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:13,563] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:13,566] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:13,566] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:13,566] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:13,566] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:13,571] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:13,583] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:13,572] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:13,572] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:13,574] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:13,574] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:13,574] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:13,574] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:13,583] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:13,585] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:13,586] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:13,585] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:13,585] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:13,601] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:13,601] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:13,602] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:13,602] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:13,602] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:13,602] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:13,603] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:13,603] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:16,098] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:16,098] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:16,100] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:16,103] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:16,103] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:16,103] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:16,103] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:16,104] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:22,902] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:21:22,883] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:21:22,883] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:21:22,883] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:21:22,883] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:21:22,883] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:21:22,884] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:21:22,884] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:21:22,886] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:21:22,887] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:21:22,887] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:21:22,887] [INFO] [comm.py:683:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl +[2025-08-17 17:21:22,887] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:21:22,887] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:21:22,887] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:21:22,887] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:21:23,396] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:21:23,398] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:21:23,443] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:21:23,534] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:21:23,536] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:21:23,537] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:21:23,540] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:21:23,545] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:21:23,530] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:21:23,531] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:21:23,529] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:21:23,548] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:21:23,536] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:21:23,537] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:21:23,536] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:21:23,536] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +e Flash Attention 2.0 with a model not initialized on GPU. Make sure to moYou are attempting to use Flash Attention 2.0 with a model not initialized[2025-08-17 17:21:23,535] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:21:23,548] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +ce None world_size = 64 +[2025-08-17 17:21:23,537] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:21:23,554] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. + on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:21:23,544] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:21:23,546] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:21:23,556] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:21:23,557] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:21:23,557] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:21:23,572] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:21:23,579] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:21:23,579] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:21:23,581] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:21:23,581] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:21:23,581] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:21:26,410] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:21:26,410] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:21:26,410] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:21:26,410] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:21:26,411] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:21:26,411] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:21:26,411] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:21:26,411] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:21:26,919] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:21:27,048] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:21:27,049] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:21:27,049] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:21:27,049] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:21:27,049] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:21:27,049] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:21:27,053] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:21:32,056] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:32,067] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:32,101] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:32,117] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:32,121] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:32,122] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:32,122] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:32,122] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 17:21:44,317] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:21:44,318] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:21:44,319] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:21:44,320] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:21:44,320] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:21:44,320] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:21:44,321] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:21:44,321] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 17:21:45,122] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:21:45,266] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:21:45,267] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:21:45,269] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:21:45,272] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:21:45,304] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:21:45,318] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 17:21:45,321] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 17:21:48,139] [INFO] [partition_parameters.py:348:__exit__] finished initializing model - num_params = 825, num_elems = 4.07B + Loading checkpoint shards: 0%| | 0/2 [00:00 before Client(conf_path) +Rank 0: --> after Client(conf_path) +Rank 0: Loading datasets: /mnt/petrelfs/liuzhaoyang/workspace/GUIAgent/internvl_chat/data/internvl_meta/meta/meta_250816_1.json +Rank 0: Loading guienv +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl with all sampling strategy +Rank 0: Loaded 327972 samples from VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl +Rank 0: Loading omniact +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl with all sampling strategy +Rank 0: Loaded 6720 samples from VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl +Rank 0: Loading ricoig16k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16133 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl +Rank 0: Loading ricosca +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl with all sampling strategy +Rank 0: Loaded 173212 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl +Rank 0: Loading seeclick +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl with all sampling strategy +Rank 0: Loaded 271121 samples from VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl +Rank 0: Loading ui_refexp +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl with all sampling strategy +Rank 0: Loaded 15624 samples from VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl +Rank 0: Loading webui350k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 57389 samples from VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl +Rank 0: Loading widget_captioning +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl with all sampling strategy +Rank 0: Loaded 101426 samples from VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl +Rank 0: Loading aitw-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl +Rank 0: Loading aitw-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl +Rank 0: Loading aitw-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl +Rank 0: Loading amex-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl +Rank 0: Loading amex-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl +Rank 0: Loading amex-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl +Rank 0: Loading android_control +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 149428 samples from VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl +Rank 0: Loading coat +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl with all sampling strategy +Rank 0: Loaded 11833 samples from VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl +Rank 0: Loading guiact-web-multi-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl +Rank 0: Loading guiact-web-multi-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl +Rank 0: Loading guiact-web-multi-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl +Rank 0: Loading guiact-web-single +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl with all sampling strategy +Rank 0: Loaded 67396 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl +Rank 0: Loading guide +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl with all sampling strategy +Rank 0: Loaded 13544 samples from VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl +Rank 0: Loading gui-odyssey-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl +Rank 0: Loading gui-odyssey-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl +Rank 0: Loading gui-odyssey-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl +Rank 0: Loading mind2web-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l1_202507011.jsonl +Rank 0: Loading mind2web-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l2_202507011.jsonl +Rank 0: Loading mind2web-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l3_202507011.jsonl +Rank 0: Loading miniwob-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l1_202507011.jsonl +Rank 0: Loading miniwob-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l2_202507011.jsonl +Rank 0: Loading miniwob-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l3_202507011.jsonl +Rank 0: Loading aguvis_android_control-v2 +Rank 0: Skipping aguvis_android_control-v2 due to repeat_time=0 +Rank 0: Loading aguvis_coat-v2 +Rank 0: Skipping aguvis_coat-v2 due to repeat_time=0 +Rank 0: Loading aguvis_docvqa_grounding +Rank 0: Skipping aguvis_docvqa_grounding due to repeat_time=0 +Rank 0: Loading aguvis_guiact-web-multi +Rank 0: Skipping aguvis_guiact-web-multi due to repeat_time=0 +Rank 0: Loading aguvis_guiact-web-single-v2 +Rank 0: Skipping aguvis_guiact-web-single-v2 due to repeat_time=0 +Rank 0: Loading aguvis_guide_si_10k-v2 +Rank 0: Skipping aguvis_guide_si_10k-v2 due to repeat_time=0 +Rank 0: Loading aguvis_guienv +Rank 0: Skipping aguvis_guienv due to repeat_time=0 +Rank 0: Loading aguvis_mind2web_train_v1.0.1 +Rank 0: Skipping aguvis_mind2web_train_v1.0.1 due to repeat_time=0 +Rank 0: Loading aguvis_omniact +Rank 0: Skipping aguvis_omniact due to repeat_time=0 +Rank 0: Loading aguvis_osatlas_ui_tars_cleaned +Rank 0: Skipping aguvis_osatlas_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading aguvis_ricoig16k +Rank 0: Skipping aguvis_ricoig16k due to repeat_time=0 +Rank 0: Loading aguvis_ricosca +Rank 0: Skipping aguvis_ricosca due to repeat_time=0 +Rank 0: Loading aguvis_seeclick_mi_ui_tars_cleaned +Rank 0: Skipping aguvis_seeclick_mi_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading aguvis_seeclick_ui_tars_cleaned_fixed +Rank 0: Skipping aguvis_seeclick_ui_tars_cleaned_fixed due to repeat_time=0 +Rank 0: Loading aguvis_ui_refexp +Rank 0: Skipping aguvis_ui_refexp due to repeat_time=0 +Rank 0: Loading aguvis_webui350k +Rank 0: Skipping aguvis_webui350k due to repeat_time=0 +Rank 0: Loading aguvis_widget_captioning +Rank 0: Skipping aguvis_widget_captioning due to repeat_time=0 +Rank 0: Loading icon_caption_icon_v0222_description +Rank 0: Skipping icon_caption_icon_v0222_description due to repeat_time=0 +Rank 0: Loading icon_grounding_icon_v0222_grounding +Rank 0: Skipping icon_grounding_icon_v0222_grounding due to repeat_time=0 +Rank 0: Loading refusal_component_final_1.5m +Rank 0: Skipping refusal_component_final_1.5m due to repeat_time=0 +Rank 0: Loading refusal_component_library_snap_icon_data_grounding +Rank 0: Skipping refusal_component_library_snap_icon_data_grounding due to repeat_time=0 +Rank 0: Loading refusal_component_v1_130k +Rank 0: Skipping refusal_component_v1_130k due to repeat_time=0 +Rank 0: Loading refusal_guienv +Rank 0: Skipping refusal_guienv due to repeat_time=0 +Rank 0: Loading refusal_icon_v0222_grounding +Rank 0: Skipping refusal_icon_v0222_grounding due to repeat_time=0 +Rank 0: Loading refusal_osatlas_ui_tars_cleaned +Rank 0: Skipping refusal_osatlas_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading refusal_ricosca +Rank 0: Skipping refusal_ricosca due to repeat_time=0 +Rank 0: Loading refusal_seeclick_mi_ui_tars_cleaned +Rank 0: Skipping refusal_seeclick_mi_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading refusal_seeclick_ui_tars_cleaned_fixed +Rank 0: Skipping refusal_seeclick_ui_tars_cleaned_fixed due to repeat_time=0 +Rank 0: Loading refusal_training_data_icon_grounded_merged +Rank 0: Skipping refusal_training_data_icon_grounded_merged due to repeat_time=0 +Rank 0: Loading component_generated_component_final_1.5m_cleaned_split +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_final_1.5m_cleaned_split_20250713.jsonl with random:10% sampling strategy +Rank 0: Loaded 3987 samples from VC:s3://gui-agent/jedi/annotations_250713/component_final_1.5m_cleaned_split_20250713.jsonl +Rank 0: Loading component_generated_component_library_snap_icon_data_description +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_description_conversations_20250713.jsonl with random:50% sampling strategy +Rank 0: Loaded 11061 samples from VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_description_conversations_20250713.jsonl +Rank 0: Loading component_generated_component_library_snap_icon_data_grounding +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_grounding_conversations_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 4424 samples from VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_grounding_conversations_20250713.jsonl +Rank 0: Loading component_generated_component_v1_130k +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_v1_130k_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 26376 samples from VC:s3://gui-agent/jedi/annotations_250713/component_v1_130k_20250713.jsonl +Rank 0: Loading component_rule-based_doc_data_new +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/doc_data_new_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 3153 samples from VC:s3://gui-agent/jedi/annotations_250713/doc_data_new_20250713.jsonl +Rank 0: Loading component_rule-based_doc_scroll_data_new +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/doc_scroll_data_new_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 603 samples from VC:s3://gui-agent/jedi/annotations_250713/doc_scroll_data_new_20250713.jsonl +Rank 0: Loading component_rule-based_ethercalc_v1 +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/ethercalc_v1_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 2012 samples from VC:s3://gui-agent/jedi/annotations_250713/ethercalc_v1_20250713.jsonl +Rank 0: Loading component_rule-based_slide_v1_17k +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/slide_v1_17k_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 2363 samples from VC:s3://gui-agent/jedi/annotations_250713/slide_v1_17k_20250713.jsonl +Rank 0: Loading icon_caption_ios_app_data +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/ios_app_data_conversations-images_pure_color_background_20250713.jsonl with all sampling strategy +Rank 0: Loaded 49498 samples from VC:s3://gui-agent/jedi/annotations_250713/ios_app_data_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_caption_mac_app_data +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/mac_app_data_conversations-images_pure_color_background_20250713.jsonl with all sampling strategy +Rank 0: Loaded 18083 samples from VC:s3://gui-agent/jedi/annotations_250713/mac_app_data_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_caption_training_data_icon +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_pure_color_background_20250713.jsonl with random:50% sampling strategy +Rank 0: Loaded 75874 samples from VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_grounding_training_data_icon_grounded_merged +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_grounded_merged_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 5466 samples from VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_grounded_merged_20250713.jsonl +Rank 0: Loading layout_layout200k_training_data_qwen25 +Rank 0: Skipping layout_layout200k_training_data_qwen25 due to repeat_time=0 +Rank 0: Loading layout_layout200k_grounding_training_data_qwen25 +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/layout200k_grounding_training_data_qwen25_20250713.jsonl with random:10% sampling strategy +Rank 0: Loaded 158612 samples from VC:s3://gui-agent/jedi/annotations_250713/layout200k_grounding_training_data_qwen25_20250713.jsonl +Rank 0: Loading layout_layout400k_claude_training_data_qwen25_split +Rank 0: Skipping layout_layout400k_claude_training_data_qwen25_split due to repeat_time=0 +Rank 0: Loading layout_layout400k_claude_grounding_training_data_qwen25_split +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/layout400k_claude_grounding_training_data_qwen25_split_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 7540 samples from VC:s3://gui-agent/jedi/annotations_250713/layout400k_claude_grounding_training_data_qwen25_split_20250713.jsonl +Rank 0: Loading layout_os_layout_v1 +Rank 0: Skipping layout_os_layout_v1 due to repeat_time=0 +Rank 0: Loading layout_os_layout_v1_grounding +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/os_layout_v1_grounding_training_data_qwen25_split_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 7857 samples from VC:s3://gui-agent/jedi/annotations_250713/os_layout_v1_grounding_training_data_qwen25_split_20250713.jsonl +Rank 0: Loading mind2web_raw_image +Rank 0: Loading VC:s3://gui-agent/mind2web_train/navigation_20250705.jsonl with all sampling strategy +Rank 0: Loaded 5740 samples from VC:s3://gui-agent/mind2web_train/navigation_20250705.jsonl +Rank 0: Loading ws_android_navigation_20250328 +Rank 0: Skipping ws_android_navigation_20250328 due to repeat_time=0 +Rank 0: Loading ws_android_navigation_20250407 +Rank 0: Skipping ws_android_navigation_20250407 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_w_history_20250328 +Rank 0: Skipping ws_web_navigation_w_history_20250328 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_wo_history_20250328 +Rank 0: Skipping ws_web_navigation_wo_history_20250328 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_20250421 +Rank 0: Skipping ws_web_navigation_20250421 due to repeat_time=0 +Rank 0: Loading ws_ubuntu_navigation_20250328 +Rank 0: Skipping ws_ubuntu_navigation_20250328 due to repeat_time=0 +Rank 0: Loading ws_android_navigation_20250505 +Rank 0: Skipping ws_android_navigation_20250505 due to repeat_time=0 +Rank 0: Loading internal_android_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 48814 samples from VC:s3://gui-agent/data_20250612/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19042 samples from VC:s3://gui-agent/data_20250612/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8363 samples from VC:s3://gui-agent/data_20250612/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 26412 samples from VC:s3://gui-agent/data_20250612/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 57522 samples from VC:s3://gui-agent/data_20250612/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 1342 samples from VC:s3://gui-agent/data_20250624/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 15766 samples from VC:s3://gui-agent/data_20250624/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19280 samples from VC:s3://gui-agent/data_20250630/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 3560 samples from VC:s3://gui-agent/data_20250630/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 9258 samples from VC:s3://gui-agent/data_20250630/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 420 samples from VC:s3://gui-agent/data_20250707/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 8898 samples from VC:s3://gui-agent/data_20250707/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 21026 samples from VC:s3://gui-agent/data_20250707/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 4490 samples from VC:s3://gui-agent/data_20250707/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 22154 samples from VC:s3://gui-agent/data_20250714/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/web/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 11614 samples from VC:s3://gui-agent/data_20250714/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 16767 samples from VC:s3://gui-agent/data_20250714/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 48814 samples from VC:s3://gui-agent/data_20250612/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 11970 samples from VC:s3://gui-agent/data_20250612/android/planning_20250811.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19042 samples from VC:s3://gui-agent/data_20250612/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 4926 samples from VC:s3://gui-agent/data_20250612/mac/planning_20250811.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8363 samples from VC:s3://gui-agent/data_20250612/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3716 samples from VC:s3://gui-agent/data_20250612/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 26412 samples from VC:s3://gui-agent/data_20250612/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 4420 samples from VC:s3://gui-agent/data_20250612/windows/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 115044 samples from VC:s3://gui-agent/data_20250612/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2684 samples from VC:s3://gui-agent/data_20250624/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 15766 samples from VC:s3://gui-agent/data_20250624/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7402 samples from VC:s3://gui-agent/data_20250624/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19280 samples from VC:s3://gui-agent/data_20250630/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3746 samples from VC:s3://gui-agent/data_20250630/android/planning_20250811.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 3560 samples from VC:s3://gui-agent/data_20250630/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 2469 samples from VC:s3://gui-agent/data_20250630/mac/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 18516 samples from VC:s3://gui-agent/data_20250630/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 420 samples from VC:s3://gui-agent/data_20250707/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 243 samples from VC:s3://gui-agent/data_20250707/mac/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 8898 samples from VC:s3://gui-agent/data_20250707/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 1188 samples from VC:s3://gui-agent/data_20250707/windows/planning_20250811.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8979 samples from VC:s3://gui-agent/data_20250707/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 1599 samples from VC:s3://gui-agent/data_20250707/android/planning_20250811.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 21026 samples from VC:s3://gui-agent/data_20250707/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2657 samples from VC:s3://gui-agent/data_20250707/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 22154 samples from VC:s3://gui-agent/data_20250714/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2383 samples from VC:s3://gui-agent/data_20250714/windows/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/web/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 23229 samples from VC:s3://gui-agent/data_20250714/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 16767 samples from VC:s3://gui-agent/data_20250714/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2389 samples from VC:s3://gui-agent/data_20250714/ubuntu/planning_20250811.jsonl +Rank 0: Loading private_aig_share_0815_logo_oral_operation_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_oral_operation_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_oral_operation_d240924_v1.jsonl +Rank 0: Loading private_aig_share_0815_logo_region_caption_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_region_caption_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_region_caption_d240924_v1.jsonl +Rank 0: Loading private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2.jsonl with all sampling strategy +Rank 0: Loaded 20293 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2.jsonl +Rank 0: Loading private_ui_phone_comment_20240606_json_d20241023_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_comment_20240606_json_d20241023_v2.jsonl with all sampling strategy +Rank 0: Loaded 1055 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_comment_20240606_json_d20241023_v2.jsonl +Rank 0: Loading private_ui_internal_aig_json_d241126 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_json_d241126.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 6837 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_json_d241126.jsonl +Rank 0: Loading private_ui_internal_aig_xml_d241126 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_xml_d241126.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 6873 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_xml_d241126.jsonl +Rank 0: Loading OS_Altas_androidworld_grounding_d241120_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/OS_Altas_androidworld_grounding_d241120_v1.jsonl with all sampling strategy +Rank 0: Loaded 89860 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/OS_Altas_androidworld_grounding_d241120_v1.jsonl +Rank 0: Loading private_ui_aig_share_long_caption_20240604_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_long_caption_20240604_v1.jsonl with repeat:4 sampling strategy +Rank 0: Loaded 3156 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_long_caption_20240604_v1.jsonl +Rank 0: Loading aw_1218_grounding +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/grounding_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/grounding_new.jsonl +Rank 0: Loading aw_1218_regioncaption +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/regioncaption_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/regioncaption_new.jsonl +Rank 0: Loading aw_1218_oral_operation +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/oral_operation_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/oral_operation_new.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600.jsonl with all sampling strategy +Rank 0: Loaded 6600 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1.jsonl +Rank 0: Loading private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607.jsonl with all sampling strategy +Rank 0: Loaded 24620 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607.jsonl +Rank 0: Loading private_ui_phone_2403_long_caption_d20240604_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d20240604_v2.jsonl with all sampling strategy +Rank 0: Loaded 17196 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d20240604_v2.jsonl +Rank 0: Loading private_ui_phone_2403_long_caption_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 5998 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d240430_v1.jsonl +Rank 0: Loading private_ui_phone_2403_ocr_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ocr_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 31276 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ocr_d240430_v1.jsonl +Rank 0: Loading screen_qa_with_bbox_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_with_bbox_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 62401 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_with_bbox_d240430_v1.jsonl +Rank 0: Loading screenai_layout_20240604_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screenai_layout_20240604_v1.jsonl with all sampling strategy +Rank 0: Loaded 22076 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screenai_layout_20240604_v1.jsonl +Rank 0: Loading amex_grounding_d240813_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/amex_grounding_d240813_v1.jsonl with all sampling strategy +Rank 0: Loaded 102007 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/amex_grounding_d240813_v1.jsonl +Rank 0: Loading guicourse_guienv_text_grounding_1_d240815_v3 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_1_d240815_v3.jsonl with all sampling strategy +Rank 0: Loaded 63581 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_1_d240815_v3.jsonl +Rank 0: Loading guicourse_guienv_text_grounding_2_d240815_v3 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_2_d240815_v3.jsonl with all sampling strategy +Rank 0: Loaded 6852 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_2_d240815_v3.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1.jsonl +Rank 0: Loading screen_qa_short_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_short_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 27880 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_short_d240430_v1.jsonl +Rank 0: Loading private_aig_share_0815_logo_grounding_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_grounding_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_grounding_d240924_v1.jsonl +Rank 0: Loading private_schedual_extract_20240520_v2_r464_reprompt_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_schedual_extract_20240520_v2_r464_reprompt_d240607.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 928 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_schedual_extract_20240520_v2_r464_reprompt_d240607.jsonl +Rank 0: Loading private_ui2json_app_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_app_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2488 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_app_d20240822_v1.jsonl +Rank 0: Loading private_ui2json_os_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_os_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 1242 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_os_d20240822_v1.jsonl +Rank 0: Loading private_ui2json_web_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_web_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2360 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_web_d20240822_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607.jsonl with all sampling strategy +Rank 0: Loaded 3791 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607.jsonl +Rank 0: Loading private_ui_aig_share_2405_marker_recognition_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_marker_recognition_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5179 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_marker_recognition_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_ocr_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_ocr_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5090 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_ocr_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_operation_oral_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_operation_oral_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5070 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_operation_oral_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5248 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2408_region_caption_d240903_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2408_region_caption_d240903_v1.jsonl with all sampling strategy +Rank 0: Loaded 5854 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2408_region_caption_d240903_v1.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1.jsonl +Rank 0: Loading uground_web_direct_150k_description_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_direct_150k_description_filtered_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 74208 samples from VC:s3://gui/new_annotations/uground/web_direct_150k_description_filtered_202507011.jsonl +Rank 0: Loading uground_web_direct_258k_function_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_direct_258k_function_filtered_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 124132 samples from VC:s3://gui/new_annotations/uground/web_direct_258k_function_filtered_202507011.jsonl +Rank 0: Loading uground_web_hybrid_773k_max_25qa_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_hybrid_773k_max_25qa_filtered_new_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 621928 samples from VC:s3://gui/new_annotations/uground/web_hybrid_773k_max_25qa_filtered_new_202507011.jsonl +Rank 0: Loading altas_windows +Rank 0: Loading VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_windows_splited_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 537672 samples from VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_windows_splited_202507011.jsonl +Rank 0: Loading altas_linux +Rank 0: Loading VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_linux_splited_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 21578 samples from VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_linux_splited_202507011.jsonl +Rank 0: Loading atlas_macos +Rank 0: Loading VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_macos_splited_202507011.jsonl with random:20% sampling strategy +Rank 0: Loaded 3680 samples from VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_macos_splited_202507011.jsonl +Rank 0: Loading android_action_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/android/filter_action_grounding_20250405_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 5621 samples from VC:s3://gui/data_20250328/android/filter_action_grounding_20250405_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250328 +Rank 0: Loading VC:s3://gui-agent/data_20250328/windows/action_grounding_20250409_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 11980 samples from VC:s3://gui-agent/data_20250328/windows/action_grounding_20250409_202507011_20250722.jsonl +Rank 0: Loading web_action_grounding_20250328 +Rank 0: Loading VC:s3://gui-agent/data_20250328/web_25k/action_grounding_20250404_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 9459 samples from VC:s3://gui-agent/data_20250328/web_25k/action_grounding_20250404_202507011.jsonl +Rank 0: Loading ubuntu_action_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/ubuntu/action_grounding_20250407_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 328 samples from VC:s3://gui/data_20250310/ubuntu/action_grounding_20250407_202507011.jsonl +Rank 0: Loading ubuntu_action_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/ubuntu/action_grounding_20250407_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 54 samples from VC:s3://gui/data_20250317/ubuntu/action_grounding_20250407_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/action_grounding_20250421_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 480 samples from VC:s3://gui/data_20250317/windows/action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/crop_action_grounding_20250421_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 240 samples from VC:s3://gui/data_20250317/windows/crop_action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/action_grounding_20250421_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 944 samples from VC:s3://gui/data_20250310/windows/action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/crop_action_grounding_20250421_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 472 samples from VC:s3://gui/data_20250310/windows/crop_action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading mac_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/mac/action_grounding_20250410_202507011.jsonl with all sampling strategy +Rank 0: Loaded 1578 samples from VC:s3://gui-agent/data_20250407/mac/action_grounding_20250410_202507011.jsonl +Rank 0: Loading iphone_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/iphone/white/action_grounding_20250410_202507011.jsonl with all sampling strategy +Rank 0: Loaded 20394 samples from VC:s3://gui-agent/data_20250407/iphone/white/action_grounding_20250410_202507011.jsonl +Rank 0: Loading web_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/web/action_grounding_20250414_202507011.jsonl with random:20% sampling strategy +Rank 0: Loaded 14285 samples from VC:s3://gui-agent/data_20250407/web/action_grounding_20250414_202507011.jsonl +Rank 0: Loading android_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/android/action_grounding_20250410_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 3590 samples from VC:s3://gui-agent/data_20250407/android/action_grounding_20250410_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/action_grounding_20250416_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 21422 samples from VC:s3://gui-agent/data_20250407/windows/action_grounding_20250416_202507011_20250722.jsonl +Rank 0: Loading windows_human_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/human_action_grounding_20250416_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 100 samples from VC:s3://gui-agent/data_20250407/windows/human_action_grounding_20250416_202507011_20250722.jsonl +Rank 0: Loading windows_aug_cropping_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/sub_action_grounding_20250421_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 7675 samples from VC:s3://gui-agent/data_20250407/windows/sub_action_grounding_20250421_202507011.jsonl +Rank 0: Loading iphone_action_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/action_grounding_20250417_202507011.jsonl with all sampling strategy +Rank 0: Loaded 20116 samples from VC:s3://gui-agent/data_20250414/iphone/action_grounding_20250417_202507011.jsonl +Rank 0: Loading iphone_human_action_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/human_action_grounding_20250421_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2520 samples from VC:s3://gui-agent/data_20250414/iphone/human_action_grounding_20250421_202507011.jsonl +Rank 0: Loading mac_human_action_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/mac/human_action_grounding_20250418_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7814 samples from VC:s3://gui-agent/data_20250414/mac/human_action_grounding_20250418_202507011.jsonl +Rank 0: Loading android_action_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/Android/action_grounding_20250429_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 17838 samples from VC:s3://gui-agent/data_20250421/Android/action_grounding_20250429_202507011.jsonl +Rank 0: Loading android_action_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/Android/action_grounding_20250429_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 9008 samples from VC:s3://gui-agent/data_20250428/Android/action_grounding_20250429_202507011.jsonl +Rank 0: Loading web_canvas_action_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/web_canvas/action_grounding_20250429_202507011.jsonl with random:20% sampling strategy +Rank 0: Loaded 624 samples from VC:s3://gui-agent/data_20250428/web_canvas/action_grounding_20250429_202507011.jsonl +Rank 0: Loading web_action_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/web/action_grounding_20250505_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 100652 samples from VC:s3://gui-agent/data_20250421/web/action_grounding_20250505_202507011.jsonl +Rank 0: Loading ubuntu_action_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/ubuntu/action_grounding_20250505_202507011.jsonl with all sampling strategy +Rank 0: Loaded 28346 samples from VC:s3://gui-agent/data_20250428/ubuntu/action_grounding_20250505_202507011.jsonl +Rank 0: Loading android_action_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/android/action_grounding_20250506_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 4907 samples from VC:s3://gui-agent/data_20250505/android/action_grounding_20250506_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/action_grounding_20250508_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 2635 samples from VC:s3://gui-agent/data_20250505/windows/action_grounding_20250508_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/crop_action_grounding_20250508_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 5234 samples from VC:s3://gui-agent/data_20250505/windows/crop_action_grounding_20250508_202507011_20250722.jsonl +Rank 0: Loading ubuntu_action_grounding_20250508 +Rank 0: Loading VC:s3://gui-agent/data_20250508/ubuntu/action_grounding_20250509_202507011.jsonl with all sampling strategy +Rank 0: Loaded 3404 samples from VC:s3://gui-agent/data_20250508/ubuntu/action_grounding_20250509_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250510_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250510_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250510_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250510_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250526_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250526_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250526_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250526_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250526_3 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250527_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250527_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_3 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250527_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250527_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250529_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 17050 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250529_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250529_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 17050 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250529_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_1 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250510_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 2855 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250510_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_2 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250526_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 655 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250526_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_3 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250527_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 833 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250527_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_4 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250529_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 2643 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250529_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_1 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250510_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250510_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_2 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250526_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250526_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_3 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250527_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250527_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_4 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250529_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5286 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250529_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_5 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250510_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250510_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_6 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250526_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250526_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_7 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250527_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250527_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_8 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250529_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5286 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250529_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/action_grounding_20250619_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3130 samples from VC:s3://gui-agent/data_20250623/windows/action_grounding_20250619_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250619_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 3130 samples from VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250619_202507011_20250722.jsonl +Rank 0: Loading windows_hover_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/action_grounding_20250620_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 3333 samples from VC:s3://gui-agent/data_20250623/windows/action_grounding_20250620_202507011_20250722.jsonl +Rank 0: Loading windows_crop_hover_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250620_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 3333 samples from VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250620_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_1 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 833 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_2 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1666 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_3 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1666 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_pure_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_4 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 358 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_5 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 782 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_6 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 782 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_pure_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/action_grounding_20250627_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7946 samples from VC:s3://gui-agent/data_20250630/windows/action_grounding_20250627_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/crop_action_grounding_20250627_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 3973 samples from VC:s3://gui-agent/data_20250630/windows/crop_action_grounding_20250627_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_1 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 993 samples from VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1986 samples from VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1986 samples from VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_pure_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/action_grounding_20250630_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 1990 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/action_grounding_20250630_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_action_grounding_20250630_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1990 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_action_grounding_20250630_202507011_20250722.jsonl +Rank 0: Loading windows_human_action_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/action_grounding_20250703_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 5040 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/action_grounding_20250703_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_action_grounding_20250703_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 5040 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_action_grounding_20250703_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_4 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 630 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_5 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1260 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_6 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1260 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_pure_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_7 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 249 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_8 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 498 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_9 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 498 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_pure_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/action_grounding_20250708_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2538 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/action_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_action_grounding_20250708_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1269 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_action_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250707_1 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_action_grounding_20250708.jsonl with random:25% sampling strategy +Rank 0: Loaded 172 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_action_grounding_20250708.jsonl +Rank 0: Loading windows_aug_action_grounding_20250707_2 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_action_grounding_20250708.jsonl with random:25% sampling strategy +Rank 0: Loaded 634 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_action_grounding_20250708.jsonl +Rank 0: Loading windows_aug_action_grounding_20250707_3 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_action_grounding_20250708.jsonl with random:25% sampling strategy +Rank 0: Loaded 634 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_action_grounding_20250708.jsonl +Rank 0: Loading windows_human_action_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/action_grounding_20250717_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2832 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/action_grounding_20250717_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_action_grounding_20250717_20250722.jsonl with all sampling strategy +Rank 0: Loaded 2832 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_action_grounding_20250717_20250722.jsonl +Rank 0: Loading android_ocr_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/android/text_ocr_20250409.jsonl with all sampling strategy +Rank 0: Loaded 29878 samples from VC:s3://gui/data_20250328/android/text_ocr_20250409.jsonl +Rank 0: Loading mac_orc_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/mac/element_ocr_20250328.jsonl with all sampling strategy +Rank 0: Loaded 4393 samples from VC:s3://gui/data_20250328/mac/element_ocr_20250328.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/ubuntu/internvl_grounding_20250407.jsonl with random:80% sampling strategy +Rank 0: Loaded 254 samples from VC:s3://gui/data_20250310/ubuntu/internvl_grounding_20250407.jsonl +Rank 0: Loading windows_click_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/internvl_grounding_function_20250421.jsonl with random:80% sampling strategy +Rank 0: Loaded 1802 samples from VC:s3://gui/data_20250310/windows/internvl_grounding_function_20250421.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/ubuntu/internvl_grounding_20250407.jsonl with random:80% sampling strategy +Rank 0: Loaded 53 samples from VC:s3://gui/data_20250317/ubuntu/internvl_grounding_20250407.jsonl +Rank 0: Loading windows_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/internvl_grounding_20250421_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 6578 samples from VC:s3://gui/data_20250317/windows/internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/crop_internvl_grounding_20250421_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 3287 samples from VC:s3://gui/data_20250317/windows/crop_internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading windows_click_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/internvl_grounding_function_20250421_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 542 samples from VC:s3://gui/data_20250317/windows/internvl_grounding_function_20250421_20250722.jsonl +Rank 0: Loading windows_crop_click_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/crop_internvl_grounding_function_20250421_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 270 samples from VC:s3://gui/data_20250317/windows/crop_internvl_grounding_function_20250421_20250722.jsonl +Rank 0: Loading windows_internvl_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/internvl_grounding_20250421_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 10908 samples from VC:s3://gui/data_20250310/windows/internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/crop_internvl_grounding_20250421_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 5453 samples from VC:s3://gui/data_20250310/windows/crop_internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading android_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/android/internvl_grounding_20250409.jsonl with random:80% sampling strategy +Rank 0: Loaded 13950 samples from VC:s3://gui/data_20250328/android/internvl_grounding_20250409.jsonl +Rank 0: Loading windows_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui-agent/data_20250328/windows/internvl_grounding_20250425_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 12390 samples from VC:s3://gui-agent/data_20250328/windows/internvl_grounding_20250425_20250722.jsonl +Rank 0: Loading web_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/web_25k/internvl_grounding_20250409.jsonl with random:80% sampling strategy +Rank 0: Loaded 13402 samples from VC:s3://gui/data_20250328/web_25k/internvl_grounding_20250409.jsonl +Rank 0: Loading icon_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/icon_canva/icon_anno_20250328.jsonl with all sampling strategy +Rank 0: Loaded 81303 samples from VC:s3://gui/data_20250328/icon_canva/icon_anno_20250328.jsonl +Rank 0: Loading mac_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/mac/internvl_grounding_20250410.jsonl with all sampling strategy +Rank 0: Loaded 1251 samples from VC:s3://gui-agent/data_20250407/mac/internvl_grounding_20250410.jsonl +Rank 0: Loading iphone_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/iphone/white/internvl_grounding_20250410.jsonl with all sampling strategy +Rank 0: Loaded 27849 samples from VC:s3://gui-agent/data_20250407/iphone/white/internvl_grounding_20250410.jsonl +Rank 0: Loading web_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/web/internvl_grounding_20250414.jsonl with random:80% sampling strategy +Rank 0: Loaded 51607 samples from VC:s3://gui-agent/data_20250407/web/internvl_grounding_20250414.jsonl +Rank 0: Loading android_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/android/internvl_grounding_20250410.jsonl with random:80% sampling strategy +Rank 0: Loaded 6281 samples from VC:s3://gui-agent/data_20250407/android/internvl_grounding_20250410.jsonl +Rank 0: Loading windows_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/internvl_grounding_20250416_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 25961 samples from VC:s3://gui-agent/data_20250407/windows/internvl_grounding_20250416_20250722.jsonl +Rank 0: Loading windows_cropping_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/sub_internvl_grounding_20250421.jsonl with random:40% sampling strategy +Rank 0: Loaded 9763 samples from VC:s3://gui-agent/data_20250407/windows/sub_internvl_grounding_20250421.jsonl +Rank 0: Loading iphone_internvl_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/internvl_grounding_20250417.jsonl with all sampling strategy +Rank 0: Loaded 16896 samples from VC:s3://gui-agent/data_20250414/iphone/internvl_grounding_20250417.jsonl +Rank 0: Loading iphone_human_internvl_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/human_internvl_grounding_20250421.jsonl with all sampling strategy +Rank 0: Loaded 927 samples from VC:s3://gui-agent/data_20250414/iphone/human_internvl_grounding_20250421.jsonl +Rank 0: Loading mac_human_internvl_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/mac/human_internvl_grounding_20250418.jsonl with all sampling strategy +Rank 0: Loaded 3051 samples from VC:s3://gui-agent/data_20250414/mac/human_internvl_grounding_20250418.jsonl +Rank 0: Loading android_internvl_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/Android/internvl_grounding_20250429.jsonl with random:80% sampling strategy +Rank 0: Loaded 25217 samples from VC:s3://gui-agent/data_20250421/Android/internvl_grounding_20250429.jsonl +Rank 0: Loading android_internvl_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/Android/internvl_grounding_20250429.jsonl with random:80% sampling strategy +Rank 0: Loaded 12677 samples from VC:s3://gui-agent/data_20250428/Android/internvl_grounding_20250429.jsonl +Rank 0: Loading web_canvas_internvl_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/web_canvas/internvl_grounding_20250429.jsonl with random:40% sampling strategy +Rank 0: Loaded 1566 samples from VC:s3://gui-agent/data_20250428/web_canvas/internvl_grounding_20250429.jsonl +Rank 0: Loading web_internvl_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/web/internvl_grounding_20250505.jsonl with random:80% sampling strategy +Rank 0: Loaded 174170 samples from VC:s3://gui-agent/data_20250421/web/internvl_grounding_20250505.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/ubuntu/internvl_grounding_20250505.jsonl with random:80% sampling strategy +Rank 0: Loaded 24862 samples from VC:s3://gui-agent/data_20250428/ubuntu/internvl_grounding_20250505.jsonl +Rank 0: Loading android_internvl_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/android/internvl_grounding_20250506.jsonl with random:80% sampling strategy +Rank 0: Loaded 9142 samples from VC:s3://gui-agent/data_20250505/android/internvl_grounding_20250506.jsonl +Rank 0: Loading windows_internvl_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/internvl_grounding_20250508_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 4229 samples from VC:s3://gui-agent/data_20250505/windows/internvl_grounding_20250508_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/crop_internvl_grounding_20250508_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 4200 samples from VC:s3://gui-agent/data_20250505/windows/crop_internvl_grounding_20250508_20250722.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250508 +Rank 0: Loading VC:s3://gui-agent/data_20250508/ubuntu/internvl_grounding_20250509.jsonl with random:80% sampling strategy +Rank 0: Loaded 2868 samples from VC:s3://gui-agent/data_20250508/ubuntu/internvl_grounding_20250509.jsonl +Rank 0: Loading windows_internvl_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250510_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 9494 samples from VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250510_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250510_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 9494 samples from VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250510_20250722.jsonl +Rank 0: Loading windows_internvl_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250526_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 2270 samples from VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250526_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250526_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 2270 samples from VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250526_20250722.jsonl +Rank 0: Loading windows_internvl_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250529_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 28466 samples from VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250529_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250529_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 28466 samples from VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250529_20250722.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250619_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1620 samples from VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250619_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250619_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1620 samples from VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250619_20250722.jsonl +Rank 0: Loading windows_hover_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250620_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1714 samples from VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250620_20250722.jsonl +Rank 0: Loading windows_crop_hover_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250620_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 1714 samples from VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250620_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_1 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 1372 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_2 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2743 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_3 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2743 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_pure_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_4 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 590 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_5 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 1296 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_6 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 1296 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_pure_paste.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/internvl_grounding_20250627_20250722.jsonl with all sampling strategy +Rank 0: Loaded 4230 samples from VC:s3://gui-agent/data_20250630/windows/internvl_grounding_20250627_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/crop_internvl_grounding_20250627_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 4230 samples from VC:s3://gui-agent/data_20250630/windows/crop_internvl_grounding_20250627_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_1 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 1692 samples from VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 3384 samples from VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 3384 samples from VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_pure_paste.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/internvl_grounding_20250630_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1077 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/internvl_grounding_20250630_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_internvl_grounding_20250630_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 862 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_internvl_grounding_20250630_20250722.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/internvl_grounding_20250703_20250722.jsonl with all sampling strategy +Rank 0: Loaded 2676 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/internvl_grounding_20250703_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_internvl_grounding_20250703_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 2676 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_internvl_grounding_20250703_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_4 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 1070 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_5 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2141 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_6 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2141 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_pure_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_7 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 431 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_8 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 862 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_9 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 862 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_pure_paste.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/internvl_grounding_20250708_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1344 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/internvl_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_internvl_grounding_20250708_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1344 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_internvl_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250707_1 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_internvl_grounding_20250708.jsonl with random:40% sampling strategy +Rank 0: Loaded 292 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_internvl_grounding_20250708.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250707_2 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_internvl_grounding_20250708.jsonl with random:40% sampling strategy +Rank 0: Loaded 1075 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_internvl_grounding_20250708.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250707_3 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_internvl_grounding_20250708.jsonl with random:40% sampling strategy +Rank 0: Loaded 1075 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_internvl_grounding_20250708.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/internvl_grounding_20250717_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1509 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/internvl_grounding_20250717_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_internvl_grounding_20250717_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 1207 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_internvl_grounding_20250717_20250722.jsonl +Rank 0: Loading uibert_train_ground_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/uibert_train_ground_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 4646 samples from VC:s3://gui/new_annotations/gui_data_grounding/uibert_train_ground_d240430_v1.jsonl +Rank 0: Loading openapp_taperception_grounding_d240815_v2 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/openapp_taperception_grounding_d240815_v2.jsonl with all sampling strategy +Rank 0: Loaded 2500 samples from VC:s3://gui/new_annotations/gui_data_grounding/openapp_taperception_grounding_d240815_v2.jsonl +Rank 0: Loading openapp_widget_grounding_d240815_v2 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/openapp_widget_grounding_d240815_v2.jsonl with all sampling strategy +Rank 0: Loaded 14878 samples from VC:s3://gui/new_annotations/gui_data_grounding/openapp_widget_grounding_d240815_v2.jsonl +Rank 0: Loading openapp_mug_grounding_d240812 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/openapp_mug_grounding_d240812.jsonl with all sampling strategy +Rank 0: Loaded 26090 samples from VC:s3://gui/new_annotations/gui_data_grounding/openapp_mug_grounding_d240812.jsonl +Rank 0: Loading private_ui_phone_2403_ground_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/private_ui_phone_2403_ground_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 24798 samples from VC:s3://gui/new_annotations/gui_data_grounding/private_ui_phone_2403_ground_d240430_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_ground_d240521_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2405_ground_d240521_v1.jsonl with all sampling strategy +Rank 0: Loaded 5008 samples from VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2405_ground_d240521_v1.jsonl +Rank 0: Loading private_ui_aig_share_2406_ground_d240612_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2406_ground_d240612_v1.jsonl with all sampling strategy +Rank 0: Loaded 7903 samples from VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2406_ground_d240612_v1.jsonl +Rank 0: Loading windows_pc_agent_e_planning_cot +Rank 0: Loading VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 55564 samples from VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data.jsonl +Rank 0: Loading windows_pc_agent_e_navigation +Rank 0: Loading VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data_without_think.jsonl with all sampling strategy +Rank 0: Loaded 27782 samples from VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data_without_think.jsonl +Rank 0: Loading os_genesis_ac_training_data +Rank 0: Skipping os_genesis_ac_training_data due to repeat_time=0 +Rank 0: Loading os_genesis_aw_training_data +Rank 0: Skipping os_genesis_aw_training_data due to repeat_time=0 +Rank 0: Loading os_genesis_web_training +Rank 0: Skipping os_genesis_web_training due to repeat_time=0 +Rank 0: Loading gui_odyssey_plus_1 +Rank 0: Skipping gui_odyssey_plus_1 due to repeat_time=0 +Rank 0: Loading gui_odyssey_plus_2 +Rank 0: Skipping gui_odyssey_plus_2 due to repeat_time=0 +Rank 0: Loading gui_odyssey_plus_custom_3 +Rank 0: Skipping gui_odyssey_plus_custom_3 due to repeat_time=0 +Rank 0: Loading mm_gui_mid +Rank 0: Skipping mm_gui_mid due to repeat_time=0 +Rank 0: Loading text_gui_mid +Rank 0: Skipping text_gui_mid due to repeat_time=0 +Rank 0: Loading gui_mid_trajectory +Rank 0: Skipping gui_mid_trajectory due to repeat_time=0 +Rank 0: Loading ubuntu_rag +Rank 0: Loading VC:s3://gui-agent/cua_text_rag/ubuntu_rag.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7024 samples from VC:s3://gui-agent/cua_text_rag/ubuntu_rag.jsonl +Rank 0: Loading windows_rag +Rank 0: Loading VC:s3://gui-agent/cua_text_rag/windows_rag.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3144 samples from VC:s3://gui-agent/cua_text_rag/windows_rag.jsonl +Rank 0: Total training samples: 6240940 +Rank 0: Formatting inputs...Skip in lazy mode +Detected kernel version 3.10.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher. +Rank 0: Length of multimodal samples: 6230528, pure textual samples: 9984 +Parameter Offload: Total persistent parameters: 755712 in 408 params + 0%| | 0/12188 [00:00 + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f6896ec1080> +[Try #0] Failed to fetch sample 4744415 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f6896ec1080> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Create account'"}, {'from': 'gpt', 'value': '\nclick(x=0.9395, y=0.1175)\n'}]} + 75%|███████▍ | 9096/12188 [14:24<6:56:37, 8.08s/it] {'loss': 0.2943, 'grad_norm': 0.7050684527006341, 'learning_rate': 1.5959836319310612e-06, 'epoch': 0.75} + 75%|███████▍ | 9096/12188 [14:24<6:56:37, 8.08s/it] 75%|███████▍ | 9097/12188 [14:32<6:41:51, 7.80s/it] {'loss': 0.2795, 'grad_norm': 0.8172788034452727, 'learning_rate': 1.595010519690046e-06, 'epoch': 0.75} + 75%|███████▍ | 9097/12188 [14:32<6:41:51, 7.80s/it] 75%|███████▍ | 9098/12188 [14:39<6:30:22, 7.58s/it] {'loss': 0.3427, 'grad_norm': 0.6636126186161512, 'learning_rate': 1.5940376479039195e-06, 'epoch': 0.75} + 75%|███████▍ | 9098/12188 [14:39<6:30:22, 7.58s/it] 75%|███████▍ | 9099/12188 [14:47<6:49:51, 7.96s/it] {'loss': 0.2735, 'grad_norm': 0.6788968251317297, 'learning_rate': 1.5930650166413803e-06, 'epoch': 0.75} + 75%|███████▍ | 9099/12188 [14:47<6:49:51, 7.96s/it] 75%|███████▍ | 9100/12188 [14:55<6:44:35, 7.86s/it] {'loss': 0.3065, 'grad_norm': 0.6733406646098854, 'learning_rate': 1.5920926259711173e-06, 'epoch': 0.75} + 75%|███████▍ | 9100/12188 [14:55<6:44:35, 7.86s/it] 75%|███████▍ | 9101/12188 [15:06<7:36:07, 8.87s/it] {'loss': 0.2967, 'grad_norm': 0.7055709475257307, 'learning_rate': 1.5911204759617965e-06, 'epoch': 0.75} + 75%|███████▍ | 9101/12188 [15:06<7:36:07, 8.87s/it] 75%|███████▍ | 9102/12188 [15:17<7:58:42, 9.31s/it] {'loss': 0.2851, 'grad_norm': 0.6426294599051983, 'learning_rate': 1.590148566682071e-06, 'epoch': 0.75} + 75%|███████▍ | 9102/12188 [15:17<7:58:42, 9.31s/it] 75%|███████▍ | 9103/12188 [15:24<7:24:09, 8.64s/it] {'loss': 0.2807, 'grad_norm': 0.7890754221302223, 'learning_rate': 1.5891768982005768e-06, 'epoch': 0.75} + 75%|███████▍ | 9103/12188 [15:24<7:24:09, 8.64s/it] 75%|███████▍ | 9104/12188 [15:32<7:16:40, 8.50s/it] {'loss': 0.3681, 'grad_norm': 0.6381561749967412, 'learning_rate': 1.588205470585929e-06, 'epoch': 0.75} + 75%|███████▍ | 9104/12188 [15:32<7:16:40, 8.50s/it] 75%|███████▍ | 9105/12188 [15:39<6:51:44, 8.01s/it] {'loss': 0.2941, 'grad_norm': 0.8827305939835662, 'learning_rate': 1.5872342839067305e-06, 'epoch': 0.75} + 75%|███████▍ | 9105/12188 [15:39<6:51:44, 8.01s/it] 75%|███████▍ | 9106/12188 [15:46<6:43:13, 7.85s/it] {'loss': 0.3189, 'grad_norm': 0.6961776691810667, 'learning_rate': 1.5862633382315622e-06, 'epoch': 0.75} + 75%|███████▍ | 9106/12188 [15:46<6:43:13, 7.85s/it] 75%|███████▍ | 9107/12188 [15:56<7:09:00, 8.35s/it] {'loss': 0.3217, 'grad_norm': 0.6687360181074119, 'learning_rate': 1.5852926336289926e-06, 'epoch': 0.75} + 75%|███████▍ | 9107/12188 [15:56<7:09:00, 8.35s/it] 75%|███████▍ | 9108/12188 [16:03<6:49:25, 7.98s/it] {'loss': 0.2934, 'grad_norm': 0.6920541067043184, 'learning_rate': 1.5843221701675725e-06, 'epoch': 0.75} + 75%|███████▍ | 9108/12188 [16:03<6:49:25, 7.98s/it] 75%|███████▍ | 9109/12188 [16:10<6:30:20, 7.61s/it] {'loss': 0.2742, 'grad_norm': 0.7193549224101764, 'learning_rate': 1.5833519479158332e-06, 'epoch': 0.75} + 75%|███████▍ | 9109/12188 [16:10<6:30:20, 7.61s/it] 75%|███████▍ | 9110/12188 [16:20<7:10:50, 8.40s/it] {'loss': 0.3075, 'grad_norm': 0.6981266211374404, 'learning_rate': 1.5823819669422885e-06, 'epoch': 0.75} + 75%|███████▍ | 9110/12188 [16:20<7:10:50, 8.40s/it] 75%|███████▍ | 9111/12188 [16:30<7:37:10, 8.91s/it] {'loss': 0.315, 'grad_norm': 0.6577638170988356, 'learning_rate': 1.5814122273154404e-06, 'epoch': 0.75} + 75%|███████▍ | 9111/12188 [16:30<7:37:10, 8.91s/it] 75%|███████▍ | 9112/12188 [16:37<7:09:05, 8.37s/it] {'loss': 0.2608, 'grad_norm': 0.6672922898165287, 'learning_rate': 1.580442729103766e-06, 'epoch': 0.75} + 75%|███████▍ | 9112/12188 [16:37<7:09:05, 8.37s/it] 75%|███████▍ | 9113/12188 [16:46<7:14:30, 8.48s/it] {'loss': 0.2981, 'grad_norm': 0.6890477040278038, 'learning_rate': 1.5794734723757343e-06, 'epoch': 0.75} + 75%|███████▍ | 9113/12188 [16:46<7:14:30, 8.48s/it] 75%|███████▍ | 9114/12188 [16:55<7:25:09, 8.69s/it] {'loss': 0.3159, 'grad_norm': 0.6868272641599467, 'learning_rate': 1.5785044571997888e-06, 'epoch': 0.75} + 75%|███████▍ | 9114/12188 [16:55<7:25:09, 8.69s/it] 75%|███████▍ | 9115/12188 [17:03<7:10:16, 8.40s/it] {'loss': 0.2923, 'grad_norm': 0.6698799138406278, 'learning_rate': 1.5775356836443611e-06, 'epoch': 0.75} + 75%|███████▍ | 9115/12188 [17:03<7:10:16, 8.40s/it] 75%|███████▍ | 9116/12188 [17:11<7:11:14, 8.42s/it] {'loss': 0.3282, 'grad_norm': 0.7404772768838462, 'learning_rate': 1.5765671517778668e-06, 'epoch': 0.75} + 75%|███████▍ | 9116/12188 [17:11<7:11:14, 8.42s/it] 75%|███████▍ | 9117/12188 [17:18<6:43:51, 7.89s/it] {'loss': 0.3242, 'grad_norm': 0.7378404966605135, 'learning_rate': 1.5755988616686984e-06, 'epoch': 0.75} + 75%|███████▍ | 9117/12188 [17:18<6:43:51, 7.89s/it] 75%|███████▍ | 9118/12188 [17:25<6:25:40, 7.54s/it] {'loss': 0.3083, 'grad_norm': 0.6980366553636397, 'learning_rate': 1.5746308133852384e-06, 'epoch': 0.75} + 75%|███████▍ | 9118/12188 [17:25<6:25:40, 7.54s/it] 75%|███████▍ | 9119/12188 [17:31<6:13:55, 7.31s/it] {'loss': 0.303, 'grad_norm': 0.6466009930587999, 'learning_rate': 1.5736630069958453e-06, 'epoch': 0.75} + 75%|███████▍ | 9119/12188 [17:31<6:13:55, 7.31s/it] 75%|███████▍ | 9120/12188 [17:38<6:07:45, 7.19s/it] {'loss': 0.3549, 'grad_norm': 0.6567802313823907, 'learning_rate': 1.5726954425688662e-06, 'epoch': 0.75} + 75%|███████▍ | 9120/12188 [17:38<6:07:45, 7.19s/it] 75%|███████▍ | 9121/12188 [17:47<6:35:47, 7.74s/it] {'loss': 0.2957, 'grad_norm': 0.7170366919959754, 'learning_rate': 1.5717281201726298e-06, 'epoch': 0.75} + 75%|███████▍ | 9121/12188 [17:47<6:35:47, 7.74s/it] 75%|███████▍ | 9122/12188 [17:54<6:20:55, 7.45s/it] {'loss': 0.299, 'grad_norm': 0.7037708334887701, 'learning_rate': 1.5707610398754442e-06, 'epoch': 0.75} + 75%|███████▍ | 9122/12188 [17:54<6:20:55, 7.45s/it] 75%|███████▍ | 9123/12188 [18:01<6:15:59, 7.36s/it] {'loss': 0.3175, 'grad_norm': 0.6572472390398075, 'learning_rate': 1.5697942017456059e-06, 'epoch': 0.75} + 75%|███████▍ | 9123/12188 [18:01<6:15:59, 7.36s/it] 75%|███████▍ | 9124/12188 [18:08<6:05:52, 7.16s/it] {'loss': 0.296, 'grad_norm': 0.7696857817497345, 'learning_rate': 1.5688276058513897e-06, 'epoch': 0.75} + 75%|███████▍ | 9124/12188 [18:08<6:05:52, 7.16s/it] 75%|███████▍ | 9125/12188 [18:15<6:00:28, 7.06s/it] {'loss': 0.2717, 'grad_norm': 0.657748095710143, 'learning_rate': 1.5678612522610542e-06, 'epoch': 0.75} + 75%|███████▍ | 9125/12188 [18:15<6:00:28, 7.06s/it] 75%|███████▍ | 9126/12188 [18:23<6:17:22, 7.39s/it] {'loss': 0.2988, 'grad_norm': 0.6299949777815126, 'learning_rate': 1.5668951410428447e-06, 'epoch': 0.75} + 75%|███████▍ | 9126/12188 [18:23<6:17:22, 7.39s/it] 75%|███████▍ | 9127/12188 [18:30<6:15:55, 7.37s/it] {'loss': 0.2738, 'grad_norm': 0.6419552673765891, 'learning_rate': 1.5659292722649826e-06, 'epoch': 0.75} + 75%|███████▍ | 9127/12188 [18:30<6:15:55, 7.37s/it] 75%|███████▍ | 9128/12188 [18:37<6:05:11, 7.16s/it] {'loss': 0.3521, 'grad_norm': 0.6809964567822552, 'learning_rate': 1.5649636459956796e-06, 'epoch': 0.75} + 75%|███████▍ | 9128/12188 [18:37<6:05:11, 7.16s/it] 75%|███████▍ | 9129/12188 [18:46<6:27:25, 7.60s/it] {'loss': 0.2921, 'grad_norm': 0.6799680960985561, 'learning_rate': 1.563998262303124e-06, 'epoch': 0.75} + 75%|███████▍ | 9129/12188 [18:46<6:27:25, 7.60s/it] 75%|███████▍ | 9130/12188 [18:54<6:36:19, 7.78s/it] {'loss': 0.3103, 'grad_norm': 0.6376174101864279, 'learning_rate': 1.5630331212554906e-06, 'epoch': 0.75} + 75%|███████▍ | 9130/12188 [18:54<6:36:19, 7.78s/it] 75%|███████▍ | 9131/12188 [19:00<6:19:44, 7.45s/it] {'loss': 0.2923, 'grad_norm': 0.7094791927007649, 'learning_rate': 1.5620682229209378e-06, 'epoch': 0.75} + 75%|███████▍ | 9131/12188 [19:00<6:19:44, 7.45s/it] 75%|███████▍ | 9132/12188 [19:08<6:19:42, 7.46s/it] {'loss': 0.3147, 'grad_norm': 0.6719685136341286, 'learning_rate': 1.5611035673676018e-06, 'epoch': 0.75} + 75%|███████▍ | 9132/12188 [19:08<6:19:42, 7.46s/it] 75%|███████▍ | 9133/12188 [19:15<6:21:01, 7.48s/it] {'loss': 0.3013, 'grad_norm': 0.7136717596468931, 'learning_rate': 1.560139154663607e-06, 'epoch': 0.75} + 75%|███████▍ | 9133/12188 [19:15<6:21:01, 7.48s/it] 75%|███████▍ | 9134/12188 [19:24<6:38:09, 7.82s/it] {'loss': 0.3401, 'grad_norm': 0.8333775271396712, 'learning_rate': 1.5591749848770604e-06, 'epoch': 0.75} + 75%|███████▍ | 9134/12188 [19:24<6:38:09, 7.82s/it] 75%|███████▍ | 9135/12188 [19:31<6:20:15, 7.47s/it] {'loss': 0.2878, 'grad_norm': 0.65425805564909, 'learning_rate': 1.5582110580760462e-06, 'epoch': 0.75} + 75%|███████▍ | 9135/12188 [19:31<6:20:15, 7.47s/it] 75%|███████▍ | 9136/12188 [19:41<6:59:11, 8.24s/it] {'loss': 0.324, 'grad_norm': 0.7290318435649271, 'learning_rate': 1.5572473743286393e-06, 'epoch': 0.75} + 75%|███████▍ | 9136/12188 [19:41<6:59:11, 8.24s/it] 75%|███████▍ | 9137/12188 [19:48<6:40:25, 7.87s/it] {'loss': 0.2935, 'grad_norm': 0.7011110421180631, 'learning_rate': 1.5562839337028917e-06, 'epoch': 0.75} + 75%|███████▍ | 9137/12188 [19:48<6:40:25, 7.87s/it] 75%|███████▍ | 9138/12188 [19:54<6:23:16, 7.54s/it] {'loss': 0.3613, 'grad_norm': 0.6967666027685071, 'learning_rate': 1.5553207362668383e-06, 'epoch': 0.75} + 75%|███████▍ | 9138/12188 [19:55<6:23:16, 7.54s/it] 75%|███████▍ | 9139/12188 [20:01<6:09:24, 7.27s/it] {'loss': 0.298, 'grad_norm': 0.7275714332667291, 'learning_rate': 1.5543577820885014e-06, 'epoch': 0.75} + 75%|███████▍ | 9139/12188 [20:01<6:09:24, 7.27s/it] 75%|███████▍ | 9140/12188 [20:08<6:00:53, 7.10s/it] {'loss': 0.2626, 'grad_norm': 0.6880846212841614, 'learning_rate': 1.553395071235881e-06, 'epoch': 0.75} + 75%|███████▍ | 9140/12188 [20:08<6:00:53, 7.10s/it] 75%|███████▌ | 9141/12188 [20:16<6:18:57, 7.46s/it] {'loss': 0.2985, 'grad_norm': 0.7519216195318449, 'learning_rate': 1.5524326037769649e-06, 'epoch': 0.75} + 75%|███████▌ | 9141/12188 [20:16<6:18:57, 7.46s/it] 75%|███████▌ | 9142/12188 [20:23<6:12:45, 7.34s/it] {'loss': 0.2964, 'grad_norm': 0.6984129768591841, 'learning_rate': 1.5514703797797177e-06, 'epoch': 0.75} + 75%|███████▌ | 9142/12188 [20:23<6:12:45, 7.34s/it] 75%|███████▌ | 9143/12188 [20:32<6:36:05, 7.80s/it] {'loss': 0.3062, 'grad_norm': 0.6465152585095775, 'learning_rate': 1.5505083993120917e-06, 'epoch': 0.75} + 75%|███████▌ | 9143/12188 [20:32<6:36:05, 7.80s/it] 75%|███████▌ | 9144/12188 [20:40<6:31:52, 7.72s/it] {'loss': 0.3032, 'grad_norm': 0.6880619507049293, 'learning_rate': 1.5495466624420218e-06, 'epoch': 0.75} + 75%|███████▌ | 9144/12188 [20:40<6:31:52, 7.72s/it] 75%|███████▌ | 9145/12188 [20:47<6:25:28, 7.60s/it] {'loss': 0.3225, 'grad_norm': 0.67201332833827, 'learning_rate': 1.548585169237422e-06, 'epoch': 0.75} + 75%|███████▌ | 9145/12188 [20:47<6:25:28, 7.60s/it] 75%|███████▌ | 9146/12188 [20:57<7:03:23, 8.35s/it] {'loss': 0.2992, 'grad_norm': 0.6992739144946865, 'learning_rate': 1.5476239197661918e-06, 'epoch': 0.75} + 75%|███████▌ | 9146/12188 [20:57<7:03:23, 8.35s/it] 75%|███████▌ | 9147/12188 [21:04<6:39:13, 7.88s/it] {'loss': 0.315, 'grad_norm': 1.0077875140743224, 'learning_rate': 1.5466629140962158e-06, 'epoch': 0.75} + 75%|███████▌ | 9147/12188 [21:04<6:39:13, 7.88s/it] 75%|███████▌ | 9148/12188 [21:11<6:25:23, 7.61s/it] {'loss': 0.2805, 'grad_norm': 0.5851851201151765, 'learning_rate': 1.5457021522953547e-06, 'epoch': 0.75} + 75%|███████▌ | 9148/12188 [21:11<6:25:23, 7.61s/it] 75%|███████▌ | 9149/12188 [21:20<6:47:44, 8.05s/it] {'loss': 0.3161, 'grad_norm': 0.6428382535898999, 'learning_rate': 1.5447416344314598e-06, 'epoch': 0.75} + 75%|███████▌ | 9149/12188 [21:20<6:47:44, 8.05s/it] 75%|███████▌ | 9150/12188 [21:27<6:27:04, 7.64s/it] {'loss': 0.3383, 'grad_norm': 0.7978582464426802, 'learning_rate': 1.5437813605723578e-06, 'epoch': 0.75} + 75%|███████▌ | 9150/12188 [21:27<6:27:04, 7.64s/it] 75%|███████▌ | 9151/12188 [21:34<6:26:56, 7.64s/it] {'loss': 0.2977, 'grad_norm': 0.672175060471377, 'learning_rate': 1.5428213307858646e-06, 'epoch': 0.75} + 75%|███████▌ | 9151/12188 [21:34<6:26:56, 7.64s/it] 75%|███████▌ | 9152/12188 [21:43<6:49:54, 8.10s/it] {'loss': 0.2896, 'grad_norm': 0.7050464612210299, 'learning_rate': 1.5418615451397746e-06, 'epoch': 0.75} + 75%|███████▌ | 9152/12188 [21:43<6:49:54, 8.10s/it] 75%|███████▌ | 9153/12188 [21:50<6:28:52, 7.69s/it] {'loss': 0.3337, 'grad_norm': 0.6549473822720497, 'learning_rate': 1.5409020037018652e-06, 'epoch': 0.75} + 75%|███████▌ | 9153/12188 [21:50<6:28:52, 7.69s/it] 75%|███████▌ | 9154/12188 [21:57<6:18:49, 7.49s/it] {'loss': 0.3025, 'grad_norm': 0.742869157980379, 'learning_rate': 1.5399427065398998e-06, 'epoch': 0.75} + 75%|███████▌ | 9154/12188 [21:57<6:18:49, 7.49s/it] 75%|███████▌ | 9155/12188 [22:06<6:39:34, 7.90s/it] {'loss': 0.3813, 'grad_norm': 0.6795245045191878, 'learning_rate': 1.53898365372162e-06, 'epoch': 0.75} + 75%|███████▌ | 9155/12188 [22:06<6:39:34, 7.90s/it] 75%|███████▌ | 9156/12188 [22:13<6:25:14, 7.62s/it] {'loss': 0.3106, 'grad_norm': 0.6938287127007983, 'learning_rate': 1.5380248453147544e-06, 'epoch': 0.75} + 75%|███████▌ | 9156/12188 [22:13<6:25:14, 7.62s/it] 75%|███████▌ | 9157/12188 [22:20<6:20:41, 7.54s/it] {'loss': 0.2927, 'grad_norm': 0.7278392247149372, 'learning_rate': 1.5370662813870134e-06, 'epoch': 0.75} + 75%|███████▌ | 9157/12188 [22:20<6:20:41, 7.54s/it] 75%|███████▌ | 9158/12188 [22:27<6:13:04, 7.39s/it] {'loss': 0.2963, 'grad_norm': 0.7879069809375612, 'learning_rate': 1.5361079620060864e-06, 'epoch': 0.75} + 75%|███████▌ | 9158/12188 [22:27<6:13:04, 7.39s/it] 75%|███████▌ | 9159/12188 [22:36<6:38:38, 7.90s/it] {'loss': 0.2692, 'grad_norm': 0.7320851860239854, 'learning_rate': 1.535149887239652e-06, 'epoch': 0.75} + 75%|███████▌ | 9159/12188 [22:36<6:38:38, 7.90s/it] 75%|███████▌ | 9160/12188 [22:44<6:27:52, 7.69s/it] {'loss': 0.2845, 'grad_norm': 0.6476565785774194, 'learning_rate': 1.5341920571553636e-06, 'epoch': 0.75} + 75%|███████▌ | 9160/12188 [22:44<6:27:52, 7.69s/it] 75%|███████▌ | 9161/12188 [22:50<6:14:33, 7.42s/it] {'loss': 0.305, 'grad_norm': 0.7039356798482055, 'learning_rate': 1.533234471820864e-06, 'epoch': 0.75} + 75%|███████▌ | 9161/12188 [22:50<6:14:33, 7.42s/it] 75%|███████▌ | 9162/12188 [22:58<6:10:28, 7.35s/it] {'loss': 0.3031, 'grad_norm': 1.4241033597206696, 'learning_rate': 1.5322771313037783e-06, 'epoch': 0.75} + 75%|███████▌ | 9162/12188 [22:58<6:10:28, 7.35s/it] 75%|███████▌ | 9163/12188 [23:05<6:11:14, 7.36s/it] {'loss': 0.2742, 'grad_norm': 0.7121918807748865, 'learning_rate': 1.5313200356717083e-06, 'epoch': 0.75} + 75%|███████▌ | 9163/12188 [23:05<6:11:14, 7.36s/it] 75%|███████▌ | 9164/12188 [23:14<6:41:11, 7.96s/it] {'loss': 0.3166, 'grad_norm': 0.7679116034375515, 'learning_rate': 1.5303631849922462e-06, 'epoch': 0.75} + 75%|███████▌ | 9164/12188 [23:14<6:41:11, 7.96s/it] 75%|███████▌ | 9165/12188 [23:23<6:47:48, 8.09s/it] {'loss': 0.2844, 'grad_norm': 0.6560395073685127, 'learning_rate': 1.5294065793329616e-06, 'epoch': 0.75} + 75%|███████▌ | 9165/12188 [23:23<6:47:48, 8.09s/it] 75%|███████▌ | 9166/12188 [23:30<6:34:31, 7.83s/it] {'loss': 0.283, 'grad_norm': 0.6807875724073007, 'learning_rate': 1.528450218761407e-06, 'epoch': 0.75} + 75%|███████▌ | 9166/12188 [23:30<6:34:31, 7.83s/it] 75%|███████▌ | 9167/12188 [23:38<6:39:20, 7.93s/it] {'loss': 0.3223, 'grad_norm': 0.7183227643857413, 'learning_rate': 1.5274941033451218e-06, 'epoch': 0.75} + 75%|███████▌ | 9167/12188 [23:38<6:39:20, 7.93s/it] 75%|███████▌ | 9168/12188 [23:46<6:35:49, 7.86s/it] {'loss': 0.2892, 'grad_norm': 0.7271724516487399, 'learning_rate': 1.5265382331516227e-06, 'epoch': 0.75} + 75%|███████▌ | 9168/12188 [23:46<6:35:49, 7.86s/it] 75%|███████▌ | 9169/12188 [23:53<6:21:43, 7.59s/it] {'loss': 0.2698, 'grad_norm': 0.657986725264206, 'learning_rate': 1.5255826082484126e-06, 'epoch': 0.75} + 75%|███████▌ | 9169/12188 [23:53<6:21:43, 7.59s/it] 75%|███████▌ | 9170/12188 [24:00<6:14:31, 7.45s/it] {'loss': 0.3045, 'grad_norm': 0.6534391545751441, 'learning_rate': 1.5246272287029783e-06, 'epoch': 0.75} + 75%|███████▌ | 9170/12188 [24:00<6:14:31, 7.45s/it] 75%|███████▌ | 9171/12188 [24:07<6:08:50, 7.34s/it] {'loss': 0.2911, 'grad_norm': 0.6509778658661204, 'learning_rate': 1.5236720945827844e-06, 'epoch': 0.75} + 75%|███████▌ | 9171/12188 [24:07<6:08:50, 7.34s/it] 75%|███████▌ | 9172/12188 [24:15<6:17:54, 7.52s/it] {'loss': 0.3421, 'grad_norm': 0.6690868549058394, 'learning_rate': 1.522717205955283e-06, 'epoch': 0.75} + 75%|███████▌ | 9172/12188 [24:15<6:17:54, 7.52s/it] 75%|███████▌ | 9173/12188 [24:22<6:08:13, 7.33s/it] {'loss': 0.3096, 'grad_norm': 0.6525673204976727, 'learning_rate': 1.5217625628879045e-06, 'epoch': 0.75} + 75%|███████▌ | 9173/12188 [24:22<6:08:13, 7.33s/it] 75%|███████▌ | 9174/12188 [24:30<6:17:57, 7.52s/it] {'loss': 0.3003, 'grad_norm': 0.7396936634119159, 'learning_rate': 1.520808165448066e-06, 'epoch': 0.75} + 75%|███████▌ | 9174/12188 [24:30<6:17:57, 7.52s/it] 75%|███████▌ | 9175/12188 [24:39<6:45:23, 8.07s/it] {'loss': 0.3414, 'grad_norm': 0.6636148684495844, 'learning_rate': 1.5198540137031665e-06, 'epoch': 0.75} + 75%|███████▌ | 9175/12188 [24:39<6:45:23, 8.07s/it] 75%|███████▌ | 9176/12188 [24:46<6:32:48, 7.82s/it] {'loss': 0.3574, 'grad_norm': 3.191919589800228, 'learning_rate': 1.5189001077205835e-06, 'epoch': 0.75} + 75%|███████▌ | 9176/12188 [24:46<6:32:48, 7.82s/it] 75%|███████▌ | 9177/12188 [24:55<6:37:35, 7.92s/it] {'loss': 0.3041, 'grad_norm': 0.7116793624624108, 'learning_rate': 1.5179464475676835e-06, 'epoch': 0.75} + 75%|███████▌ | 9177/12188 [24:55<6:37:35, 7.92s/it] 75%|███████▌ | 9178/12188 [25:01<6:18:18, 7.54s/it] {'loss': 0.2626, 'grad_norm': 0.7608789438966971, 'learning_rate': 1.5169930333118094e-06, 'epoch': 0.75} + 75%|███████▌ | 9178/12188 [25:01<6:18:18, 7.54s/it] 75%|███████▌ | 9179/12188 [25:09<6:17:52, 7.53s/it] {'loss': 0.3084, 'grad_norm': 0.674784568345504, 'learning_rate': 1.5160398650202935e-06, 'epoch': 0.75} + 75%|███████▌ | 9179/12188 [25:09<6:17:52, 7.53s/it] 75%|███████▌ | 9180/12188 [25:15<6:04:14, 7.27s/it] {'loss': 0.314, 'grad_norm': 0.7321304923826125, 'learning_rate': 1.5150869427604448e-06, 'epoch': 0.75} + 75%|███████▌ | 9180/12188 [25:15<6:04:14, 7.27s/it] 75%|███████▌ | 9181/12188 [25:23<6:06:34, 7.31s/it] {'loss': 0.2986, 'grad_norm': 0.6520031688040314, 'learning_rate': 1.5141342665995552e-06, 'epoch': 0.75} + 75%|███████▌ | 9181/12188 [25:23<6:06:34, 7.31s/it] 75%|███████▌ | 9182/12188 [25:30<6:00:07, 7.19s/it] {'loss': 0.2686, 'grad_norm': 0.8030035799669342, 'learning_rate': 1.5131818366049034e-06, 'epoch': 0.75} + 75%|███████▌ | 9182/12188 [25:30<6:00:07, 7.19s/it] 75%|███████▌ | 9183/12188 [25:37<6:00:23, 7.20s/it] {'loss': 0.3318, 'grad_norm': 0.7384619431204582, 'learning_rate': 1.5122296528437502e-06, 'epoch': 0.75} + 75%|███████▌ | 9183/12188 [25:37<6:00:23, 7.20s/it] 75%|███████▌ | 9184/12188 [25:46<6:35:10, 7.89s/it] {'loss': 0.3043, 'grad_norm': 0.6754281930775009, 'learning_rate': 1.5112777153833336e-06, 'epoch': 0.75} + 75%|███████▌ | 9184/12188 [25:46<6:35:10, 7.89s/it] 75%|███████▌ | 9185/12188 [25:53<6:17:53, 7.55s/it] {'loss': 0.2937, 'grad_norm': 0.6846731212773788, 'learning_rate': 1.5103260242908812e-06, 'epoch': 0.75} + 75%|███████▌ | 9185/12188 [25:53<6:17:53, 7.55s/it] 75%|███████▌ | 9186/12188 [26:02<6:43:04, 8.06s/it] {'loss': 0.2814, 'grad_norm': 0.7290069618841798, 'learning_rate': 1.5093745796335969e-06, 'epoch': 0.75} + 75%|███████▌ | 9186/12188 [26:02<6:43:04, 8.06s/it] 75%|███████▌ | 9187/12188 [26:10<6:31:33, 7.83s/it] {'loss': 0.3239, 'grad_norm': 0.7203253214957259, 'learning_rate': 1.5084233814786715e-06, 'epoch': 0.75} + 75%|���██████▌ | 9187/12188 [26:10<6:31:33, 7.83s/it] 75%|███████▌ | 9188/12188 [26:17<6:20:45, 7.62s/it] {'loss': 0.3111, 'grad_norm': 0.7225019351014466, 'learning_rate': 1.507472429893279e-06, 'epoch': 0.75} + 75%|███████▌ | 9188/12188 [26:17<6:20:45, 7.62s/it] 75%|███████▌ | 9189/12188 [26:24<6:10:37, 7.42s/it] {'loss': 0.2787, 'grad_norm': 0.7164073393245418, 'learning_rate': 1.5065217249445706e-06, 'epoch': 0.75} + 75%|███████▌ | 9189/12188 [26:24<6:10:37, 7.42s/it] 75%|███████▌ | 9190/12188 [26:31<6:03:05, 7.27s/it] {'loss': 0.3627, 'grad_norm': 0.7213558175888527, 'learning_rate': 1.5055712666996875e-06, 'epoch': 0.75} + 75%|███████▌ | 9190/12188 [26:31<6:03:05, 7.27s/it] 75%|███████▌ | 9191/12188 [26:38<6:00:54, 7.23s/it] {'loss': 0.2952, 'grad_norm': 0.7129719831062662, 'learning_rate': 1.504621055225745e-06, 'epoch': 0.75} + 75%|███████▌ | 9191/12188 [26:38<6:00:54, 7.23s/it] 75%|███████▌ | 9192/12188 [26:45<6:04:46, 7.31s/it] {'loss': 0.2986, 'grad_norm': 0.754976649542303, 'learning_rate': 1.5036710905898494e-06, 'epoch': 0.75} + 75%|███████▌ | 9192/12188 [26:45<6:04:46, 7.31s/it] 75%|███████▌ | 9193/12188 [26:54<6:31:28, 7.84s/it] {'loss': 0.2905, 'grad_norm': 0.660755308835193, 'learning_rate': 1.5027213728590846e-06, 'epoch': 0.75} + 75%|███████▌ | 9193/12188 [26:54<6:31:28, 7.84s/it] 75%|███████▌ | 9194/12188 [27:01<6:16:49, 7.55s/it] {'loss': 0.3214, 'grad_norm': 0.7750331331952097, 'learning_rate': 1.5017719021005156e-06, 'epoch': 0.75} + 75%|███████▌ | 9194/12188 [27:01<6:16:49, 7.55s/it] 75%|███████▌ | 9195/12188 [27:09<6:26:10, 7.74s/it] {'loss': 0.2795, 'grad_norm': 0.642645921460962, 'learning_rate': 1.5008226783811964e-06, 'epoch': 0.75} + 75%|███████▌ | 9195/12188 [27:09<6:26:10, 7.74s/it] 75%|███████▌ | 9196/12188 [27:17<6:26:06, 7.74s/it] {'loss': 0.3048, 'grad_norm': 0.6746223973324793, 'learning_rate': 1.4998737017681564e-06, 'epoch': 0.75} + 75%|███████▌ | 9196/12188 [27:17<6:26:06, 7.74s/it] 75%|███████▌ | 9197/12188 [27:25<6:21:20, 7.65s/it] {'loss': 0.2843, 'grad_norm': 0.6803201156766523, 'learning_rate': 1.4989249723284115e-06, 'epoch': 0.75} + 75%|███████▌ | 9197/12188 [27:25<6:21:20, 7.65s/it] 75%|███████▌ | 9198/12188 [27:33<6:34:49, 7.92s/it] {'loss': 0.2908, 'grad_norm': 0.7619729009169417, 'learning_rate': 1.4979764901289622e-06, 'epoch': 0.75} + 75%|███████▌ | 9198/12188 [27:33<6:34:49, 7.92s/it] 75%|███████▌ | 9199/12188 [27:41<6:26:43, 7.76s/it] {'loss': 0.301, 'grad_norm': 0.6138998504762339, 'learning_rate': 1.4970282552367854e-06, 'epoch': 0.75} + 75%|███████▌ | 9199/12188 [27:41<6:26:43, 7.76s/it] 75%|███████▌ | 9200/12188 [27:47<6:13:29, 7.50s/it] {'loss': 0.275, 'grad_norm': 0.7329022478954274, 'learning_rate': 1.4960802677188447e-06, 'epoch': 0.75} + 75%|███████▌ | 9200/12188 [27:47<6:13:29, 7.50s/it] 75%|███████▌ | 9201/12188 [27:55<6:10:46, 7.45s/it] {'loss': 0.3231, 'grad_norm': 0.6981227559973097, 'learning_rate': 1.495132527642088e-06, 'epoch': 0.75} + 75%|███████▌ | 9201/12188 [27:55<6:10:46, 7.45s/it] 76%|███████▌ | 9202/12188 [28:01<5:58:54, 7.21s/it] {'loss': 0.31, 'grad_norm': 0.7140241735386723, 'learning_rate': 1.4941850350734393e-06, 'epoch': 0.75} + 76%|███████▌ | 9202/12188 [28:01<5:58:54, 7.21s/it] 76%|███████▌ | 9203/12188 [28:09<6:02:07, 7.28s/it] {'loss': 0.3064, 'grad_norm': 0.6953399072965489, 'learning_rate': 1.4932377900798128e-06, 'epoch': 0.76} + 76%|███████▌ | 9203/12188 [28:09<6:02:07, 7.28s/it] 76%|███████▌ | 9204/12188 [28:16<6:00:37, 7.25s/it] {'loss': 0.3226, 'grad_norm': 0.6942969378417492, 'learning_rate': 1.4922907927280978e-06, 'epoch': 0.76} + 76%|███████▌ | 9204/12188 [28:16<6:00:37, 7.25s/it] 76%|███████▌ | 9205/12188 [28:27<6:59:16, 8.43s/it] {'loss': 0.2913, 'grad_norm': 0.7214004599936166, 'learning_rate': 1.4913440430851734e-06, 'epoch': 0.76} + 76%|███████▌ | 9205/12188 [28:27<6:59:16, 8.43s/it] 76%|███████▌ | 9206/12188 [28:34<6:38:20, 8.01s/it] {'loss': 0.2865, 'grad_norm': 0.8525021562435114, 'learning_rate': 1.4903975412178933e-06, 'epoch': 0.76} + 76%|███████▌ | 9206/12188 [28:34<6:38:20, 8.01s/it] 76%|███████▌ | 9207/12188 [28:43<6:48:28, 8.22s/it] {'loss': 0.3277, 'grad_norm': 0.6993586753442246, 'learning_rate': 1.4894512871931022e-06, 'epoch': 0.76} + 76%|███████▌ | 9207/12188 [28:43<6:48:28, 8.22s/it] 76%|███████▌ | 9208/12188 [28:50<6:36:38, 7.99s/it] {'loss': 0.3351, 'grad_norm': 0.7134090588152315, 'learning_rate': 1.4885052810776213e-06, 'epoch': 0.76} + 76%|███████▌ | 9208/12188 [28:50<6:36:38, 7.99s/it] 76%|███████▌ | 9209/12188 [28:58<6:29:38, 7.85s/it] {'loss': 0.3263, 'grad_norm': 0.7073667559518289, 'learning_rate': 1.4875595229382538e-06, 'epoch': 0.76} + 76%|███████▌ | 9209/12188 [28:58<6:29:38, 7.85s/it] 76%|███████▌ | 9210/12188 [29:05<6:19:41, 7.65s/it] {'loss': 0.3188, 'grad_norm': 0.7541154750864597, 'learning_rate': 1.48661401284179e-06, 'epoch': 0.76} + 76%|███████▌ | 9210/12188 [29:05<6:19:41, 7.65s/it] 76%|███████▌ | 9211/12188 [29:12<6:12:40, 7.51s/it] {'loss': 0.3132, 'grad_norm': 0.698358085869162, 'learning_rate': 1.4856687508550012e-06, 'epoch': 0.76} + 76%|███████▌ | 9211/12188 [29:12<6:12:40, 7.51s/it] 76%|███████▌ | 9212/12188 [29:19<6:02:07, 7.30s/it] {'loss': 0.2778, 'grad_norm': 0.6661904949246041, 'learning_rate': 1.4847237370446378e-06, 'epoch': 0.76} + 76%|███████▌ | 9212/12188 [29:19<6:02:07, 7.30s/it] 76%|███████▌ | 9213/12188 [29:27<6:02:53, 7.32s/it] {'loss': 0.293, 'grad_norm': 1.1116093088864225, 'learning_rate': 1.4837789714774375e-06, 'epoch': 0.76} + 76%|███████▌ | 9213/12188 [29:27<6:02:53, 7.32s/it] 76%|███████▌ | 9214/12188 [29:33<5:56:16, 7.19s/it] {'loss': 0.2865, 'grad_norm': 0.756842306798306, 'learning_rate': 1.4828344542201155e-06, 'epoch': 0.76} + 76%|███████▌ | 9214/12188 [29:33<5:56:16, 7.19s/it] 76%|███████▌ | 9215/12188 [29:40<5:48:15, 7.03s/it] {'loss': 0.2822, 'grad_norm': 0.7275980221026159, 'learning_rate': 1.4818901853393735e-06, 'epoch': 0.76} + 76%|███████▌ | 9215/12188 [29:40<5:48:15, 7.03s/it] 76%|███████▌ | 9216/12188 [29:48<5:54:17, 7.15s/it] {'loss': 0.2974, 'grad_norm': 0.7555158607721775, 'learning_rate': 1.4809461649018964e-06, 'epoch': 0.76} + 76%|███████▌ | 9216/12188 [29:48<5:54:17, 7.15s/it] 76%|███████▌ | 9217/12188 [29:55<6:03:58, 7.35s/it] {'loss': 0.2957, 'grad_norm': 0.661055058867387, 'learning_rate': 1.4800023929743452e-06, 'epoch': 0.76} + 76%|███████▌ | 9217/12188 [29:55<6:03:58, 7.35s/it] 76%|███████▌ | 9218/12188 [30:02<5:52:58, 7.13s/it] {'loss': 0.3114, 'grad_norm': 0.694163802668386, 'learning_rate': 1.4790588696233698e-06, 'epoch': 0.76} + 76%|███████▌ | 9218/12188 [30:02<5:52:58, 7.13s/it] 76%|███████▌ | 9219/12188 [30:09<5:52:20, 7.12s/it] {'loss': 0.2758, 'grad_norm': 0.9195982955353118, 'learning_rate': 1.4781155949156023e-06, 'epoch': 0.76} + 76%|███████▌ | 9219/12188 [30:09<5:52:20, 7.12s/it] 76%|███████▌ | 9220/12188 [30:20<6:47:55, 8.25s/it] {'loss': 0.3351, 'grad_norm': 0.7221824786586951, 'learning_rate': 1.4771725689176524e-06, 'epoch': 0.76} + 76%|███████▌ | 9220/12188 [30:20<6:47:55, 8.25s/it] 76%|███████▌ | 9221/12188 [30:29<6:56:35, 8.42s/it] {'loss': 0.3426, 'grad_norm': 0.7003842661623756, 'learning_rate': 1.4762297916961166e-06, 'epoch': 0.76} + 76%|███████▌ | 9221/12188 [30:29<6:56:35, 8.42s/it] 76%|███████▌ | 9222/12188 [30:36<6:38:41, 8.07s/it] {'loss': 0.2569, 'grad_norm': 0.6579569838708601, 'learning_rate': 1.4752872633175691e-06, 'epoch': 0.76} + 76%|███████▌ | 9222/12188 [30:36<6:38:41, 8.07s/it] 76%|███████▌ | 9223/12188 [30:46<7:06:02, 8.62s/it] {'loss': 0.2797, 'grad_norm': 0.7392316193781011, 'learning_rate': 1.4743449838485729e-06, 'epoch': 0.76} + 76%|███████▌ | 9223/12188 [30:46<7:06:02, 8.62s/it] 76%|███████▌ | 9224/12188 [30:55<7:17:19, 8.85s/it] {'loss': 0.272, 'grad_norm': 0.6812342101443275, 'learning_rate': 1.473402953355671e-06, 'epoch': 0.76} + 76%|███████▌ | 9224/12188 [30:55<7:17:19, 8.85s/it] 76%|███████▌ | 9225/12188 [31:03<7:06:23, 8.63s/it] {'loss': 0.2773, 'grad_norm': 0.7261298334809048, 'learning_rate': 1.472461171905385e-06, 'epoch': 0.76} + 76%|███████▌ | 9225/12188 [31:03<7:06:23, 8.63s/it] 76%|███████▌ | 9226/12188 [31:10<6:38:58, 8.08s/it] {'loss': 0.3235, 'grad_norm': 0.6840227282079538, 'learning_rate': 1.471519639564225e-06, 'epoch': 0.76} + 76%|███████▌ | 9226/12188 [31:10<6:38:58, 8.08s/it] 76%|███████▌ | 9227/12188 [31:17<6:17:21, 7.65s/it] {'loss': 0.3362, 'grad_norm': 0.7364400679685211, 'learning_rate': 1.4705783563986774e-06, 'epoch': 0.76} + 76%|███████▌ | 9227/12188 [31:17<6:17:21, 7.65s/it] 76%|███████▌ | 9228/12188 [31:24<6:08:05, 7.46s/it] {'loss': 0.3003, 'grad_norm': 0.7055406173826272, 'learning_rate': 1.4696373224752163e-06, 'epoch': 0.76} + 76%|███████▌ | 9228/12188 [31:24<6:08:05, 7.46s/it] 76%|███████▌ | 9229/12188 [31:31<6:04:14, 7.39s/it] {'loss': 0.2691, 'grad_norm': 0.7200501568089207, 'learning_rate': 1.468696537860297e-06, 'epoch': 0.76} + 76%|███████▌ | 9229/12188 [31:31<6:04:14, 7.39s/it] 76%|███████▌ | 9230/12188 [31:38<6:00:08, 7.31s/it] {'loss': 0.2906, 'grad_norm': 0.6776057596254301, 'learning_rate': 1.467756002620353e-06, 'epoch': 0.76} + 76%|███████▌ | 9230/12188 [31:38<6:00:08, 7.31s/it] 76%|███████▌ | 9231/12188 [31:45<5:59:43, 7.30s/it] {'loss': 0.319, 'grad_norm': 0.7835559404156061, 'learning_rate': 1.4668157168218067e-06, 'epoch': 0.76} + 76%|███████▌ | 9231/12188 [31:46<5:59:43, 7.30s/it] 76%|███████▌ | 9232/12188 [31:52<5:54:11, 7.19s/it] {'loss': 0.3026, 'grad_norm': 0.6822266091205285, 'learning_rate': 1.4658756805310565e-06, 'epoch': 0.76} + 76%|███████▌ | 9232/12188 [31:52<5:54:11, 7.19s/it] 76%|███████▌ | 9233/12188 [32:00<6:04:57, 7.41s/it] {'loss': 0.3285, 'grad_norm': 0.6799446615307962, 'learning_rate': 1.4649358938144891e-06, 'epoch': 0.76} + 76%|███████▌ | 9233/12188 [32:00<6:04:57, 7.41s/it] 76%|███████▌ | 9234/12188 [32:07<5:56:20, 7.24s/it] {'loss': 0.3014, 'grad_norm': 0.7425403576103549, 'learning_rate': 1.4639963567384679e-06, 'epoch': 0.76} + 76%|███████▌ | 9234/12188 [32:07<5:56:20, 7.24s/it] 76%|███████▌ | 9235/12188 [32:14<5:53:37, 7.19s/it] {'loss': 0.3076, 'grad_norm': 0.7040123270424096, 'learning_rate': 1.4630570693693447e-06, 'epoch': 0.76} + 76%|███████▌ | 9235/12188 [32:14<5:53:37, 7.19s/it] 76%|███████▌ | 9236/12188 [32:21<5:54:21, 7.20s/it] {'loss': 0.3202, 'grad_norm': 0.682921712997639, 'learning_rate': 1.4621180317734468e-06, 'epoch': 0.76} + 76%|███████▌ | 9236/12188 [32:22<5:54:21, 7.20s/it] 76%|███████▌ | 9237/12188 [32:29<5:56:34, 7.25s/it] {'loss': 0.3043, 'grad_norm': 0.7054652397741124, 'learning_rate': 1.4611792440170914e-06, 'epoch': 0.76} + 76%|███████▌ | 9237/12188 [32:29<5:56:34, 7.25s/it] 76%|███████▌ | 9238/12188 [32:36<5:52:17, 7.17s/it] {'loss': 0.3274, 'grad_norm': 0.718690519577948, 'learning_rate': 1.4602407061665702e-06, 'epoch': 0.76} + 76%|███████▌ | 9238/12188 [32:36<5:52:17, 7.17s/it] 76%|███████▌ | 9239/12188 [32:42<5:42:37, 6.97s/it] {'loss': 0.3045, 'grad_norm': 0.6644921582658266, 'learning_rate': 1.4593024182881655e-06, 'epoch': 0.76} + 76%|███████▌ | 9239/12188 [32:42<5:42:37, 6.97s/it] 76%|███████▌ | 9240/12188 [32:50<5:50:29, 7.13s/it] {'loss': 0.3112, 'grad_norm': 0.7162049255688311, 'learning_rate': 1.4583643804481334e-06, 'epoch': 0.76} + 76%|███████▌ | 9240/12188 [32:50<5:50:29, 7.13s/it] 76%|███████▌ | 9241/12188 [32:58<6:12:14, 7.58s/it] {'loss': 0.3114, 'grad_norm': 0.6988928740569651, 'learning_rate': 1.457426592712719e-06, 'epoch': 0.76} + 76%|███████▌ | 9241/12188 [32:58<6:12:14, 7.58s/it] 76%|███████▌ | 9242/12188 [33:06<6:08:00, 7.49s/it] {'loss': 0.2637, 'grad_norm': 0.7530640242009705, 'learning_rate': 1.4564890551481481e-06, 'epoch': 0.76} + 76%|███████▌ | 9242/12188 [33:06<6:08:00, 7.49s/it] 76%|███████▌ | 9243/12188 [33:13<6:07:39, 7.49s/it] {'loss': 0.2433, 'grad_norm': 0.6548753628879006, 'learning_rate': 1.455551767820626e-06, 'epoch': 0.76} + 76%|███████▌ | 9243/12188 [33:13<6:07:39, 7.49s/it] 76%|███████▌ | 9244/12188 [33:20<6:00:55, 7.36s/it] {'loss': 0.2849, 'grad_norm': 0.6187969151752795, 'learning_rate': 1.4546147307963449e-06, 'epoch': 0.76} + 76%|███████▌ | 9244/12188 [33:20<6:00:55, 7.36s/it] 76%|███████▌ | 9245/12188 [33:27<5:56:33, 7.27s/it] {'loss': 0.2881, 'grad_norm': 0.6977057251403874, 'learning_rate': 1.453677944141474e-06, 'epoch': 0.76} + 76%|███████▌ | 9245/12188 [33:27<5:56:33, 7.27s/it] 76%|███████▌ | 9246/12188 [33:34<5:53:53, 7.22s/it] {'loss': 0.2968, 'grad_norm': 0.6382797864891625, 'learning_rate': 1.4527414079221686e-06, 'epoch': 0.76} + 76%|███████▌ | 9246/12188 [33:34<5:53:53, 7.22s/it] 76%|███████▌ | 9247/12188 [33:42<5:52:35, 7.19s/it] {'loss': 0.2798, 'grad_norm': 0.6696849898780775, 'learning_rate': 1.4518051222045675e-06, 'epoch': 0.76} + 76%|███████▌ | 9247/12188 [33:42<5:52:35, 7.19s/it] 76%|███████▌ | 9248/12188 [33:49<5:55:42, 7.26s/it] {'loss': 0.3067, 'grad_norm': 0.6320923057655288, 'learning_rate': 1.4508690870547888e-06, 'epoch': 0.76} + 76%|███████▌ | 9248/12188 [33:49<5:55:42, 7.26s/it] 76%|███████▌ | 9249/12188 [33:56<5:53:42, 7.22s/it] {'loss': 0.2941, 'grad_norm': 0.7498836072215277, 'learning_rate': 1.4499333025389323e-06, 'epoch': 0.76} + 76%|███████▌ | 9249/12188 [33:56<5:53:42, 7.22s/it] 76%|███████▌ | 9250/12188 [34:03<5:51:15, 7.17s/it] {'loss': 0.3312, 'grad_norm': 0.6826060213679302, 'learning_rate': 1.448997768723081e-06, 'epoch': 0.76} + 76%|███████▌ | 9250/12188 [34:03<5:51:15, 7.17s/it] 76%|███████▌ | 9251/12188 [34:10<5:45:37, 7.06s/it] {'loss': 0.3097, 'grad_norm': 1.0009028410180587, 'learning_rate': 1.4480624856733022e-06, 'epoch': 0.76} + 76%|███████▌ | 9251/12188 [34:10<5:45:37, 7.06s/it] 76%|███████▌ | 9252/12188 [34:17<5:41:53, 6.99s/it] {'loss': 0.3221, 'grad_norm': 0.7371326961357251, 'learning_rate': 1.4471274534556457e-06, 'epoch': 0.76} + 76%|███████▌ | 9252/12188 [34:17<5:41:53, 6.99s/it] 76%|███████▌ | 9253/12188 [34:24<5:40:34, 6.96s/it] {'loss': 0.2887, 'grad_norm': 0.6499480125468649, 'learning_rate': 1.4461926721361386e-06, 'epoch': 0.76} + 76%|███████▌ | 9253/12188 [34:24<5:40:34, 6.96s/it] 76%|███████▌ | 9254/12188 [34:31<5:39:46, 6.95s/it] {'loss': 0.3513, 'grad_norm': 0.7694362692281264, 'learning_rate': 1.4452581417807954e-06, 'epoch': 0.76} + 76%|███████▌ | 9254/12188 [34:31<5:39:46, 6.95s/it] 76%|█��█████▌ | 9255/12188 [34:38<5:44:15, 7.04s/it] {'loss': 0.3238, 'grad_norm': 0.7282890715211622, 'learning_rate': 1.4443238624556127e-06, 'epoch': 0.76} + 76%|███████▌ | 9255/12188 [34:38<5:44:15, 7.04s/it] 76%|███████▌ | 9256/12188 [34:45<5:45:23, 7.07s/it] {'loss': 0.2972, 'grad_norm': 0.7098687955274144, 'learning_rate': 1.4433898342265646e-06, 'epoch': 0.76} + 76%|███████▌ | 9256/12188 [34:45<5:45:23, 7.07s/it] 76%|███████▌ | 9257/12188 [34:53<5:51:55, 7.20s/it] {'loss': 0.301, 'grad_norm': 0.6558852745064866, 'learning_rate': 1.4424560571596135e-06, 'epoch': 0.76} + 76%|███████▌ | 9257/12188 [34:53<5:51:55, 7.20s/it] 76%|███████▌ | 9258/12188 [35:00<5:55:03, 7.27s/it] {'loss': 0.3011, 'grad_norm': 1.1541714737177515, 'learning_rate': 1.4415225313206987e-06, 'epoch': 0.76} + 76%|███████▌ | 9258/12188 [35:00<5:55:03, 7.27s/it] 76%|███████▌ | 9259/12188 [35:07<5:56:39, 7.31s/it] {'loss': 0.2909, 'grad_norm': 0.7142716166552527, 'learning_rate': 1.4405892567757456e-06, 'epoch': 0.76} + 76%|███████▌ | 9259/12188 [35:07<5:56:39, 7.31s/it] 76%|███████▌ | 9260/12188 [35:14<5:53:36, 7.25s/it] {'loss': 0.2945, 'grad_norm': 0.6808382242791372, 'learning_rate': 1.4396562335906622e-06, 'epoch': 0.76} + 76%|███████▌ | 9260/12188 [35:14<5:53:36, 7.25s/it] 76%|███████▌ | 9261/12188 [35:22<5:58:43, 7.35s/it] {'loss': 0.3007, 'grad_norm': 0.6825213403184629, 'learning_rate': 1.4387234618313357e-06, 'epoch': 0.76} + 76%|███████▌ | 9261/12188 [35:22<5:58:43, 7.35s/it] 76%|███████▌ | 9262/12188 [35:30<6:12:03, 7.63s/it] {'loss': 0.329, 'grad_norm': 0.7037418250309235, 'learning_rate': 1.4377909415636349e-06, 'epoch': 0.76} + 76%|███████▌ | 9262/12188 [35:30<6:12:03, 7.63s/it] 76%|███████▌ | 9263/12188 [35:40<6:42:10, 8.25s/it] {'loss': 0.2629, 'grad_norm': 0.6354895164170925, 'learning_rate': 1.4368586728534162e-06, 'epoch': 0.76} + 76%|███████▌ | 9263/12188 [35:40<6:42:10, 8.25s/it] 76%|███████▌ | 9264/12188 [35:47<6:21:16, 7.82s/it] {'loss': 0.2928, 'grad_norm': 0.7174339256110237, 'learning_rate': 1.435926655766512e-06, 'epoch': 0.76} + 76%|███████▌ | 9264/12188 [35:47<6:21:16, 7.82s/it] 76%|███████▌ | 9265/12188 [35:54<6:13:35, 7.67s/it] {'loss': 0.3195, 'grad_norm': 0.7573908535082903, 'learning_rate': 1.4349948903687428e-06, 'epoch': 0.76} + 76%|███████▌ | 9265/12188 [35:54<6:13:35, 7.67s/it] 76%|███████▌ | 9266/12188 [36:01<6:01:35, 7.42s/it] {'loss': 0.3274, 'grad_norm': 1.0978568632099286, 'learning_rate': 1.434063376725905e-06, 'epoch': 0.76} + 76%|███████▌ | 9266/12188 [36:01<6:01:35, 7.42s/it] 76%|███████▌ | 9267/12188 [36:08<5:53:23, 7.26s/it] {'loss': 0.2964, 'grad_norm': 0.6960448399426167, 'learning_rate': 1.433132114903784e-06, 'epoch': 0.76} + 76%|███████▌ | 9267/12188 [36:08<5:53:23, 7.26s/it] 76%|███████▌ | 9268/12188 [36:15<5:47:00, 7.13s/it] {'loss': 0.3088, 'grad_norm': 0.7238711963858693, 'learning_rate': 1.432201104968141e-06, 'epoch': 0.76} + 76%|███████▌ | 9268/12188 [36:15<5:47:00, 7.13s/it] 76%|███████▌ | 9269/12188 [36:25<6:32:14, 8.06s/it] {'loss': 0.284, 'grad_norm': 0.7778066039005469, 'learning_rate': 1.4312703469847238e-06, 'epoch': 0.76} + 76%|███████▌ | 9269/12188 [36:25<6:32:14, 8.06s/it] 76%|███████▌ | 9270/12188 [36:32<6:17:52, 7.77s/it] {'loss': 0.2737, 'grad_norm': 0.7202979909856309, 'learning_rate': 1.430339841019263e-06, 'epoch': 0.76} + 76%|███████▌ | 9270/12188 [36:32<6:17:52, 7.77s/it] 76%|███████▌ | 9271/12188 [36:39<6:08:07, 7.57s/it] {'loss': 0.2908, 'grad_norm': 0.7202120969311447, 'learning_rate': 1.4294095871374658e-06, 'epoch': 0.76} + 76%|██████���▌ | 9271/12188 [36:39<6:08:07, 7.57s/it] 76%|███████▌ | 9272/12188 [36:49<6:35:05, 8.13s/it] {'loss': 0.2975, 'grad_norm': 0.6440082928498808, 'learning_rate': 1.4284795854050265e-06, 'epoch': 0.76} + 76%|███████▌ | 9272/12188 [36:49<6:35:05, 8.13s/it] 76%|███████▌ | 9273/12188 [36:55<6:13:26, 7.69s/it] {'loss': 0.3035, 'grad_norm': 0.7076982961292944, 'learning_rate': 1.4275498358876227e-06, 'epoch': 0.76} + 76%|███████▌ | 9273/12188 [36:55<6:13:26, 7.69s/it] 76%|███████▌ | 9274/12188 [37:02<6:05:36, 7.53s/it] {'loss': 0.2918, 'grad_norm': 0.6965634754577341, 'learning_rate': 1.4266203386509087e-06, 'epoch': 0.76} + 76%|███████▌ | 9274/12188 [37:02<6:05:36, 7.53s/it] 76%|███████▌ | 9275/12188 [37:09<5:59:05, 7.40s/it] {'loss': 0.2737, 'grad_norm': 0.7274097777569987, 'learning_rate': 1.4256910937605263e-06, 'epoch': 0.76} + 76%|███████▌ | 9275/12188 [37:10<5:59:05, 7.40s/it] 76%|███████▌ | 9276/12188 [37:17<6:00:00, 7.42s/it] {'loss': 0.2845, 'grad_norm': 0.6886811359824375, 'learning_rate': 1.4247621012820967e-06, 'epoch': 0.76} + 76%|███████▌ | 9276/12188 [37:17<6:00:00, 7.42s/it] 76%|███████▌ | 9277/12188 [37:25<6:01:50, 7.46s/it] {'loss': 0.2935, 'grad_norm': 0.8710762267617796, 'learning_rate': 1.4238333612812215e-06, 'epoch': 0.76} + 76%|███████▌ | 9277/12188 [37:25<6:01:50, 7.46s/it] 76%|███████▌ | 9278/12188 [37:31<5:50:28, 7.23s/it] {'loss': 0.2946, 'grad_norm': 0.7857801119031241, 'learning_rate': 1.4229048738234907e-06, 'epoch': 0.76} + 76%|███████▌ | 9278/12188 [37:31<5:50:28, 7.23s/it] 76%|███████▌ | 9279/12188 [37:38<5:39:45, 7.01s/it] {'loss': 0.33, 'grad_norm': 0.6691777722420632, 'learning_rate': 1.421976638974469e-06, 'epoch': 0.76} + 76%|███████▌ | 9279/12188 [37:38<5:39:45, 7.01s/it] 76%|███████▌ | 9280/12188 [37:45<5:39:44, 7.01s/it] {'loss': 0.3319, 'grad_norm': 0.674172836804011, 'learning_rate': 1.42104865679971e-06, 'epoch': 0.76} + 76%|███████▌ | 9280/12188 [37:45<5:39:44, 7.01s/it] 76%|███████▌ | 9281/12188 [37:52<5:37:41, 6.97s/it] {'loss': 0.3113, 'grad_norm': 0.6273823089372486, 'learning_rate': 1.4201209273647431e-06, 'epoch': 0.76} + 76%|███████▌ | 9281/12188 [37:52<5:37:41, 6.97s/it] 76%|███████▌ | 9282/12188 [38:00<5:54:25, 7.32s/it] {'loss': 0.2718, 'grad_norm': 0.6674981642152833, 'learning_rate': 1.4191934507350851e-06, 'epoch': 0.76} + 76%|███████▌ | 9282/12188 [38:00<5:54:25, 7.32s/it] 76%|███████▌ | 9283/12188 [38:08<6:15:24, 7.75s/it] {'loss': 0.3036, 'grad_norm': 0.684316638559796, 'learning_rate': 1.4182662269762342e-06, 'epoch': 0.76} + 76%|███████▌ | 9283/12188 [38:08<6:15:24, 7.75s/it] 76%|███████▌ | 9284/12188 [38:16<6:10:44, 7.66s/it] {'loss': 0.2942, 'grad_norm': 0.6308855713960391, 'learning_rate': 1.4173392561536665e-06, 'epoch': 0.76} + 76%|███████▌ | 9284/12188 [38:16<6:10:44, 7.66s/it] 76%|███████▌ | 9285/12188 [38:24<6:10:13, 7.65s/it] {'loss': 0.2872, 'grad_norm': 0.6566180033423976, 'learning_rate': 1.4164125383328453e-06, 'epoch': 0.76} + 76%|███████▌ | 9285/12188 [38:24<6:10:13, 7.65s/it] 76%|███████▌ | 9286/12188 [38:30<5:58:48, 7.42s/it] {'loss': 0.2804, 'grad_norm': 0.6715570915998335, 'learning_rate': 1.415486073579212e-06, 'epoch': 0.76} + 76%|███████▌ | 9286/12188 [38:30<5:58:48, 7.42s/it] 76%|███████▌ | 9287/12188 [38:39<6:18:08, 7.82s/it] {'loss': 0.3425, 'grad_norm': 0.7300449007503895, 'learning_rate': 1.4145598619581935e-06, 'epoch': 0.76} + 76%|███████▌ | 9287/12188 [38:39<6:18:08, 7.82s/it] 76%|███████▌ | 9288/12188 [38:46<6:09:38, 7.65s/it] {'loss': 0.2896, 'grad_norm': 0.6540424568609088, 'learning_rate': 1.4136339035351988e-06, 'epoch': 0.76} + 76%|███████▌ | 9288/12188 [38:46<6:09:38, 7.65s/it] 76%|███████▌ | 9289/12188 [38:56<6:35:46, 8.19s/it] {'loss': 0.3045, 'grad_norm': 1.3556891082007572, 'learning_rate': 1.4127081983756152e-06, 'epoch': 0.76} + 76%|███████▌ | 9289/12188 [38:56<6:35:46, 8.19s/it] 76%|███████▌ | 9290/12188 [39:03<6:18:10, 7.83s/it] {'loss': 0.3407, 'grad_norm': 0.8025580035668011, 'learning_rate': 1.4117827465448142e-06, 'epoch': 0.76} + 76%|███████▌ | 9290/12188 [39:03<6:18:10, 7.83s/it] 76%|███████▌ | 9291/12188 [39:10<6:06:01, 7.58s/it] {'loss': 0.3283, 'grad_norm': 0.6905415791545629, 'learning_rate': 1.4108575481081522e-06, 'epoch': 0.76} + 76%|███████▌ | 9291/12188 [39:10<6:06:01, 7.58s/it] 76%|███████▌ | 9292/12188 [39:17<5:57:44, 7.41s/it] {'loss': 0.3114, 'grad_norm': 0.6857240416789837, 'learning_rate': 1.4099326031309617e-06, 'epoch': 0.76} + 76%|███████▌ | 9292/12188 [39:17<5:57:44, 7.41s/it] 76%|███████▌ | 9293/12188 [39:24<5:51:26, 7.28s/it] {'loss': 0.2726, 'grad_norm': 0.6934297143092084, 'learning_rate': 1.4090079116785649e-06, 'epoch': 0.76} + 76%|███████▌ | 9293/12188 [39:24<5:51:26, 7.28s/it] 76%|███████▋ | 9294/12188 [39:33<6:14:22, 7.76s/it] {'loss': 0.3368, 'grad_norm': 0.686659466308641, 'learning_rate': 1.4080834738162585e-06, 'epoch': 0.76} + 76%|███████▋ | 9294/12188 [39:33<6:14:22, 7.76s/it] 76%|███████▋ | 9295/12188 [39:39<5:58:48, 7.44s/it] {'loss': 0.2933, 'grad_norm': 0.7335486166023579, 'learning_rate': 1.4071592896093261e-06, 'epoch': 0.76} + 76%|███████▋ | 9295/12188 [39:39<5:58:48, 7.44s/it] 76%|███████▋ | 9296/12188 [39:46<5:51:00, 7.28s/it] {'loss': 0.2923, 'grad_norm': 0.7483965767782312, 'learning_rate': 1.4062353591230343e-06, 'epoch': 0.76} + 76%|███████▋ | 9296/12188 [39:46<5:51:00, 7.28s/it] 76%|███████▋ | 9297/12188 [39:53<5:37:11, 7.00s/it] {'loss': 0.3003, 'grad_norm': 0.6863755778645716, 'learning_rate': 1.4053116824226253e-06, 'epoch': 0.76} + 76%|███████▋ | 9297/12188 [39:53<5:37:11, 7.00s/it] 76%|███████▋ | 9298/12188 [40:00<5:35:41, 6.97s/it] {'loss': 0.3218, 'grad_norm': 0.712397095393335, 'learning_rate': 1.4043882595733322e-06, 'epoch': 0.76} + 76%|███████▋ | 9298/12188 [40:00<5:35:41, 6.97s/it] 76%|███████▋ | 9299/12188 [40:07<5:44:23, 7.15s/it] {'loss': 0.2921, 'grad_norm': 0.6932781518656108, 'learning_rate': 1.4034650906403618e-06, 'epoch': 0.76} + 76%|███████▋ | 9299/12188 [40:07<5:44:23, 7.15s/it] 76%|███████▋ | 9300/12188 [40:14<5:46:25, 7.20s/it] {'loss': 0.2639, 'grad_norm': 0.6949008051618677, 'learning_rate': 1.402542175688908e-06, 'epoch': 0.76} + 76%|███████▋ | 9300/12188 [40:14<5:46:25, 7.20s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1390, in _get_item + sources = self.preprocess_conversation_format( + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1181, in preprocess_conversation_format + msg = format_grounding_internvl2qwenvl( + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 394, in format_grounding_internvl2qwenvl + new_message = find_bbox(ref_matches, message, new_image_size) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 257, in find_bbox + assert len(ref_matches) == len( +AssertionError: ref_matches: ['{{ref-web|títol= Une «bombe météorologique» déclenche la panique au Canada et aux États-Unis | url= http://ec.gc.ca/meteo-weather/default.asp?lang=Fr&n=47397091-1 |lloc= [[Service météorologique du Canada]] |editor= [[Environnement Canada]] |consulta= 26 desembre 2013}}', '{{ref-publicació|cognom=Martín León|nom=F|títol=El concepto de ciclogénesis explosiva o “bomba meteorológica”|publicació=RAM-Revista del Aficionado a la Meteorología|data=28 octubre 2013|volum=octubre 2013|url=http://www.tiempo.com/ram/4070/el-concepto-de-ciclognesis-explosiva/|consulta=5 març 2014}}', "{{ref-notícia|cognom=Bernis|nom=M|títol=Qui s'ha inventat la ciclogènesi explosiva?|publicació=Diari Ara|url=http://www.ara.cat/societat/meteo/sha-inventat-ciclogenesi-explosiva_0_1054094656.html|consulta=5 març 2014|data=25 desembre 2013}}", '{{ref-publicació|cognom=Servei Meteorològic de Catalunya|títol=Resum mensual. Febrer 2010|publicació=Butlletins climàtics|data=Març 2010|url=http://www20.gencat.cat/docs/meteocat/Continguts/Climatologia/Butlletins%20i%20resums%20climatics/Butlletins%20mensuals/2010/pdf/ButlletiFebrer10.pdf|consulta=5 març 2014|arxiuurl=https://web.archive.org/web/20140305171041/http://www20.gencat.cat/docs/meteocat/Continguts/Climatologia/Butlletins%20i%20resums%20climatics/Butlletins%20mensuals/2010/pdf/ButlletiFebrer10.pdf|arxiudata=5 de març 2014}}', "{{ref-notícia|cognom=VilaWeb|títol=Tempesta 'Xynthia': destrosses i ferits a Catalunya Nord|publicació=VilaWeb|url=http://www.vilaweb.cat/noticia/3696118/20100301/tempesta-xynthia-causa-destroces-catalunya-nord.html|consulta=5 març 2014|data=1 març 2010}}", '{{ref-notícia|cognom=Portal informatiu de TV3|nom=3/24|títol=50\xa0morts i 9 desapareguts a França pel pas de la tempesta "Xynthia"|url=http://www.324.cat/noticia/548185/societat/50-morts-i-9-desapareguts-a-Franca-pel-pas-de-la-tempesta-Xynthia|consulta=5 març 2014|data=3/3/2010}}', '{{ref-notícia|cognom=Libération|títol=Xynthia, retour sur la tempête|publicació=Libération|url=http://www.liberation.fr/tempete-xynthia,99875|consulta=5 març 2014|data=febrer i març 2010}} {{Webarchive|url=https://web.archive.org/web/20140305165824/http://www.liberation.fr/tempete-xynthia,99875 |date=5 de març 2014 }} {{Ref-web |url=http://www.liberation.fr/tempete-xynthia,99875 |títol=Còpia arxivada |consulta=2014-03-05 |arxiuurl=https://web.archive.org/web/20140305165824/http://www.liberation.fr/tempete-xynthia,99875 |arxiudata=2014-03-05}}'], box_matches: ['[[Fitxer:BraerStorm1993.png|miniatura|El gener de 1993 es creà una tempesta que va arribar a un mínim històric de 913 [[mbar]]', '[[borrasca]]', '[[Service météorologique du Canada]]', '[[Environnement Canada]]', '[[Categoria:Meteorologia|Ciclogènesi explosiva]]'], message: [[Fitxer:BraerStorm1993.png|miniatura|El gener de 1993 es creà una tempesta que va arribar a un mínim històric de 913 [[mbar]] (hPa).]] +Una '''ciclogènesi explosiva''' és una [[borrasca]] que s'aprofundeix molt ràpidament, amb una variació de més 24 hectopascals (hPa) en menys de 24 hores que pot generar forts vents amb velocitat fins a 140 kilòmetres per hora.{{ref-web|títol= Une «bombe météorologique» déclenche la panique au Canada et aux États-Unis | url= http://ec.gc.ca/meteo-weather/default.asp?lang=Fr&n=47397091-1 |lloc= [[Service météorologique du Canada]] |editor= [[Environnement Canada]] |consulta= 26 desembre 2013}} Per a les latituds on es troba Catalunya, aquesta definició es relaxa i engloba caigudes de pressió d'uns 20 hPa en 24 hores, o fins i tot submúltiples d'ella, per exemple 9-10 hPa en 12h.{{ref-publicació|cognom=Martín León|nom=F|títol=El concepto de ciclogénesis explosiva o “bomba meteorológica”|publicació=RAM-Revista del Aficionado a la Meteorología|data=28 octubre 2013|volum=octubre 2013|url=http://www.tiempo.com/ram/4070/el-concepto-de-ciclognesis-explosiva/|consulta=5 març 2014}} + +El concepte va ser proposat l'any 1980 pels investigadors americans Fred Sanders i John R. Gyakum, que van parlar de "meteorological bomb". L'ús a Catalunya va fer-se popular a partir del gener del 2009{{ref-notícia|cognom=Bernis|nom=M|títol=Qui s'ha inventat la ciclogènesi explosiva?|publicació=Diari Ara|url=http://www.ara.cat/societat/meteo/sha-inventat-ciclogenesi-explosiva_0_1054094656.html|consulta=5 març 2014|data=25 desembre 2013}} amb el pas del cicló Klaus. + +A finals de febrer de 2010 la depressió Xynthia,{{ref-publicació|cognom=Servei Meteorològic de Catalunya|títol=Resum mensual. Febrer 2010|publicació=Butlletins climàtics|data=Març 2010|url=http://www20.gencat.cat/docs/meteocat/Continguts/Climatologia/Butlletins%20i%20resums%20climatics/Butlletins%20mensuals/2010/pdf/ButlletiFebrer10.pdf|consulta=5 març 2014|arxiuurl=https://web.archive.org/web/20140305171041/http://www20.gencat.cat/docs/meteocat/Continguts/Climatologia/Butlletins%20i%20resums%20climatics/Butlletins%20mensuals/2010/pdf/ButlletiFebrer10.pdf|arxiudata=5 de març 2014}} fruit d'una nova ciclogènesi explosiva, va causar destrosses al seu pas per Catalunya{{ref-notícia|cognom=VilaWeb|títol=Tempesta 'Xynthia': destrosses i ferits a Catalunya Nord|publicació=VilaWeb|url=http://www.vilaweb.cat/noticia/3696118/20100301/tempesta-xynthia-causa-destroces-catalunya-nord.html|consulta=5 març 2014|data=1 març 2010}} i nombroses víctimes a França.{{ref-notícia|cognom=Portal informatiu de TV3|nom=3/24|títol=50 morts i 9 desapareguts a França pel pas de la tempesta "Xynthia"|url=http://www.324.cat/noticia/548185/societat/50-morts-i-9-desapareguts-a-Franca-pel-pas-de-la-tempesta-Xynthia|consulta=5 març 2014|data=3/3/2010}}{{ref-notícia|cognom=Libération|títol=Xynthia, retour sur la tempête|publicació=Libération|url=http://www.liberation.fr/tempete-xynthia,99875|consulta=5 març 2014|data=febrer i març 2010}} {{Webarchive|url=https://web.archive.org/web/20140305165824/http://www.liberation.fr/tempete-xynthia,99875 |date=5 de març 2014 }} {{Ref-web |url=http://www.liberation.fr/tempete-xynthia,99875 |títol=Còpia arxivada |consulta=2014-03-05 |arxiuurl=https://web.archive.org/web/20140305165824/http://www.liberation.fr/tempete-xynthia,99875 |arxiudata=2014-03-05}} + +== Referències == +{{Referències}} + +{{Autoritat}} + +[[Categoria:Meteorologia|Ciclogènesi explosiva]] +[Try #0] Failed to fetch sample 5367113 in VC:s3://gui/data_20250328/android/images/. Exception: ref_matches: ['{{ref-web|títol= Une «bombe météorologique» déclenche la panique au Canada et aux États-Unis | url= http://ec.gc.ca/meteo-weather/default.asp?lang=Fr&n=47397091-1 |lloc= [[Service météorologique du Canada]] |editor= [[Environnement Canada]] |consulta= 26 desembre 2013}}', '{{ref-publicació|cognom=Martín León|nom=F|títol=El concepto de ciclogénesis explosiva o “bomba meteorológica”|publicació=RAM-Revista del Aficionado a la Meteorología|data=28 octubre 2013|volum=octubre 2013|url=http://www.tiempo.com/ram/4070/el-concepto-de-ciclognesis-explosiva/|consulta=5 març 2014}}', "{{ref-notícia|cognom=Bernis|nom=M|títol=Qui s'ha inventat la ciclogènesi explosiva?|publicació=Diari Ara|url=http://www.ara.cat/societat/meteo/sha-inventat-ciclogenesi-explosiva_0_1054094656.html|consulta=5 març 2014|data=25 desembre 2013}}", '{{ref-publicació|cognom=Servei Meteorològic de Catalunya|títol=Resum mensual. Febrer 2010|publicació=Butlletins climàtics|data=Març 2010|url=http://www20.gencat.cat/docs/meteocat/Continguts/Climatologia/Butlletins%20i%20resums%20climatics/Butlletins%20mensuals/2010/pdf/ButlletiFebrer10.pdf|consulta=5 març 2014|arxiuurl=https://web.archive.org/web/20140305171041/http://www20.gencat.cat/docs/meteocat/Continguts/Climatologia/Butlletins%20i%20resums%20climatics/Butlletins%20mensuals/2010/pdf/ButlletiFebrer10.pdf|arxiudata=5 de març 2014}}', "{{ref-notícia|cognom=VilaWeb|títol=Tempesta 'Xynthia': destrosses i ferits a Catalunya Nord|publicació=VilaWeb|url=http://www.vilaweb.cat/noticia/3696118/20100301/tempesta-xynthia-causa-destroces-catalunya-nord.html|consulta=5 març 2014|data=1 març 2010}}", '{{ref-notícia|cognom=Portal informatiu de TV3|nom=3/24|títol=50\xa0morts i 9 desapareguts a França pel pas de la tempesta "Xynthia"|url=http://www.324.cat/noticia/548185/societat/50-morts-i-9-desapareguts-a-Franca-pel-pas-de-la-tempesta-Xynthia|consulta=5 març 2014|data=3/3/2010}}', '{{ref-notícia|cognom=Libération|títol=Xynthia, retour sur la tempête|publicació=Libération|url=http://www.liberation.fr/tempete-xynthia,99875|consulta=5 març 2014|data=febrer i març 2010}} {{Webarchive|url=https://web.archive.org/web/20140305165824/http://www.liberation.fr/tempete-xynthia,99875 |date=5 de març 2014 }} {{Ref-web |url=http://www.liberation.fr/tempete-xynthia,99875 |títol=Còpia arxivada |consulta=2014-03-05 |arxiuurl=https://web.archive.org/web/20140305165824/http://www.liberation.fr/tempete-xynthia,99875 |arxiudata=2014-03-05}}'], box_matches: ['[[Fitxer:BraerStorm1993.png|miniatura|El gener de 1993 es creà una tempesta que va arribar a un mínim històric de 913 [[mbar]]', '[[borrasca]]', '[[Service météorologique du Canada]]', '[[Environnement Canada]]', '[[Categoria:Meteorologia|Ciclogènesi explosiva]]'], message: [[Fitxer:BraerStorm1993.png|miniatura|El gener de 1993 es creà una tempesta que va arribar a un mínim històric de 913 [[mbar]] (hPa).]] +Una '''ciclogènesi explosiva''' és una [[borrasca]] que s'aprofundeix molt ràpidament, amb una variació de més 24 hectopascals (hPa) en menys de 24 hores que pot generar forts vents amb velocitat fins a 140 kilòmetres per hora.{{ref-web|títol= Une «bombe météorologique» déclenche la panique au Canada et aux États-Unis | url= http://ec.gc.ca/meteo-weather/default.asp?lang=Fr&n=47397091-1 |lloc= [[Service météorologique du Canada]] |editor= [[Environnement Canada]] |consulta= 26 desembre 2013}} Per a les latituds on es troba Catalunya, aquesta definició es relaxa i engloba caigudes de pressió d'uns 20 hPa en 24 hores, o fins i tot submúltiples d'ella, per exemple 9-10 hPa en 12h.{{ref-publicació|cognom=Martín León|nom=F|títol=El concepto de ciclogénesis explosiva o “bomba meteorológica”|publicació=RAM-Revista del Aficionado a la Meteorología|data=28 octubre 2013|volum=octubre 2013|url=http://www.tiempo.com/ram/4070/el-concepto-de-ciclognesis-explosiva/|consulta=5 març 2014}} + +El concepte va ser proposat l'any 1980 pels investigadors americans Fred Sanders i John R. Gyakum, que van parlar de "meteorological bomb". L'ús a Catalunya va fer-se popular a partir del gener del 2009{{ref-notícia|cognom=Bernis|nom=M|títol=Qui s'ha inventat la ciclogènesi explosiva?|publicació=Diari Ara|url=http://www.ara.cat/societat/meteo/sha-inventat-ciclogenesi-explosiva_0_1054094656.html|consulta=5 març 2014|data=25 desembre 2013}} amb el pas del cicló Klaus. + +A finals de febrer de 2010 la depressió Xynthia,{{ref-publicació|cognom=Servei Meteorològic de Catalunya|títol=Resum mensual. Febrer 2010|publicació=Butlletins climàtics|data=Març 2010|url=http://www20.gencat.cat/docs/meteocat/Continguts/Climatologia/Butlletins%20i%20resums%20climatics/Butlletins%20mensuals/2010/pdf/ButlletiFebrer10.pdf|consulta=5 març 2014|arxiuurl=https://web.archive.org/web/20140305171041/http://www20.gencat.cat/docs/meteocat/Continguts/Climatologia/Butlletins%20i%20resums%20climatics/Butlletins%20mensuals/2010/pdf/ButlletiFebrer10.pdf|arxiudata=5 de març 2014}} fruit d'una nova ciclogènesi explosiva, va causar destrosses al seu pas per Catalunya{{ref-notícia|cognom=VilaWeb|títol=Tempesta 'Xynthia': destrosses i ferits a Catalunya Nord|publicació=VilaWeb|url=http://www.vilaweb.cat/noticia/3696118/20100301/tempesta-xynthia-causa-destroces-catalunya-nord.html|consulta=5 març 2014|data=1 març 2010}} i nombroses víctimes a França.{{ref-notícia|cognom=Portal informatiu de TV3|nom=3/24|títol=50 morts i 9 desapareguts a França pel pas de la tempesta "Xynthia"|url=http://www.324.cat/noticia/548185/societat/50-morts-i-9-desapareguts-a-Franca-pel-pas-de-la-tempesta-Xynthia|consulta=5 març 2014|data=3/3/2010}}{{ref-notícia|cognom=Libération|títol=Xynthia, retour sur la tempête|publicació=Libération|url=http://www.liberation.fr/tempete-xynthia,99875|consulta=5 març 2014|data=febrer i març 2010}} {{Webarchive|url=https://web.archive.org/web/20140305165824/http://www.liberation.fr/tempete-xynthia,99875 |date=5 de març 2014 }} {{Ref-web |url=http://www.liberation.fr/tempete-xynthia,99875 |títol=Còpia arxivada |consulta=2014-03-05 |arxiuurl=https://web.archive.org/web/20140305165824/http://www.liberation.fr/tempete-xynthia,99875 |arxiudata=2014-03-05}} + +== Referències == +{{Referències}} + +{{Autoritat}} + +[[Categoria:Meteorologia|Ciclogènesi explosiva]] +Problematic sample: {'image': 'wiki/Cycle_0_Iter0_seq10/images/screenshot-37-1743222050.6585867-before.png', 'conversations': [{'from': 'human', 'value': '\nExtract and display the text located within the specified area: [[0, 121, 981, 924]]'}, {'from': 'gpt', 'value': '[[Fitxer:BraerStorm1993.png|miniatura|El gener de 1993 es creà una tempesta que va arribar a un mínim històric de 913 [[mbar]] (hPa).]]\nUna \'\'\'ciclogènesi explosiva\'\'\' és una [[borrasca]] que s\'aprofundeix molt ràpidament, amb una variació de més 24 hectopascals (hPa) en menys de 24 hores que pot generar forts vents amb velocitat fins a 140 kilòmetres per hora.{{ref-web|títol= Une «bombe météorologique» déclenche la panique au Canada et aux États-Unis | url= http://ec.gc.ca/meteo-weather/default.asp?lang=Fr&n=47397091-1 |lloc= [[Service météorologique du Canada]] |editor= [[Environnement Canada]] |consulta= 26 desembre 2013}} Per a les latituds on es troba Catalunya, aquesta definició es relaxa i engloba caigudes de pressió d\'uns 20 hPa en 24 hores, o fins i tot submúltiples d\'ella, per exemple 9-10 hPa en 12h.{{ref-publicació|cognom=Martín León|nom=F|títol=El concepto de ciclogénesis explosiva o “bomba meteorológica”|publicació=RAM-Revista del Aficionado a la Meteorología|data=28 octubre 2013|volum=octubre 2013|url=http://www.tiempo.com/ram/4070/el-concepto-de-ciclognesis-explosiva/|consulta=5 març 2014}}\n\nEl concepte va ser proposat l\'any 1980 pels investigadors americans Fred Sanders i John R. Gyakum, que van parlar de "meteorological bomb". L\'ús a Catalunya va fer-se popular a partir del gener del 2009{{ref-notícia|cognom=Bernis|nom=M|títol=Qui s\'ha inventat la ciclogènesi explosiva?|publicació=Diari Ara|url=http://www.ara.cat/societat/meteo/sha-inventat-ciclogenesi-explosiva_0_1054094656.html|consulta=5 març 2014|data=25 desembre 2013}} amb el pas del cicló Klaus.\n\nA finals de febrer de 2010 la depressió Xynthia,{{ref-publicació|cognom=Servei Meteorològic de Catalunya|títol=Resum mensual. Febrer 2010|publicació=Butlletins climàtics|data=Març 2010|url=http://www20.gencat.cat/docs/meteocat/Continguts/Climatologia/Butlletins%20i%20resums%20climatics/Butlletins%20mensuals/2010/pdf/ButlletiFebrer10.pdf|consulta=5 març 2014|arxiuurl=https://web.archive.org/web/20140305171041/http://www20.gencat.cat/docs/meteocat/Continguts/Climatologia/Butlletins%20i%20resums%20climatics/Butlletins%20mensuals/2010/pdf/ButlletiFebrer10.pdf|arxiudata=5 de març 2014}} fruit d\'una nova ciclogènesi explosiva, va causar destrosses al seu pas per Catalunya{{ref-notícia|cognom=VilaWeb|títol=Tempesta \'Xynthia\': destrosses i ferits a Catalunya Nord|publicació=VilaWeb|url=http://www.vilaweb.cat/noticia/3696118/20100301/tempesta-xynthia-causa-destroces-catalunya-nord.html|consulta=5 març 2014|data=1 març 2010}} i nombroses víctimes a França.{{ref-notícia|cognom=Portal informatiu de TV3|nom=3/24|títol=50\xa0morts i 9 desapareguts a França pel pas de la tempesta "Xynthia"|url=http://www.324.cat/noticia/548185/societat/50-morts-i-9-desapareguts-a-Franca-pel-pas-de-la-tempesta-Xynthia|consulta=5 març 2014|data=3/3/2010}}{{ref-notícia|cognom=Libération|títol=Xynthia, retour sur la tempête|publicació=Libération|url=http://www.liberation.fr/tempete-xynthia,99875|consulta=5 març 2014|data=febrer i març 2010}} {{Webarchive|url=https://web.archive.org/web/20140305165824/http://www.liberation.fr/tempete-xynthia,99875 |date=5 de març 2014 }} {{Ref-web |url=http://www.liberation.fr/tempete-xynthia,99875 |títol=Còpia arxivada |consulta=2014-03-05 |arxiuurl=https://web.archive.org/web/20140305165824/http://www.liberation.fr/tempete-xynthia,99875 |arxiudata=2014-03-05}}\n\n== Referències ==\n{{Referències}}\n\n{{Autoritat}}\n\n[[Categoria:Meteorologia|Ciclogènesi explosiva]]'}], 'width': 1280, 'height': 2856} + 76%|███████▋ | 9301/12188 [40:21<5:42:08, 7.11s/it] {'loss': 0.3029, 'grad_norm': 0.6556130747892462, 'learning_rate': 1.4016195147841483e-06, 'epoch': 0.76} + 76%|███████▋ | 9301/12188 [40:21<5:42:08, 7.11s/it] 76%|███████▋ | 9302/12188 [40:29<5:44:52, 7.17s/it] {'loss': 0.3411, 'grad_norm': 0.6825410212042119, 'learning_rate': 1.4006971079912352e-06, 'epoch': 0.76} + 76%|███████▋ | 9302/12188 [40:29<5:44:52, 7.17s/it] 76%|███████▋ | 9303/12188 [40:36<5:43:24, 7.14s/it] {'loss': 0.2944, 'grad_norm': 0.6624327897868582, 'learning_rate': 1.399774955375311e-06, 'epoch': 0.76} + 76%|███████▋ | 9303/12188 [40:36<5:43:24, 7.14s/it] 76%|███████▋ | 9304/12188 [40:43<5:39:10, 7.06s/it] {'loss': 0.2981, 'grad_norm': 0.6817195041752729, 'learning_rate': 1.3988530570014952e-06, 'epoch': 0.76} + 76%|███████▋ | 9304/12188 [40:43<5:39:10, 7.06s/it] 76%|███████▋ | 9305/12188 [40:50<5:36:40, 7.01s/it] {'loss': 0.2702, 'grad_norm': 0.7031522364979518, 'learning_rate': 1.3979314129348898e-06, 'epoch': 0.76} + 76%|███████▋ | 9305/12188 [40:50<5:36:40, 7.01s/it] 76%|███████▋ | 9306/12188 [40:56<5:33:17, 6.94s/it] {'loss': 0.3045, 'grad_norm': 0.7062412021405754, 'learning_rate': 1.3970100232405819e-06, 'epoch': 0.76} + 76%|███████▋ | 9306/12188 [40:56<5:33:17, 6.94s/it] 76%|███████▋ | 9307/12188 [41:05<5:58:50, 7.47s/it] {'loss': 0.2753, 'grad_norm': 0.7586326197753792, 'learning_rate': 1.396088887983636e-06, 'epoch': 0.76} + 76%|███████▋ | 9307/12188 [41:05<5:58:50, 7.47s/it] 76%|███████▋ | 9308/12188 [41:12<5:58:40, 7.47s/it] {'loss': 0.3013, 'grad_norm': 0.778740765910696, 'learning_rate': 1.395168007229103e-06, 'epoch': 0.76} + 76%|███████▋ | 9308/12188 [41:13<5:58:40, 7.47s/it] 76%|███████▋ | 9309/12188 [41:20<5:58:19, 7.47s/it] {'loss': 0.2717, 'grad_norm': 0.6947346984143363, 'learning_rate': 1.3942473810420153e-06, 'epoch': 0.76} + 76%|███████▋ | 9309/12188 [41:20<5:58:19, 7.47s/it] 76%|███████▋ | 9310/12188 [41:27<5:46:05, 7.22s/it] {'loss': 0.2853, 'grad_norm': 0.7747584240322248, 'learning_rate': 1.3933270094873824e-06, 'epoch': 0.76} + 76%|███████▋ | 9310/12188 [41:27<5:46:05, 7.22s/it] 76%|███████▋ | 9311/12188 [41:34<5:45:26, 7.20s/it] {'loss': 0.2776, 'grad_norm': 0.7252900860257016, 'learning_rate': 1.3924068926302038e-06, 'epoch': 0.76} + 76%|███████▋ | 9311/12188 [41:34<5:45:26, 7.20s/it] 76%|███████▋ | 9312/12188 [41:41<5:46:53, 7.24s/it] {'loss': 0.2963, 'grad_norm': 0.6829652120912202, 'learning_rate': 1.3914870305354522e-06, 'epoch': 0.76} + 76%|███████▋ | 9312/12188 [41:41<5:46:53, 7.24s/it] 76%|███████▋ | 9313/12188 [41:48<5:44:16, 7.18s/it] {'loss': 0.2821, 'grad_norm': 0.6835369330934753, 'learning_rate': 1.3905674232680893e-06, 'epoch': 0.76} + 76%|███████▋ | 9313/12188 [41:48<5:44:16, 7.18s/it] 76%|███████▋ | 9314/12188 [41:56<5:47:51, 7.26s/it] {'loss': 0.3163, 'grad_norm': 0.6865494180502284, 'learning_rate': 1.3896480708930576e-06, 'epoch': 0.76} + 76%|███████▋ | 9314/12188 [41:56<5:47:51, 7.26s/it] 76%|███████▋ | 9315/12188 [42:02<5:38:43, 7.07s/it] {'loss': 0.336, 'grad_norm': 0.6969587291620375, 'learning_rate': 1.3887289734752769e-06, 'epoch': 0.76} + 76%|███████▋ | 9315/12188 [42:02<5:38:43, 7.07s/it] 76%|███████▋ | 9316/12188 [42:10<5:43:49, 7.18s/it] {'loss': 0.3252, 'grad_norm': 0.7505836128439293, 'learning_rate': 1.387810131079656e-06, 'epoch': 0.76} + 76%|███████▋ | 9316/12188 [42:10<5:43:49, 7.18s/it] 76%|███████▋ | 9317/12188 [42:17<5:42:55, 7.17s/it] {'loss': 0.2938, 'grad_norm': 0.7104277433832354, 'learning_rate': 1.38689154377108e-06, 'epoch': 0.76} + 76%|███████▋ | 9317/12188 [42:17<5:42:55, 7.17s/it] 76%|███████▋ | 9318/12188 [42:24<5:46:48, 7.25s/it] {'loss': 0.2919, 'grad_norm': 0.653500720812662, 'learning_rate': 1.3859732116144164e-06, 'epoch': 0.76} + 76%|███████▋ | 9318/12188 [42:24<5:46:48, 7.25s/it] 76%|███████▋ | 9319/12188 [42:33<6:10:29, 7.75s/it] {'loss': 0.3165, 'grad_norm': 0.9718745621658281, 'learning_rate': 1.3850551346745207e-06, 'epoch': 0.76} + 76%|███████▋ | 9319/12188 [42:33<6:10:29, 7.75s/it] 76%|███████▋ | 9320/12188 [42:40<5:55:36, 7.44s/it] {'loss': 0.2818, 'grad_norm': 0.6731560548997546, 'learning_rate': 1.384137313016221e-06, 'epoch': 0.76} + 76%|███████▋ | 9320/12188 [42:40<5:55:36, 7.44s/it] 76%|███████▋ | 9321/12188 [42:47<5:51:04, 7.35s/it] {'loss': 0.2611, 'grad_norm': 0.6556983461936733, 'learning_rate': 1.3832197467043367e-06, 'epoch': 0.76} + 76%|███████▋ | 9321/12188 [42:47<5:51:04, 7.35s/it] 76%|███████▋ | 9322/12188 [42:54<5:42:50, 7.18s/it] {'loss': 0.3211, 'grad_norm': 0.7291156751604972, 'learning_rate': 1.3823024358036618e-06, 'epoch': 0.76} + 76%|███████▋ | 9322/12188 [42:54<5:42:50, 7.18s/it] 76%|███████▋ | 9323/12188 [43:01<5:42:05, 7.16s/it] {'loss': 0.3045, 'grad_norm': 0.6923057278527324, 'learning_rate': 1.3813853803789767e-06, 'epoch': 0.76} + 76%|███████▋ | 9323/12188 [43:01<5:42:05, 7.16s/it] 77%|███████▋ | 9324/12188 [43:09<5:53:43, 7.41s/it] {'loss': 0.3323, 'grad_norm': 0.7221946034797314, 'learning_rate': 1.3804685804950434e-06, 'epoch': 0.76} + 77%|███████▋ | 9324/12188 [43:09<5:53:43, 7.41s/it] 77%|███████▋ | 9325/12188 [43:16<5:49:34, 7.33s/it] {'loss': 0.2973, 'grad_norm': 0.7103821002211725, 'learning_rate': 1.3795520362166026e-06, 'epoch': 0.77} + 77%|███████▋ | 9325/12188 [43:16<5:49:34, 7.33s/it] 77%|███████▋ | 9326/12188 [43:23<5:44:01, 7.21s/it] {'loss': 0.2868, 'grad_norm': 0.7061782110803665, 'learning_rate': 1.3786357476083801e-06, 'epoch': 0.77} + 77%|███████▋ | 9326/12188 [43:23<5:44:01, 7.21s/it] 77%|███████▋ | 9327/12188 [43:30<5:40:24, 7.14s/it] {'loss': 0.3523, 'grad_norm': 0.7517841206040957, 'learning_rate': 1.3777197147350845e-06, 'epoch': 0.77} + 77%|███████▋ | 9327/12188 [43:30<5:40:24, 7.14s/it] 77%|███████▋ | 9328/12188 [43:38<5:47:02, 7.28s/it] {'loss': 0.3216, 'grad_norm': 0.6944571579305808, 'learning_rate': 1.3768039376614017e-06, 'epoch': 0.77} + 77%|███████▋ | 9328/12188 [43:38<5:47:02, 7.28s/it] 77%|███████▋ | 9329/12188 [43:44<5:41:42, 7.17s/it] {'loss': 0.3345, 'grad_norm': 0.752504510625696, 'learning_rate': 1.3758884164520047e-06, 'epoch': 0.77} + 77%|███████▋ | 9329/12188 [43:44<5:41:42, 7.17s/it] 77%|███████▋ | 9330/12188 [43:51<5:39:13, 7.12s/it] {'loss': 0.263, 'grad_norm': 0.7890981420748432, 'learning_rate': 1.3749731511715442e-06, 'epoch': 0.77} + 77%|███████▋ | 9330/12188 [43:51<5:39:13, 7.12s/it] 77%|███████▋ | 9331/12188 [43:58<5:32:19, 6.98s/it] {'loss': 0.325, 'grad_norm': 0.7316067507835474, 'learning_rate': 1.374058141884657e-06, 'epoch': 0.77} + 77%|███████▋ | 9331/12188 [43:58<5:32:19, 6.98s/it] 77%|███████▋ | 9332/12188 [44:06<5:43:53, 7.22s/it] {'loss': 0.2994, 'grad_norm': 0.7272804731283538, 'learning_rate': 1.373143388655958e-06, 'epoch': 0.77} + 77%|███████▋ | 9332/12188 [44:06<5:43:53, 7.22s/it] 77%|███████▋ | 9333/12188 [44:13<5:47:39, 7.31s/it] {'loss': 0.3079, 'grad_norm': 0.7155462294976511, 'learning_rate': 1.3722288915500448e-06, 'epoch': 0.77} + 77%|███████▋ | 9333/12188 [44:13<5:47:39, 7.31s/it] 77%|███████▋ | 9334/12188 [44:20<5:42:50, 7.21s/it] {'loss': 0.2754, 'grad_norm': 0.6591412198448748, 'learning_rate': 1.3713146506315006e-06, 'epoch': 0.77} + 77%|███████▋ | 9334/12188 [44:20<5:42:50, 7.21s/it] 77%|███████▋ | 9335/12188 [44:28<5:45:39, 7.27s/it] {'loss': 0.3128, 'grad_norm': 0.6672666749803127, 'learning_rate': 1.3704006659648838e-06, 'epoch': 0.77} + 77%|███████▋ | 9335/12188 [44:28<5:45:39, 7.27s/it] 77%|███████▋ | 9336/12188 [44:38<6:20:19, 8.00s/it] {'loss': 0.3052, 'grad_norm': 0.6936766321046661, 'learning_rate': 1.3694869376147406e-06, 'epoch': 0.77} + 77%|███████▋ | 9336/12188 [44:38<6:20:19, 8.00s/it] 77%|███████▋ | 9337/12188 [44:45<6:09:58, 7.79s/it] {'loss': 0.2985, 'grad_norm': 0.785297340586933, 'learning_rate': 1.368573465645599e-06, 'epoch': 0.77} + 77%|███████▋ | 9337/12188 [44:45<6:09:58, 7.79s/it] 77%|███████▋ | 9338/12188 [44:52<5:59:22, 7.57s/it] {'loss': 0.3156, 'grad_norm': 0.7609096255149898, 'learning_rate': 1.3676602501219626e-06, 'epoch': 0.77} + 77%|███████▋ | 9338/12188 [44:52<5:59:22, 7.57s/it] 77%|███████▋ | 9339/12188 [44:59<5:59:29, 7.57s/it] {'loss': 0.309, 'grad_norm': 0.7181366738404591, 'learning_rate': 1.366747291108324e-06, 'epoch': 0.77} + 77%|███████▋ | 9339/12188 [44:59<5:59:29, 7.57s/it] 77%|███████▋ | 9340/12188 [45:07<5:57:31, 7.53s/it] {'loss': 0.2892, 'grad_norm': 0.6566488796788624, 'learning_rate': 1.365834588669156e-06, 'epoch': 0.77} + 77%|███████▋ | 9340/12188 [45:07<5:57:31, 7.53s/it] 77%|███████▋ | 9341/12188 [45:14<5:58:36, 7.56s/it] {'loss': 0.3337, 'grad_norm': 0.6779402952106534, 'learning_rate': 1.3649221428689092e-06, 'epoch': 0.77} + 77%|███████▋ | 9341/12188 [45:14<5:58:36, 7.56s/it] 77%|███████▋ | 9342/12188 [45:24<6:26:06, 8.14s/it] {'loss': 0.2757, 'grad_norm': 0.7451083715443181, 'learning_rate': 1.364009953772022e-06, 'epoch': 0.77} + 77%|███████▋ | 9342/12188 [45:24<6:26:06, 8.14s/it] 77%|███████▋ | 9343/12188 [45:31<6:12:52, 7.86s/it] {'loss': 0.3188, 'grad_norm': 0.646782846340555, 'learning_rate': 1.3630980214429085e-06, 'epoch': 0.77} + 77%|███████▋ | 9343/12188 [45:31<6:12:52, 7.86s/it] 77%|███████▋ | 9344/12188 [45:39<6:06:07, 7.72s/it] {'loss': 0.2896, 'grad_norm': 0.6756493058840775, 'learning_rate': 1.362186345945971e-06, 'epoch': 0.77} + 77%|███████▋ | 9344/12188 [45:39<6:06:07, 7.72s/it] 77%|███████▋ | 9345/12188 [45:47<6:14:58, 7.91s/it] {'loss': 0.2944, 'grad_norm': 0.6609077241433914, 'learning_rate': 1.3612749273455894e-06, 'epoch': 0.77} + 77%|███████▋ | 9345/12188 [45:47<6:14:58, 7.91s/it] 77%|███████▋ | 9346/12188 [45:54<6:05:53, 7.72s/it] {'loss': 0.3194, 'grad_norm': 0.8457189772739062, 'learning_rate': 1.3603637657061247e-06, 'epoch': 0.77} + 77%|███████▋ | 9346/12188 [45:54<6:05:53, 7.72s/it] 77%|███████▋ | 9347/12188 [46:01<5:54:50, 7.49s/it] {'loss': 0.2576, 'grad_norm': 0.8069969628645388, 'learning_rate': 1.359452861091925e-06, 'epoch': 0.77} + 77%|███████▋ | 9347/12188 [46:01<5:54:50, 7.49s/it] 77%|███████▋ | 9348/12188 [46:08<5:48:10, 7.36s/it] {'loss': 0.3191, 'grad_norm': 0.7138507536526255, 'learning_rate': 1.3585422135673131e-06, 'epoch': 0.77} + 77%|███████▋ | 9348/12188 [46:08<5:48:10, 7.36s/it] 77%|███████▋ | 9349/12188 [46:15<5:37:26, 7.13s/it] {'loss': 0.3207, 'grad_norm': 0.8496686394766987, 'learning_rate': 1.3576318231966006e-06, 'epoch': 0.77} + 77%|███████▋ | 9349/12188 [46:15<5:37:26, 7.13s/it] 77%|███████▋ | 9350/12188 [46:22<5:41:49, 7.23s/it] {'loss': 0.291, 'grad_norm': 0.5973518418274022, 'learning_rate': 1.3567216900440777e-06, 'epoch': 0.77} + 77%|███████▋ | 9350/12188 [46:22<5:41:49, 7.23s/it] 77%|███████▋ | 9351/12188 [46:29<5:39:10, 7.17s/it] {'loss': 0.3019, 'grad_norm': 0.8317819840425363, 'learning_rate': 1.3558118141740145e-06, 'epoch': 0.77} + 77%|███████▋ | 9351/12188 [46:29<5:39:10, 7.17s/it] 77%|███████▋ | 9352/12188 [46:37<5:40:27, 7.20s/it] {'loss': 0.3004, 'grad_norm': 0.7024110115706885, 'learning_rate': 1.3549021956506676e-06, 'epoch': 0.77} + 77%|███████▋ | 9352/12188 [46:37<5:40:27, 7.20s/it] 77%|███████▋ | 9353/12188 [46:43<5:35:34, 7.10s/it] {'loss': 0.3285, 'grad_norm': 0.7109664548391333, 'learning_rate': 1.3539928345382697e-06, 'epoch': 0.77} + 77%|███████▋ | 9353/12188 [46:43<5:35:34, 7.10s/it] 77%|███████▋ | 9354/12188 [46:50<5:32:54, 7.05s/it] {'loss': 0.311, 'grad_norm': 0.7473452780012032, 'learning_rate': 1.3530837309010403e-06, 'epoch': 0.77} + 77%|███████▋ | 9354/12188 [46:50<5:32:54, 7.05s/it] 77%|███████▋ | 9355/12188 [46:59<5:51:40, 7.45s/it] {'loss': 0.3094, 'grad_norm': 0.6158085615505112, 'learning_rate': 1.3521748848031796e-06, 'epoch': 0.77} + 77%|███████▋ | 9355/12188 [46:59<5:51:40, 7.45s/it] 77%|███████▋ | 9356/12188 [47:06<5:46:10, 7.33s/it] {'loss': 0.3272, 'grad_norm': 0.6702463432259008, 'learning_rate': 1.3512662963088668e-06, 'epoch': 0.77} + 77%|███████▋ | 9356/12188 [47:06<5:46:10, 7.33s/it] 77%|███████▋ | 9357/12188 [47:14<5:53:14, 7.49s/it] {'loss': 0.3149, 'grad_norm': 0.7585281084946524, 'learning_rate': 1.3503579654822674e-06, 'epoch': 0.77} + 77%|███████▋ | 9357/12188 [47:14<5:53:14, 7.49s/it] 77%|███████▋ | 9358/12188 [47:20<5:40:12, 7.21s/it] {'loss': 0.3214, 'grad_norm': 0.7387432017688836, 'learning_rate': 1.3494498923875238e-06, 'epoch': 0.77} + 77%|███████▋ | 9358/12188 [47:20<5:40:12, 7.21s/it] 77%|███████▋ | 9359/12188 [47:27<5:38:08, 7.17s/it] {'loss': 0.3284, 'grad_norm': 0.8247021751889948, 'learning_rate': 1.3485420770887648e-06, 'epoch': 0.77} + 77%|███████▋ | 9359/12188 [47:27<5:38:08, 7.17s/it] 77%|███████▋ | 9360/12188 [47:35<5:38:40, 7.19s/it] {'loss': 0.2745, 'grad_norm': 0.6649404160714328, 'learning_rate': 1.3476345196500978e-06, 'epoch': 0.77} + 77%|███████▋ | 9360/12188 [47:35<5:38:40, 7.19s/it] 77%|███████▋ | 9361/12188 [47:42<5:35:09, 7.11s/it] {'loss': 0.2822, 'grad_norm': 0.6826433773316647, 'learning_rate': 1.3467272201356113e-06, 'epoch': 0.77} + 77%|███████▋ | 9361/12188 [47:42<5:35:09, 7.11s/it] 77%|███████▋ | 9362/12188 [47:49<5:37:02, 7.16s/it] {'loss': 0.288, 'grad_norm': 0.6793193905525505, 'learning_rate': 1.3458201786093795e-06, 'epoch': 0.77} + 77%|███████▋ | 9362/12188 [47:49<5:37:02, 7.16s/it] 77%|███████▋ | 9363/12188 [47:56<5:32:28, 7.06s/it] {'loss': 0.2547, 'grad_norm': 0.6255687785677982, 'learning_rate': 1.3449133951354571e-06, 'epoch': 0.77} + 77%|███████▋ | 9363/12188 [47:56<5:32:28, 7.06s/it] 77%|███████▋ | 9364/12188 [48:03<5:43:14, 7.29s/it] {'loss': 0.3047, 'grad_norm': 0.7031132752867626, 'learning_rate': 1.3440068697778774e-06, 'epoch': 0.77} + 77%|███████▋ | 9364/12188 [48:03<5:43:14, 7.29s/it] 77%|███████▋ | 9365/12188 [48:11<5:45:14, 7.34s/it] {'loss': 0.2971, 'grad_norm': 0.6868850121429243, 'learning_rate': 1.3431006026006594e-06, 'epoch': 0.77} + 77%|███████▋ | 9365/12188 [48:11<5:45:14, 7.34s/it] 77%|███████▋ | 9366/12188 [48:18<5:37:37, 7.18s/it] {'loss': 0.3363, 'grad_norm': 0.7057823064420108, 'learning_rate': 1.3421945936678004e-06, 'epoch': 0.77} + 77%|███████▋ | 9366/12188 [48:18<5:37:37, 7.18s/it] 77%|███████▋ | 9367/12188 [48:24<5:32:15, 7.07s/it] {'loss': 0.2953, 'grad_norm': 0.6931151615092911, 'learning_rate': 1.341288843043283e-06, 'epoch': 0.77} + 77%|███████▋ | 9367/12188 [48:24<5:32:15, 7.07s/it] 77%|███████▋ | 9368/12188 [48:33<5:47:02, 7.38s/it] {'loss': 0.2819, 'grad_norm': 0.7710520587645033, 'learning_rate': 1.3403833507910702e-06, 'epoch': 0.77} + 77%|███████▋ | 9368/12188 [48:33<5:47:02, 7.38s/it] 77%|███████▋ | 9369/12188 [48:41<5:57:50, 7.62s/it] {'loss': 0.2783, 'grad_norm': 0.696594459815586, 'learning_rate': 1.3394781169751042e-06, 'epoch': 0.77} + 77%|███████▋ | 9369/12188 [48:41<5:57:50, 7.62s/it] 77%|███████▋ | 9370/12188 [48:48<5:54:29, 7.55s/it] {'loss': 0.3044, 'grad_norm': 0.6967644646711653, 'learning_rate': 1.3385731416593144e-06, 'epoch': 0.77} + 77%|███████▋ | 9370/12188 [48:48<5:54:29, 7.55s/it] 77%|███████▋ | 9371/12188 [48:55<5:47:16, 7.40s/it] {'loss': 0.3003, 'grad_norm': 0.6980653422448246, 'learning_rate': 1.337668424907605e-06, 'epoch': 0.77} + 77%|███████▋ | 9371/12188 [48:55<5:47:16, 7.40s/it] 77%|███████▋ | 9372/12188 [49:02<5:39:55, 7.24s/it] {'loss': 0.345, 'grad_norm': 0.7561156476694205, 'learning_rate': 1.3367639667838689e-06, 'epoch': 0.77} + 77%|███████▋ | 9372/12188 [49:02<5:39:55, 7.24s/it] 77%|███████▋ | 9373/12188 [49:09<5:31:57, 7.08s/it] {'loss': 0.2931, 'grad_norm': 0.7417897505483574, 'learning_rate': 1.3358597673519757e-06, 'epoch': 0.77} + 77%|███████▋ | 9373/12188 [49:09<5:31:57, 7.08s/it] 77%|███████▋ | 9374/12188 [49:16<5:31:40, 7.07s/it] {'loss': 0.2689, 'grad_norm': 0.7093466237549991, 'learning_rate': 1.334955826675778e-06, 'epoch': 0.77} + 77%|███████▋ | 9374/12188 [49:16<5:31:40, 7.07s/it] 77%|███████▋ | 9375/12188 [49:23<5:26:18, 6.96s/it] {'loss': 0.3385, 'grad_norm': 0.7438925505412906, 'learning_rate': 1.3340521448191113e-06, 'epoch': 0.77} + 77%|███████▋ | 9375/12188 [49:23<5:26:18, 6.96s/it] 77%|███████▋ | 9376/12188 [49:30<5:28:04, 7.00s/it] {'loss': 0.3485, 'grad_norm': 0.7077094086519436, 'learning_rate': 1.3331487218457933e-06, 'epoch': 0.77} + 77%|███████▋ | 9376/12188 [49:30<5:28:04, 7.00s/it] 77%|███████▋ | 9377/12188 [49:37<5:26:29, 6.97s/it] {'loss': 0.3141, 'grad_norm': 0.6520126156043501, 'learning_rate': 1.3322455578196203e-06, 'epoch': 0.77} + 77%|███████▋ | 9377/12188 [49:37<5:26:29, 6.97s/it] 77%|███████▋ | 9378/12188 [49:43<5:25:50, 6.96s/it] {'loss': 0.2895, 'grad_norm': 0.6889673779878093, 'learning_rate': 1.3313426528043749e-06, 'epoch': 0.77} + 77%|███████▋ | 9378/12188 [49:43<5:25:50, 6.96s/it] 77%|███████▋ | 9379/12188 [49:50<5:20:43, 6.85s/it] {'loss': 0.304, 'grad_norm': 0.6496286862406304, 'learning_rate': 1.3304400068638157e-06, 'epoch': 0.77} + 77%|███████▋ | 9379/12188 [49:50<5:20:43, 6.85s/it] 77%|███████▋ | 9380/12188 [49:59<5:51:56, 7.52s/it] {'loss': 0.3139, 'grad_norm': 0.6387111222490852, 'learning_rate': 1.3295376200616871e-06, 'epoch': 0.77} + 77%|███████▋ | 9380/12188 [49:59<5:51:56, 7.52s/it] 77%|███████▋ | 9381/12188 [50:07<5:55:46, 7.60s/it] {'loss': 0.3096, 'grad_norm': 0.707088296227551, 'learning_rate': 1.3286354924617167e-06, 'epoch': 0.77} + 77%|███████▋ | 9381/12188 [50:07<5:55:46, 7.60s/it] 77%|███████▋ | 9382/12188 [50:15<6:01:18, 7.73s/it] {'loss': 0.292, 'grad_norm': 0.7108748668204429, 'learning_rate': 1.3277336241276079e-06, 'epoch': 0.77} + 77%|███████▋ | 9382/12188 [50:15<6:01:18, 7.73s/it] 77%|███████▋ | 9383/12188 [50:22<5:49:34, 7.48s/it] {'loss': 0.2793, 'grad_norm': 0.7344199469186936, 'learning_rate': 1.3268320151230518e-06, 'epoch': 0.77} + 77%|███████▋ | 9383/12188 [50:22<5:49:34, 7.48s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 90, in pil_loader + return img.convert("RGB") + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 993, in convert + self.load() + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/ImageFile.py", line 319, in load + raise _get_oserror(err_code, encoder=False) +OSError: unrecognized data stream contents when reading image file +[Try #0] Failed to fetch sample 6013696 in VC:s3://gui-agent/data_20250623/windows_augment/images. Exception: unrecognized data stream contents when reading image file +Problematic sample: {'image': 'autocad/20250508_161646_1/images/before_screenshot_1_id_73_internvl_element-caption_crop_0_grounding_instructions_point_o_paste.png', 'conversations': [{'from': 'human', 'value': "\nProvide the point located in: A 'Field' button with a small icon and white text on dark gray background, positioned in the Data section of the AutoCAD ribbon toolbar."}, {'from': 'gpt', 'value': "A 'Field' button with a small icon and white text on dark gray background, positioned in the Data section of the AutoCAD ribbon toolbar.[[649, 145]]"}], 'width': 3600, 'height': 2338} + 77%|███████▋ | 9384/12188 [50:29<5:45:15, 7.39s/it] {'loss': 0.3076, 'grad_norm': 0.7168324449805693, 'learning_rate': 1.325930665511716e-06, 'epoch': 0.77} + 77%|███████▋ | 9384/12188 [50:29<5:45:15, 7.39s/it] 77%|███████▋ | 9385/12188 [50:36<5:39:17, 7.26s/it] {'loss': 0.3114, 'grad_norm': 0.7180154158553844, 'learning_rate': 1.3250295753572557e-06, 'epoch': 0.77} + 77%|███████▋ | 9385/12188 [50:36<5:39:17, 7.26s/it] 77%|███████▋ | 9386/12188 [50:43<5:31:44, 7.10s/it] {'loss': 0.3278, 'grad_norm': 0.7014788992058931, 'learning_rate': 1.3241287447233003e-06, 'epoch': 0.77} + 77%|███████▋ | 9386/12188 [50:43<5:31:44, 7.10s/it] 77%|███████▋ | 9387/12188 [50:52<6:07:15, 7.87s/it] {'loss': 0.3398, 'grad_norm': 0.7275701801128694, 'learning_rate': 1.3232281736734692e-06, 'epoch': 0.77} + 77%|███████▋ | 9387/12188 [50:52<6:07:15, 7.87s/it] 77%|███████▋ | 9388/12188 [50:59<5:52:17, 7.55s/it] {'loss': 0.3409, 'grad_norm': 0.7323965117513407, 'learning_rate': 1.322327862271357e-06, 'epoch': 0.77} + 77%|███████▋ | 9388/12188 [50:59<5:52:17, 7.55s/it] 77%|███████▋ | 9389/12188 [51:06<5:44:18, 7.38s/it] {'loss': 0.294, 'grad_norm': 0.6675906045315905, 'learning_rate': 1.3214278105805412e-06, 'epoch': 0.77} + 77%|███████▋ | 9389/12188 [51:06<5:44:18, 7.38s/it] 77%|███████▋ | 9390/12188 [51:14<5:51:40, 7.54s/it] {'loss': 0.2849, 'grad_norm': 0.840671616209331, 'learning_rate': 1.320528018664583e-06, 'epoch': 0.77} + 77%|███████▋ | 9390/12188 [51:14<5:51:40, 7.54s/it] 77%|███████▋ | 9391/12188 [51:22<5:59:55, 7.72s/it] {'loss': 0.3495, 'grad_norm': 0.7387514484854402, 'learning_rate': 1.319628486587027e-06, 'epoch': 0.77} + 77%|███████▋ | 9391/12188 [51:22<5:59:55, 7.72s/it] 77%|███████▋ | 9392/12188 [51:32<6:28:08, 8.33s/it] {'loss': 0.2962, 'grad_norm': 0.7641005465468312, 'learning_rate': 1.3187292144113917e-06, 'epoch': 0.77} + 77%|███████▋ | 9392/12188 [51:32<6:28:08, 8.33s/it] 77%|███████▋ | 9393/12188 [51:39<6:06:29, 7.87s/it] {'loss': 0.3069, 'grad_norm': 0.8074664655158822, 'learning_rate': 1.3178302022011851e-06, 'epoch': 0.77} + 77%|███████▋ | 9393/12188 [51:39<6:06:29, 7.87s/it] 77%|███████▋ | 9394/12188 [51:52<7:21:30, 9.48s/it] {'loss': 0.2689, 'grad_norm': 0.7184463673683307, 'learning_rate': 1.316931450019896e-06, 'epoch': 0.77} + 77%|███████▋ | 9394/12188 [51:52<7:21:30, 9.48s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f3c3deefc40> +[Try #0] Failed to fetch sample 4644862 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f3c3deefc40> +Problematic sample: {'image': '20240823_045930_before_screenshot_sub3.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Consumer Health Privacy'"}, {'from': 'gpt', 'value': '\nclick(x=0.4845, y=0.921)\n'}]} + 77%|███████▋ | 9395/12188 [51:59<6:42:25, 8.65s/it] {'loss': 0.3082, 'grad_norm': 0.7760776415987473, 'learning_rate': 1.3160329579309883e-06, 'epoch': 0.77} + 77%|███████▋ | 9395/12188 [51:59<6:42:25, 8.65s/it] 77%|███████▋ | 9396/12188 [52:06<6:16:38, 8.09s/it] {'loss': 0.2838, 'grad_norm': 0.6749117352712403, 'learning_rate': 1.3151347259979164e-06, 'epoch': 0.77} + 77%|███████▋ | 9396/12188 [52:06<6:16:38, 8.09s/it] 77%|███████▋ | 9397/12188 [52:14<6:16:02, 8.08s/it] {'loss': 0.3461, 'grad_norm': 0.7440487473978237, 'learning_rate': 1.3142367542841084e-06, 'epoch': 0.77} + 77%|███████▋ | 9397/12188 [52:14<6:16:02, 8.08s/it] 77%|███████▋ | 9398/12188 [52:20<5:54:23, 7.62s/it] {'loss': 0.2895, 'grad_norm': 0.6528065481952806, 'learning_rate': 1.3133390428529784e-06, 'epoch': 0.77} + 77%|███████▋ | 9398/12188 [52:20<5:54:23, 7.62s/it] 77%|███████▋ | 9399/12188 [52:27<5:44:16, 7.41s/it] {'loss': 0.3429, 'grad_norm': 0.7695461405298927, 'learning_rate': 1.3124415917679244e-06, 'epoch': 0.77} + 77%|███████▋ | 9399/12188 [52:27<5:44:16, 7.41s/it] 77%|███████▋ | 9400/12188 [52:34<5:38:59, 7.30s/it] {'loss': 0.2903, 'grad_norm': 0.6728451974861641, 'learning_rate': 1.3115444010923205e-06, 'epoch': 0.77} + 77%|███████▋ | 9400/12188 [52:34<5:38:59, 7.30s/it]Invalidate trace cache @ step 2: expected module 1, but got module 364 + 77%|███████▋ | 9401/12188 [52:40<5:25:42, 7.01s/it] {'loss': 0.6374, 'grad_norm': 0.6418926491579545, 'learning_rate': 1.310647470889525e-06, 'epoch': 0.77} + 77%|███████▋ | 9401/12188 [52:40<5:25:42, 7.01s/it] 77%|███████▋ | 9402/12188 [52:50<6:02:34, 7.81s/it] {'loss': 0.2858, 'grad_norm': 0.7664640728376813, 'learning_rate': 1.3097508012228765e-06, 'epoch': 0.77} + 77%|███████▋ | 9402/12188 [52:50<6:02:34, 7.81s/it] 77%|███████▋ | 9403/12188 [52:57<5:45:32, 7.44s/it] {'loss': 0.3262, 'grad_norm': 0.6469811520339486, 'learning_rate': 1.3088543921556974e-06, 'epoch': 0.77} + 77%|███████▋ | 9403/12188 [52:57<5:45:32, 7.44s/it] 77%|███████▋ | 9404/12188 [53:04<5:39:18, 7.31s/it] {'loss': 0.2768, 'grad_norm': 0.6262552537846414, 'learning_rate': 1.3079582437512922e-06, 'epoch': 0.77} + 77%|███████▋ | 9404/12188 [53:04<5:39:18, 7.31s/it] 77%|███████▋ | 9405/12188 [53:11<5:34:55, 7.22s/it] {'loss': 0.3297, 'grad_norm': 0.6552189960237065, 'learning_rate': 1.3070623560729429e-06, 'epoch': 0.77} + 77%|███████▋ | 9405/12188 [53:11<5:34:55, 7.22s/it] 77%|███████▋ | 9406/12188 [53:18<5:31:37, 7.15s/it] {'loss': 0.31, 'grad_norm': 0.6605208826202545, 'learning_rate': 1.3061667291839182e-06, 'epoch': 0.77} + 77%|███████▋ | 9406/12188 [53:18<5:31:37, 7.15s/it] 77%|███████▋ | 9407/12188 [53:25<5:36:54, 7.27s/it] {'loss': 0.2988, 'grad_norm': 0.740143318292247, 'learning_rate': 1.305271363147463e-06, 'epoch': 0.77} + 77%|███████▋ | 9407/12188 [53:25<5:36:54, 7.27s/it] 77%|███████▋ | 9408/12188 [53:32<5:33:10, 7.19s/it] {'loss': 0.311, 'grad_norm': 0.7348978931929337, 'learning_rate': 1.3043762580268078e-06, 'epoch': 0.77} + 77%|███████▋ | 9408/12188 [53:32<5:33:10, 7.19s/it] 77%|███████▋ | 9409/12188 [53:39<5:33:04, 7.19s/it] {'loss': 0.3125, 'grad_norm': 0.730445448272583, 'learning_rate': 1.3034814138851654e-06, 'epoch': 0.77} + 77%|███████▋ | 9409/12188 [53:39<5:33:04, 7.19s/it] 77%|███████▋ | 9410/12188 [53:46<5:24:08, 7.00s/it] {'loss': 0.2949, 'grad_norm': 0.6369258452095655, 'learning_rate': 1.3025868307857252e-06, 'epoch': 0.77} + 77%|███████▋ | 9410/12188 [53:46<5:24:08, 7.00s/it] 77%|███████▋ | 9411/12188 [53:53<5:22:19, 6.96s/it] {'loss': 0.3227, 'grad_norm': 0.6451568375437282, 'learning_rate': 1.3016925087916627e-06, 'epoch': 0.77} + 77%|███████▋ | 9411/12188 [53:53<5:22:19, 6.96s/it] 77%|███████▋ | 9412/12188 [54:05<6:34:26, 8.53s/it] {'loss': 0.2864, 'grad_norm': 0.6861094263420312, 'learning_rate': 1.3007984479661352e-06, 'epoch': 0.77} + 77%|███████▋ | 9412/12188 [54:05<6:34:26, 8.53s/it] 77%|███████▋ | 9413/12188 [54:15<6:49:54, 8.86s/it] {'loss': 0.3043, 'grad_norm': 0.6545808396235185, 'learning_rate': 1.2999046483722777e-06, 'epoch': 0.77} + 77%|███████▋ | 9413/12188 [54:15<6:49:54, 8.86s/it] 77%|███████▋ | 9414/12188 [54:22<6:30:37, 8.45s/it] {'loss': 0.3225, 'grad_norm': 0.7259407683990599, 'learning_rate': 1.2990111100732078e-06, 'epoch': 0.77} + 77%|███████▋ | 9414/12188 [54:22<6:30:37, 8.45s/it] 77%|███████▋ | 9415/12188 [54:29<6:11:22, 8.04s/it] {'loss': 0.2945, 'grad_norm': 0.7213591794388743, 'learning_rate': 1.2981178331320288e-06, 'epoch': 0.77} + 77%|███████▋ | 9415/12188 [54:29<6:11:22, 8.04s/it] 77%|███████▋ | 9416/12188 [54:36<5:53:28, 7.65s/it] {'loss': 0.33, 'grad_norm': 0.7241461177743153, 'learning_rate': 1.2972248176118197e-06, 'epoch': 0.77} + 77%|███████▋ | 9416/12188 [54:36<5:53:28, 7.65s/it] 77%|███████▋ | 9417/12188 [54:43<5:47:22, 7.52s/it] {'loss': 0.2775, 'grad_norm': 0.7361909852431496, 'learning_rate': 1.296332063575646e-06, 'epoch': 0.77} + 77%|███████▋ | 9417/12188 [54:43<5:47:22, 7.52s/it] 77%|███████▋ | 9418/12188 [54:50<5:37:12, 7.30s/it] {'loss': 0.3556, 'grad_norm': 0.7173016389039947, 'learning_rate': 1.2954395710865502e-06, 'epoch': 0.77} + 77%|███████▋ | 9418/12188 [54:50<5:37:12, 7.30s/it] 77%|███████▋ | 9419/12188 [54:57<5:34:51, 7.26s/it] {'loss': 0.3079, 'grad_norm': 0.6691832644342685, 'learning_rate': 1.294547340207562e-06, 'epoch': 0.77} + 77%|███████▋ | 9419/12188 [54:57<5:34:51, 7.26s/it] 77%|███████▋ | 9420/12188 [55:04<5:30:51, 7.17s/it] {'loss': 0.2907, 'grad_norm': 0.7093142633535597, 'learning_rate': 1.2936553710016858e-06, 'epoch': 0.77} + 77%|███████▋ | 9420/12188 [55:04<5:30:51, 7.17s/it] 77%|███████▋ | 9421/12188 [55:11<5:28:54, 7.13s/it] {'loss': 0.3241, 'grad_norm': 0.7136708321757962, 'learning_rate': 1.2927636635319125e-06, 'epoch': 0.77} + 77%|███████▋ | 9421/12188 [55:11<5:28:54, 7.13s/it] 77%|███████▋ | 9422/12188 [55:19<5:36:02, 7.29s/it] {'loss': 0.2953, 'grad_norm': 0.8103657550204946, 'learning_rate': 1.2918722178612142e-06, 'epoch': 0.77} + 77%|███████▋ | 9422/12188 [55:19<5:36:02, 7.29s/it] 77%|███████▋ | 9423/12188 [55:26<5:32:05, 7.21s/it] {'loss': 0.293, 'grad_norm': 0.7692145795847605, 'learning_rate': 1.2909810340525408e-06, 'epoch': 0.77} + 77%|███████▋ | 9423/12188 [55:26<5:32:05, 7.21s/it] 77%|███████▋ | 9424/12188 [55:33<5:28:34, 7.13s/it] {'loss': 0.3599, 'grad_norm': 1.090311628755862, 'learning_rate': 1.2900901121688298e-06, 'epoch': 0.77} + 77%|███████▋ | 9424/12188 [55:33<5:28:34, 7.13s/it] 77%|███████▋ | 9425/12188 [55:40<5:23:41, 7.03s/it] {'loss': 0.3059, 'grad_norm': 0.7996196852048367, 'learning_rate': 1.2891994522729928e-06, 'epoch': 0.77} + 77%|███████▋ | 9425/12188 [55:40<5:23:41, 7.03s/it] 77%|███████▋ | 9426/12188 [55:47<5:25:30, 7.07s/it] {'loss': 0.303, 'grad_norm': 0.6816599368407352, 'learning_rate': 1.2883090544279291e-06, 'epoch': 0.77} + 77%|███████▋ | 9426/12188 [55:47<5:25:30, 7.07s/it] 77%|███████▋ | 9427/12188 [55:54<5:30:30, 7.18s/it] {'loss': 0.2899, 'grad_norm': 0.8980641814284908, 'learning_rate': 1.2874189186965181e-06, 'epoch': 0.77} + 77%|███████▋ | 9427/12188 [55:54<5:30:30, 7.18s/it] 77%|███████▋ | 9428/12188 [56:01<5:28:34, 7.14s/it] {'loss': 0.2761, 'grad_norm': 0.8026988289833895, 'learning_rate': 1.2865290451416179e-06, 'epoch': 0.77} + 77%|███████▋ | 9428/12188 [56:01<5:28:34, 7.14s/it] 77%|███████▋ | 9429/12188 [56:08<5:30:09, 7.18s/it] {'loss': 0.2782, 'grad_norm': 0.7183965355871295, 'learning_rate': 1.2856394338260691e-06, 'epoch': 0.77} + 77%|███████▋ | 9429/12188 [56:09<5:30:09, 7.18s/it] 77%|███████▋ | 9430/12188 [56:15<5:27:31, 7.13s/it] {'loss': 0.2586, 'grad_norm': 0.6741752848912461, 'learning_rate': 1.2847500848126976e-06, 'epoch': 0.77} + 77%|███████▋ | 9430/12188 [56:16<5:27:31, 7.13s/it] 77%|███████▋ | 9431/12188 [56:23<5:28:09, 7.14s/it] {'loss': 0.2549, 'grad_norm': 0.6133092552228466, 'learning_rate': 1.2838609981643041e-06, 'epoch': 0.77} + 77%|███████▋ | 9431/12188 [56:23<5:28:09, 7.14s/it] 77%|███████▋ | 9432/12188 [56:32<6:02:04, 7.88s/it] {'loss': 0.2713, 'grad_norm': 0.7071532718167487, 'learning_rate': 1.2829721739436786e-06, 'epoch': 0.77} + 77%|███████▋ | 9432/12188 [56:32<6:02:04, 7.88s/it] 77%|███████▋ | 9433/12188 [56:39<5:48:44, 7.60s/it] {'loss': 0.2644, 'grad_norm': 0.6697536128321161, 'learning_rate': 1.2820836122135843e-06, 'epoch': 0.77} + 77%|███████▋ | 9433/12188 [56:39<5:48:44, 7.60s/it] 77%|███████▋ | 9434/12188 [56:47<5:53:55, 7.71s/it] {'loss': 0.2849, 'grad_norm': 0.6947180714669959, 'learning_rate': 1.2811953130367722e-06, 'epoch': 0.77} + 77%|███████▋ | 9434/12188 [56:47<5:53:55, 7.71s/it] 77%|███████▋ | 9435/12188 [56:55<5:48:50, 7.60s/it] {'loss': 0.2729, 'grad_norm': 0.7133139634168204, 'learning_rate': 1.2803072764759738e-06, 'epoch': 0.77} + 77%|███████▋ | 9435/12188 [56:55<5:48:50, 7.60s/it] 77%|███████▋ | 9436/12188 [57:02<5:43:17, 7.48s/it] {'loss': 0.3161, 'grad_norm': 0.7723794815153069, 'learning_rate': 1.2794195025938982e-06, 'epoch': 0.77} + 77%|███████▋ | 9436/12188 [57:02<5:43:17, 7.48s/it] 77%|███████▋ | 9437/12188 [57:10<5:47:16, 7.57s/it] {'loss': 0.3426, 'grad_norm': 0.7741032722507116, 'learning_rate': 1.2785319914532408e-06, 'epoch': 0.77} + 77%|███████▋ | 9437/12188 [57:10<5:47:16, 7.57s/it] 77%|███████▋ | 9438/12188 [57:17<5:43:04, 7.49s/it] {'loss': 0.2933, 'grad_norm': 0.7396300172100171, 'learning_rate': 1.2776447431166738e-06, 'epoch': 0.77} + 77%|███████▋ | 9438/12188 [57:17<5:43:04, 7.49s/it] 77%|███████▋ | 9439/12188 [57:24<5:37:11, 7.36s/it] {'loss': 0.2964, 'grad_norm': 0.7948135966218227, 'learning_rate': 1.2767577576468549e-06, 'epoch': 0.77} + 77%|███████▋ | 9439/12188 [57:24<5:37:11, 7.36s/it] 77%|███████▋ | 9440/12188 [57:32<5:51:59, 7.69s/it] {'loss': 0.3096, 'grad_norm': 0.6840064269370589, 'learning_rate': 1.275871035106423e-06, 'epoch': 0.77} + 77%|███████▋ | 9440/12188 [57:32<5:51:59, 7.69s/it] 77%|███████▋ | 9441/12188 [57:41<6:05:45, 7.99s/it] {'loss': 0.3113, 'grad_norm': 0.6495447543273203, 'learning_rate': 1.2749845755579947e-06, 'epoch': 0.77} + 77%|███████▋ | 9441/12188 [57:41<6:05:45, 7.99s/it] 77%|███████▋ | 9442/12188 [57:48<5:55:05, 7.76s/it] {'loss': 0.3225, 'grad_norm': 0.6766981049614856, 'learning_rate': 1.2740983790641697e-06, 'epoch': 0.77} + 77%|███████▋ | 9442/12188 [57:48<5:55:05, 7.76s/it] 77%|███████▋ | 9443/12188 [57:58<6:19:35, 8.30s/it] {'loss': 0.3076, 'grad_norm': 0.6725192646461425, 'learning_rate': 1.2732124456875327e-06, 'epoch': 0.77} + 77%|███████▋ | 9443/12188 [57:58<6:19:35, 8.30s/it] 77%|███████▋ | 9444/12188 [58:05<5:59:25, 7.86s/it] {'loss': 0.2807, 'grad_norm': 0.682036072345719, 'learning_rate': 1.2723267754906437e-06, 'epoch': 0.77} + 77%|███████▋ | 9444/12188 [58:05<5:59:25, 7.86s/it] 77%|███████▋ | 9445/12188 [58:12<5:50:50, 7.67s/it] {'loss': 0.2893, 'grad_norm': 0.716233458917682, 'learning_rate': 1.2714413685360505e-06, 'epoch': 0.77} + 77%|███████▋ | 9445/12188 [58:12<5:50:50, 7.67s/it] 78%|███████▊ | 9446/12188 [58:19<5:44:43, 7.54s/it] {'loss': 0.3299, 'grad_norm': 0.7850519489264427, 'learning_rate': 1.2705562248862757e-06, 'epoch': 0.77} + 78%|███████▊ | 9446/12188 [58:19<5:44:43, 7.54s/it] 78%|███████▊ | 9447/12188 [58:26<5:40:35, 7.46s/it] {'loss': 0.311, 'grad_norm': 0.6852465421585731, 'learning_rate': 1.2696713446038283e-06, 'epoch': 0.78} + 78%|███████▊ | 9447/12188 [58:26<5:40:35, 7.46s/it] 78%|███████▊ | 9448/12188 [58:34<5:40:57, 7.47s/it] {'loss': 0.3067, 'grad_norm': 0.6860776514836957, 'learning_rate': 1.2687867277511995e-06, 'epoch': 0.78} + 78%|███████▊ | 9448/12188 [58:34<5:40:57, 7.47s/it] 78%|███████▊ | 9449/12188 [58:41<5:31:22, 7.26s/it] {'loss': 0.2795, 'grad_norm': 0.6685275864465627, 'learning_rate': 1.2679023743908552e-06, 'epoch': 0.78} + 78%|███████▊ | 9449/12188 [58:41<5:31:22, 7.26s/it] 78%|███████▊ | 9450/12188 [58:48<5:28:11, 7.19s/it] {'loss': 0.3318, 'grad_norm': 0.7912960154317725, 'learning_rate': 1.2670182845852508e-06, 'epoch': 0.78} + 78%|███████▊ | 9450/12188 [58:48<5:28:11, 7.19s/it] 78%|███████▊ | 9451/12188 [58:55<5:25:17, 7.13s/it] {'loss': 0.3361, 'grad_norm': 0.6713730890441729, 'learning_rate': 1.266134458396816e-06, 'epoch': 0.78} + 78%|███████▊ | 9451/12188 [58:55<5:25:17, 7.13s/it] 78%|███████▊ | 9452/12188 [59:02<5:26:17, 7.16s/it] {'loss': 0.3145, 'grad_norm': 0.6979109112831349, 'learning_rate': 1.2652508958879671e-06, 'epoch': 0.78} + 78%|███████▊ | 9452/12188 [59:02<5:26:17, 7.16s/it] 78%|███████▊ | 9453/12188 [59:09<5:23:32, 7.10s/it] {'loss': 0.3001, 'grad_norm': 0.6790347865772011, 'learning_rate': 1.2643675971211012e-06, 'epoch': 0.78} + 78%|███████▊ | 9453/12188 [59:09<5:23:32, 7.10s/it] 78%|███████▊ | 9454/12188 [59:16<5:23:00, 7.09s/it] {'loss': 0.2778, 'grad_norm': 0.7405207579929024, 'learning_rate': 1.2634845621585923e-06, 'epoch': 0.78} + 78%|███████▊ | 9454/12188 [59:16<5:23:00, 7.09s/it] 78%|███████▊ | 9455/12188 [59:23<5:23:11, 7.10s/it] {'loss': 0.3462, 'grad_norm': 0.6927074388562803, 'learning_rate': 1.2626017910628018e-06, 'epoch': 0.78} + 78%|███████▊ | 9455/12188 [59:23<5:23:11, 7.10s/it] 78%|███████▊ | 9456/12188 [59:35<6:23:21, 8.42s/it] {'loss': 0.3152, 'grad_norm': 0.8591569598593007, 'learning_rate': 1.2617192838960684e-06, 'epoch': 0.78} + 78%|███████▊ | 9456/12188 [59:35<6:23:21, 8.42s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7fb96e674090> +[Try #0] Failed to fetch sample 4487054 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7fb96e674090> +Problematic sample: {'image': '20240822_131046_before_screenshot_sub0.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Office Theme'"}, {'from': 'gpt', 'value': '\nclick(x=0.1175, y=0.139)\n'}]} + 78%|███████▊ | 9457/12188 [59:42<6:05:27, 8.03s/it] {'loss': 0.3784, 'grad_norm': 0.7373819944703198, 'learning_rate': 1.2608370407207115e-06, 'epoch': 0.78} + 78%|███████▊ | 9457/12188 [59:42<6:05:27, 8.03s/it] 78%|███████▊ | 9458/12188 [59:48<5:49:01, 7.67s/it] {'loss': 0.294, 'grad_norm': 0.7196285011302676, 'learning_rate': 1.2599550615990369e-06, 'epoch': 0.78} + 78%|███████▊ | 9458/12188 [59:48<5:49:01, 7.67s/it] 78%|███████▊ | 9459/12188 [59:55<5:35:17, 7.37s/it] {'loss': 0.3251, 'grad_norm': 0.6804280689339713, 'learning_rate': 1.2590733465933252e-06, 'epoch': 0.78} + 78%|███████▊ | 9459/12188 [59:55<5:35:17, 7.37s/it] 78%|███████▊ | 9460/12188 [1:00:03<5:37:31, 7.42s/it] {'loss': 0.3242, 'grad_norm': 0.6619595791729395, 'learning_rate': 1.2581918957658457e-06, 'epoch': 0.78} + 78%|███████▊ | 9460/12188 [1:00:03<5:37:31, 7.42s/it] 78%|███████▊ | 9461/12188 [1:00:11<5:45:08, 7.59s/it] {'loss': 0.2944, 'grad_norm': 0.7578610422221058, 'learning_rate': 1.2573107091788406e-06, 'epoch': 0.78} + 78%|███████▊ | 9461/12188 [1:00:11<5:45:08, 7.59s/it] 78%|███████▊ | 9462/12188 [1:00:17<5:33:57, 7.35s/it] {'loss': 0.3009, 'grad_norm': 0.7157448508944692, 'learning_rate': 1.2564297868945407e-06, 'epoch': 0.78} + 78%|███████▊ | 9462/12188 [1:00:17<5:33:57, 7.35s/it] 78%|███████▊ | 9463/12188 [1:00:24<5:26:23, 7.19s/it] {'loss': 0.3039, 'grad_norm': 0.6860195461033018, 'learning_rate': 1.255549128975156e-06, 'epoch': 0.78} + 78%|███████▊ | 9463/12188 [1:00:24<5:26:23, 7.19s/it] 78%|███████▊ | 9464/12188 [1:00:31<5:20:52, 7.07s/it] {'loss': 0.3089, 'grad_norm': 0.6741390977208243, 'learning_rate': 1.2546687354828747e-06, 'epoch': 0.78} + 78%|███████▊ | 9464/12188 [1:00:31<5:20:52, 7.07s/it] 78%|███████▊ | 9465/12188 [1:00:38<5:16:56, 6.98s/it] {'loss': 0.2754, 'grad_norm': 0.8194516120997102, 'learning_rate': 1.2537886064798693e-06, 'epoch': 0.78} + 78%|███████▊ | 9465/12188 [1:00:38<5:16:56, 6.98s/it] 78%|███████▊ | 9466/12188 [1:00:49<6:08:25, 8.12s/it] {'loss': 0.3347, 'grad_norm': 0.6659650894760413, 'learning_rate': 1.2529087420282958e-06, 'epoch': 0.78} + 78%|███████▊ | 9466/12188 [1:00:49<6:08:25, 8.12s/it] 78%|███████▊ | 9467/12188 [1:00:57<6:10:13, 8.16s/it] {'loss': 0.3267, 'grad_norm': 0.7339893043553106, 'learning_rate': 1.2520291421902852e-06, 'epoch': 0.78} + 78%|███████▊ | 9467/12188 [1:00:57<6:10:13, 8.16s/it] 78%|███████▊ | 9468/12188 [1:01:05<6:05:45, 8.07s/it] {'loss': 0.2818, 'grad_norm': 0.6718396223390917, 'learning_rate': 1.2511498070279564e-06, 'epoch': 0.78} + 78%|███████▊ | 9468/12188 [1:01:05<6:05:45, 8.07s/it] 78%|███████▊ | 9469/12188 [1:01:12<5:51:40, 7.76s/it] {'loss': 0.3246, 'grad_norm': 0.6862738211009264, 'learning_rate': 1.250270736603405e-06, 'epoch': 0.78} + 78%|███████▊ | 9469/12188 [1:01:12<5:51:40, 7.76s/it] 78%|███████▊ | 9470/12188 [1:01:19<5:42:45, 7.57s/it] {'loss': 0.2899, 'grad_norm': 0.6631596749794709, 'learning_rate': 1.2493919309787089e-06, 'epoch': 0.78} + 78%|███████▊ | 9470/12188 [1:01:19<5:42:45, 7.57s/it] 78%|███████▊ | 9471/12188 [1:01:26<5:37:26, 7.45s/it] {'loss': 0.2881, 'grad_norm': 0.6670080221615525, 'learning_rate': 1.2485133902159297e-06, 'epoch': 0.78} + 78%|███████▊ | 9471/12188 [1:01:26<5:37:26, 7.45s/it] 78%|███████▊ | 9472/12188 [1:01:34<5:38:24, 7.48s/it] {'loss': 0.3239, 'grad_norm': 0.727715766402037, 'learning_rate': 1.2476351143771065e-06, 'epoch': 0.78} + 78%|███████▊ | 9472/12188 [1:01:34<5:38:24, 7.48s/it] 78%|███████▊ | 9473/12188 [1:01:41<5:33:16, 7.37s/it] {'loss': 0.3075, 'grad_norm': 0.6432346317706314, 'learning_rate': 1.2467571035242643e-06, 'epoch': 0.78} + 78%|███████▊ | 9473/12188 [1:01:41<5:33:16, 7.37s/it] 78%|███████▊ | 9474/12188 [1:01:48<5:30:36, 7.31s/it] {'loss': 0.2902, 'grad_norm': 0.7043256229264242, 'learning_rate': 1.245879357719404e-06, 'epoch': 0.78} + 78%|███████▊ | 9474/12188 [1:01:48<5:30:36, 7.31s/it] 78%|███████▊ | 9475/12188 [1:01:58<6:05:10, 8.08s/it] {'loss': 0.3166, 'grad_norm': 0.8501645342264728, 'learning_rate': 1.245001877024512e-06, 'epoch': 0.78} + 78%|███████▊ | 9475/12188 [1:01:58<6:05:10, 8.08s/it] 78%|███████▊ | 9476/12188 [1:02:05<5:53:43, 7.83s/it] {'loss': 0.3197, 'grad_norm': 0.6555193967848882, 'learning_rate': 1.2441246615015562e-06, 'epoch': 0.78} + 78%|███████▊ | 9476/12188 [1:02:05<5:53:43, 7.83s/it] 78%|██████���▊ | 9477/12188 [1:02:12<5:46:40, 7.67s/it] {'loss': 0.3138, 'grad_norm': 0.7536958757469431, 'learning_rate': 1.2432477112124807e-06, 'epoch': 0.78} + 78%|███████▊ | 9477/12188 [1:02:12<5:46:40, 7.67s/it] 78%|███████▊ | 9478/12188 [1:02:21<5:54:43, 7.85s/it] {'loss': 0.2837, 'grad_norm': 0.6704368430312582, 'learning_rate': 1.242371026219218e-06, 'epoch': 0.78} + 78%|███████▊ | 9478/12188 [1:02:21<5:54:43, 7.85s/it] 78%|███████▊ | 9479/12188 [1:02:28<5:43:51, 7.62s/it] {'loss': 0.303, 'grad_norm': 0.7237558744286343, 'learning_rate': 1.2414946065836741e-06, 'epoch': 0.78} + 78%|███████▊ | 9479/12188 [1:02:28<5:43:51, 7.62s/it] 78%|███████▊ | 9480/12188 [1:02:34<5:32:53, 7.38s/it] {'loss': 0.2831, 'grad_norm': 0.7114135496973234, 'learning_rate': 1.2406184523677434e-06, 'epoch': 0.78} + 78%|███████▊ | 9480/12188 [1:02:34<5:32:53, 7.38s/it] 78%|███████▊ | 9481/12188 [1:02:42<5:32:36, 7.37s/it] {'loss': 0.3124, 'grad_norm': 0.6625244997079408, 'learning_rate': 1.2397425636332989e-06, 'epoch': 0.78} + 78%|███████▊ | 9481/12188 [1:02:42<5:32:36, 7.37s/it] 78%|███████▊ | 9482/12188 [1:02:51<5:56:19, 7.90s/it] {'loss': 0.2637, 'grad_norm': 0.6470667678297466, 'learning_rate': 1.2388669404421915e-06, 'epoch': 0.78} + 78%|███████▊ | 9482/12188 [1:02:51<5:56:19, 7.90s/it] 78%|███████▊ | 9483/12188 [1:02:59<5:53:40, 7.84s/it] {'loss': 0.2775, 'grad_norm': 0.6486086360448046, 'learning_rate': 1.2379915828562599e-06, 'epoch': 0.78} + 78%|███████▊ | 9483/12188 [1:02:59<5:53:40, 7.84s/it] 78%|███████▊ | 9484/12188 [1:03:06<5:51:52, 7.81s/it] {'loss': 0.2685, 'grad_norm': 0.6606486354089672, 'learning_rate': 1.2371164909373185e-06, 'epoch': 0.78} + 78%|███████▊ | 9484/12188 [1:03:06<5:51:52, 7.81s/it] 78%|███████▊ | 9485/12188 [1:03:16<6:14:25, 8.31s/it] {'loss': 0.3336, 'grad_norm': 0.6849682620149726, 'learning_rate': 1.2362416647471636e-06, 'epoch': 0.78} + 78%|███████▊ | 9485/12188 [1:03:16<6:14:25, 8.31s/it] 78%|███████▊ | 9486/12188 [1:03:23<6:01:53, 8.04s/it] {'loss': 0.3066, 'grad_norm': 0.6531808298074107, 'learning_rate': 1.2353671043475767e-06, 'epoch': 0.78} + 78%|███████▊ | 9486/12188 [1:03:23<6:01:53, 8.04s/it] 78%|███████▊ | 9487/12188 [1:03:30<5:44:15, 7.65s/it] {'loss': 0.3151, 'grad_norm': 0.6544563530183068, 'learning_rate': 1.2344928098003151e-06, 'epoch': 0.78} + 78%|███████▊ | 9487/12188 [1:03:30<5:44:15, 7.65s/it] 78%|███████▊ | 9488/12188 [1:03:37<5:28:37, 7.30s/it] {'loss': 0.3289, 'grad_norm': 0.7358278499249498, 'learning_rate': 1.2336187811671213e-06, 'epoch': 0.78} + 78%|███████▊ | 9488/12188 [1:03:37<5:28:37, 7.30s/it] 78%|███████▊ | 9489/12188 [1:03:43<5:19:48, 7.11s/it] {'loss': 0.2896, 'grad_norm': 0.7484987615520949, 'learning_rate': 1.23274501850972e-06, 'epoch': 0.78} + 78%|███████▊ | 9489/12188 [1:03:43<5:19:48, 7.11s/it] 78%|███████▊ | 9490/12188 [1:03:50<5:19:37, 7.11s/it] {'loss': 0.2864, 'grad_norm': 0.6700905868323748, 'learning_rate': 1.2318715218898108e-06, 'epoch': 0.78} + 78%|███████▊ | 9490/12188 [1:03:50<5:19:37, 7.11s/it] 78%|███████▊ | 9491/12188 [1:03:57<5:17:16, 7.06s/it] {'loss': 0.3174, 'grad_norm': 0.691787996276545, 'learning_rate': 1.2309982913690827e-06, 'epoch': 0.78} + 78%|███████▊ | 9491/12188 [1:03:57<5:17:16, 7.06s/it] 78%|███████▊ | 9492/12188 [1:04:05<5:26:55, 7.28s/it] {'loss': 0.339, 'grad_norm': 0.6547584822390023, 'learning_rate': 1.2301253270091983e-06, 'epoch': 0.78} + 78%|███████▊ | 9492/12188 [1:04:05<5:26:55, 7.28s/it] 78%|███████▊ | 9493/12188 [1:04:12<5:23:49, 7.21s/it] {'loss': 0.3063, 'grad_norm': 0.6824202785269682, 'learning_rate': 1.2292526288718065e-06, 'epoch': 0.78} + 78%|███████▊ | 9493/12188 [1:04:12<5:23:49, 7.21s/it] 78%|███████▊ | 9494/12188 [1:04:19<5:22:16, 7.18s/it] {'loss': 0.3247, 'grad_norm': 0.6281799792090326, 'learning_rate': 1.2283801970185378e-06, 'epoch': 0.78} + 78%|███████▊ | 9494/12188 [1:04:19<5:22:16, 7.18s/it] 78%|███████▊ | 9495/12188 [1:04:26<5:20:02, 7.13s/it] {'loss': 0.3144, 'grad_norm': 0.7389851795904141, 'learning_rate': 1.2275080315109984e-06, 'epoch': 0.78} + 78%|███████▊ | 9495/12188 [1:04:26<5:20:02, 7.13s/it] 78%|███████▊ | 9496/12188 [1:04:33<5:20:02, 7.13s/it] {'loss': 0.313, 'grad_norm': 0.6473730926574172, 'learning_rate': 1.2266361324107818e-06, 'epoch': 0.78} + 78%|███████▊ | 9496/12188 [1:04:33<5:20:02, 7.13s/it] 78%|███████▊ | 9497/12188 [1:04:41<5:26:40, 7.28s/it] {'loss': 0.3364, 'grad_norm': 0.6537587758075698, 'learning_rate': 1.2257644997794593e-06, 'epoch': 0.78} + 78%|███████▊ | 9497/12188 [1:04:41<5:26:40, 7.28s/it] 78%|███████▊ | 9498/12188 [1:04:48<5:25:45, 7.27s/it] {'loss': 0.316, 'grad_norm': 0.6852850177916169, 'learning_rate': 1.224893133678583e-06, 'epoch': 0.78} + 78%|███████▊ | 9498/12188 [1:04:48<5:25:45, 7.27s/it] 78%|███████▊ | 9499/12188 [1:04:58<5:56:35, 7.96s/it] {'loss': 0.3221, 'grad_norm': 0.6699206881801049, 'learning_rate': 1.2240220341696896e-06, 'epoch': 0.78} + 78%|███████▊ | 9499/12188 [1:04:58<5:56:35, 7.96s/it] 78%|███████▊ | 9500/12188 [1:05:05<5:43:28, 7.67s/it] {'loss': 0.3059, 'grad_norm': 0.6932171531900697, 'learning_rate': 1.2231512013142921e-06, 'epoch': 0.78} + 78%|███████▊ | 9500/12188 [1:05:05<5:43:28, 7.67s/it] 78%|███████▊ | 9501/12188 [1:05:12<5:41:35, 7.63s/it] {'loss': 0.2886, 'grad_norm': 0.6711948490161517, 'learning_rate': 1.2222806351738885e-06, 'epoch': 0.78} + 78%|███████▊ | 9501/12188 [1:05:12<5:41:35, 7.63s/it]Traceback (most recent call last): + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/ImageFile.py", line 280, in load + s = read(self.decodermaxblock) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/PngImagePlugin.py", line 989, in load_read + cid, pos, length = self.png.read() + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/PngImagePlugin.py", line 173, in read + length = i32(s) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/_binary.py", line 95, in i32be + return unpack_from(">I", c, o)[0] +struct.error: unpack_from requires a buffer of at least 4 bytes for unpacking 4 bytes at offset 0 (actual buffer size is 0) + +The above exception was the direct cause of the following exception: + +Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 90, in pil_loader + return img.convert("RGB") + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 993, in convert + self.load() + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/ImageFile.py", line 287, in load + raise OSError(msg) from e +OSError: image file is truncated +[Try #0] Failed to fetch sample 4881218 in VC:s3://gui/OS-Atlas/desktop_domain/macos_images. Exception: image file is truncated +Problematic sample: {'image': '20240905_160247_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on '登录'"}, {'from': 'gpt', 'value': '\nclick(x=0.9566, y=0.1)\n'}]} + 78%|███████▊ | 9502/12188 [1:05:19<5:30:59, 7.39s/it] {'loss': 0.2734, 'grad_norm': 0.6411665335639382, 'learning_rate': 1.2214103358099593e-06, 'epoch': 0.78} + 78%|███████▊ | 9502/12188 [1:05:19<5:30:59, 7.39s/it] 78%|███████▊ | 9503/12188 [1:05:26<5:28:57, 7.35s/it] {'loss': 0.3354, 'grad_norm': 0.751426593137183, 'learning_rate': 1.2205403032839597e-06, 'epoch': 0.78} + 78%|███████▊ | 9503/12188 [1:05:26<5:28:57, 7.35s/it] 78%|███████▊ | 9504/12188 [1:05:34<5:26:37, 7.30s/it] {'loss': 0.2891, 'grad_norm': 0.7318218321182025, 'learning_rate': 1.2196705376573326e-06, 'epoch': 0.78} + 78%|███████▊ | 9504/12188 [1:05:34<5:26:37, 7.30s/it] 78%|███████▊ | 9505/12188 [1:05:43<5:56:40, 7.98s/it] {'loss': 0.2669, 'grad_norm': 0.6728346409093117, 'learning_rate': 1.2188010389914978e-06, 'epoch': 0.78} + 78%|███████▊ | 9505/12188 [1:05:43<5:56:40, 7.98s/it] 78%|███████▊ | 9506/12188 [1:05:51<5:52:02, 7.88s/it] {'loss': 0.2737, 'grad_norm': 0.6613771662348439, 'learning_rate': 1.2179318073478579e-06, 'epoch': 0.78} + 78%|███████▊ | 9506/12188 [1:05:51<5:52:02, 7.88s/it] 78%|███████▊ | 9507/12188 [1:05:58<5:40:56, 7.63s/it] {'loss': 0.2927, 'grad_norm': 0.7315991156499347, 'learning_rate': 1.2170628427877996e-06, 'epoch': 0.78} + 78%|███████▊ | 9507/12188 [1:05:58<5:40:56, 7.63s/it] 78%|███████▊ | 9508/12188 [1:06:06<5:48:46, 7.81s/it] {'loss': 0.3061, 'grad_norm': 0.7411721107028592, 'learning_rate': 1.2161941453726838e-06, 'epoch': 0.78} + 78%|███████▊ | 9508/12188 [1:06:06<5:48:46, 7.81s/it] 78%|███████▊ | 9509/12188 [1:06:13<5:38:52, 7.59s/it] {'loss': 0.2879, 'grad_norm': 0.6841386781224476, 'learning_rate': 1.2153257151638598e-06, 'epoch': 0.78} + 78%|███████▊ | 9509/12188 [1:06:13<5:38:52, 7.59s/it] 78%|███████▊ | 9510/12188 [1:06:20<5:32:22, 7.45s/it] {'loss': 0.2844, 'grad_norm': 0.6894908969356803, 'learning_rate': 1.214457552222652e-06, 'epoch': 0.78} + 78%|███████▊ | 9510/12188 [1:06:20<5:32:22, 7.45s/it] 78%|███████▊ | 9511/12188 [1:06:27<5:29:32, 7.39s/it] {'loss': 0.3343, 'grad_norm': 0.7304056016719347, 'learning_rate': 1.2135896566103712e-06, 'epoch': 0.78} + 78%|███████▊ | 9511/12188 [1:06:27<5:29:32, 7.39s/it] 78%|███████▊ | 9512/12188 [1:06:34<5:21:18, 7.20s/it] {'loss': 0.3022, 'grad_norm': 0.7341963287857188, 'learning_rate': 1.2127220283883057e-06, 'epoch': 0.78} + 78%|███████▊ | 9512/12188 [1:06:34<5:21:18, 7.20s/it] 78%|███████▊ | 9513/12188 [1:06:41<5:17:23, 7.12s/it] {'loss': 0.2996, 'grad_norm': 0.6623593461984122, 'learning_rate': 1.211854667617724e-06, 'epoch': 0.78} + 78%|███████▊ | 9513/12188 [1:06:41<5:17:23, 7.12s/it] 78%|███████▊ | 9514/12188 [1:06:48<5:16:13, 7.10s/it] {'loss': 0.3292, 'grad_norm': 0.7024091227599516, 'learning_rate': 1.2109875743598814e-06, 'epoch': 0.78} + 78%|███████▊ | 9514/12188 [1:06:48<5:16:13, 7.10s/it] 78%|███████▊ | 9515/12188 [1:06:55<5:15:09, 7.07s/it] {'loss': 0.293, 'grad_norm': 0.6508531247474956, 'learning_rate': 1.210120748676007e-06, 'epoch': 0.78} + 78%|███████▊ | 9515/12188 [1:06:55<5:15:09, 7.07s/it] 78%|███████▊ | 9516/12188 [1:07:03<5:27:40, 7.36s/it] {'loss': 0.3489, 'grad_norm': 0.7992203546869692, 'learning_rate': 1.2092541906273165e-06, 'epoch': 0.78} + 78%|███████▊ | 9516/12188 [1:07:03<5:27:40, 7.36s/it] 78%|█████��█▊ | 9517/12188 [1:07:10<5:24:57, 7.30s/it] {'loss': 0.3141, 'grad_norm': 0.7098234383916726, 'learning_rate': 1.2083879002750065e-06, 'epoch': 0.78} + 78%|███████▊ | 9517/12188 [1:07:10<5:24:57, 7.30s/it] 78%|███████▊ | 9518/12188 [1:07:17<5:19:25, 7.18s/it] {'loss': 0.3324, 'grad_norm': 1.2456837757467565, 'learning_rate': 1.2075218776802493e-06, 'epoch': 0.78} + 78%|███████▊ | 9518/12188 [1:07:17<5:19:25, 7.18s/it] 78%|███████▊ | 9519/12188 [1:07:24<5:13:59, 7.06s/it] {'loss': 0.2937, 'grad_norm': 0.7673006978192884, 'learning_rate': 1.2066561229042046e-06, 'epoch': 0.78} + 78%|███████▊ | 9519/12188 [1:07:24<5:13:59, 7.06s/it] 78%|███████▊ | 9520/12188 [1:07:31<5:12:17, 7.02s/it] {'loss': 0.3111, 'grad_norm': 0.7037646672558758, 'learning_rate': 1.2057906360080113e-06, 'epoch': 0.78} + 78%|███████▊ | 9520/12188 [1:07:31<5:12:17, 7.02s/it] 78%|███████▊ | 9521/12188 [1:07:39<5:19:24, 7.19s/it] {'loss': 0.2989, 'grad_norm': 0.7326650897633415, 'learning_rate': 1.2049254170527857e-06, 'epoch': 0.78} + 78%|███████▊ | 9521/12188 [1:07:39<5:19:24, 7.19s/it] 78%|███████▊ | 9522/12188 [1:07:46<5:20:45, 7.22s/it] {'loss': 0.2934, 'grad_norm': 0.6770416018904425, 'learning_rate': 1.2040604660996318e-06, 'epoch': 0.78} + 78%|███████▊ | 9522/12188 [1:07:46<5:20:45, 7.22s/it] 78%|███████▊ | 9523/12188 [1:07:53<5:22:45, 7.27s/it] {'loss': 0.3039, 'grad_norm': 0.6667713105486341, 'learning_rate': 1.2031957832096276e-06, 'epoch': 0.78} + 78%|███████▊ | 9523/12188 [1:07:53<5:22:45, 7.27s/it] 78%|███████▊ | 9524/12188 [1:08:01<5:26:07, 7.35s/it] {'loss': 0.2835, 'grad_norm': 0.8132795404644281, 'learning_rate': 1.202331368443838e-06, 'epoch': 0.78} + 78%|███████▊ | 9524/12188 [1:08:01<5:26:07, 7.35s/it] 78%|███████▊ | 9525/12188 [1:08:08<5:27:56, 7.39s/it] {'loss': 0.3141, 'grad_norm': 0.7202512886037464, 'learning_rate': 1.2014672218633062e-06, 'epoch': 0.78} + 78%|███████▊ | 9525/12188 [1:08:08<5:27:56, 7.39s/it] 78%|███████▊ | 9526/12188 [1:08:16<5:37:27, 7.61s/it] {'loss': 0.3338, 'grad_norm': 0.9278549009467425, 'learning_rate': 1.2006033435290548e-06, 'epoch': 0.78} + 78%|███████▊ | 9526/12188 [1:08:16<5:37:27, 7.61s/it] 78%|███████▊ | 9527/12188 [1:08:24<5:43:15, 7.74s/it] {'loss': 0.3096, 'grad_norm': 0.7530893852172502, 'learning_rate': 1.1997397335020927e-06, 'epoch': 0.78} + 78%|███████▊ | 9527/12188 [1:08:24<5:43:15, 7.74s/it] 78%|███████▊ | 9528/12188 [1:08:31<5:30:05, 7.45s/it] {'loss': 0.3499, 'grad_norm': 0.7527084011929829, 'learning_rate': 1.1988763918434037e-06, 'epoch': 0.78} + 78%|███████▊ | 9528/12188 [1:08:31<5:30:05, 7.45s/it] 78%|███████▊ | 9529/12188 [1:08:38<5:22:02, 7.27s/it] {'loss': 0.3211, 'grad_norm': 0.6764922012509791, 'learning_rate': 1.198013318613957e-06, 'epoch': 0.78} + 78%|███████▊ | 9529/12188 [1:08:38<5:22:02, 7.27s/it] 78%|███████▊ | 9530/12188 [1:08:45<5:23:38, 7.31s/it] {'loss': 0.2737, 'grad_norm': 0.6975693328404183, 'learning_rate': 1.1971505138747024e-06, 'epoch': 0.78} + 78%|███████▊ | 9530/12188 [1:08:45<5:23:38, 7.31s/it] 78%|███████▊ | 9531/12188 [1:08:55<5:47:34, 7.85s/it] {'loss': 0.2882, 'grad_norm': 0.7265654872547649, 'learning_rate': 1.1962879776865676e-06, 'epoch': 0.78} + 78%|███████▊ | 9531/12188 [1:08:55<5:47:34, 7.85s/it] 78%|███████▊ | 9532/12188 [1:09:01<5:34:34, 7.56s/it] {'loss': 0.3108, 'grad_norm': 0.6864282363043378, 'learning_rate': 1.1954257101104644e-06, 'epoch': 0.78} + 78%|███████▊ | 9532/12188 [1:09:01<5:34:34, 7.56s/it] 78%|███████▊ | 9533/12188 [1:09:09<5:37:26, 7.63s/it] {'loss': 0.3079, 'grad_norm': 0.7613611775507109, 'learning_rate': 1.1945637112072867e-06, 'epoch': 0.78} + 78%|███████▊ | 9533/12188 [1:09:09<5:37:26, 7.63s/it] 78%|███████▊ | 9534/12188 [1:09:16<5:27:50, 7.41s/it] {'loss': 0.3008, 'grad_norm': 0.7548069337842737, 'learning_rate': 1.1937019810379047e-06, 'epoch': 0.78} + 78%|███████▊ | 9534/12188 [1:09:16<5:27:50, 7.41s/it]Invalidate trace cache @ step 2: expected module 1, but got module 364 + 78%|███████▊ | 9535/12188 [1:09:23<5:16:00, 7.15s/it] {'loss': 0.6056, 'grad_norm': 0.6414297007594181, 'learning_rate': 1.1928405196631742e-06, 'epoch': 0.78} + 78%|███████▊ | 9535/12188 [1:09:23<5:16:00, 7.15s/it] 78%|███████▊ | 9536/12188 [1:09:32<5:45:56, 7.83s/it] {'loss': 0.2992, 'grad_norm': 0.7193756768965877, 'learning_rate': 1.191979327143929e-06, 'epoch': 0.78} + 78%|███████▊ | 9536/12188 [1:09:32<5:45:56, 7.83s/it] 78%|███████▊ | 9537/12188 [1:09:42<6:15:43, 8.50s/it] {'loss': 0.2944, 'grad_norm': 0.7702850571528689, 'learning_rate': 1.1911184035409863e-06, 'epoch': 0.78} + 78%|███████▊ | 9537/12188 [1:09:42<6:15:43, 8.50s/it] 78%|███████▊ | 9538/12188 [1:09:49<5:54:02, 8.02s/it] {'loss': 0.3154, 'grad_norm': 0.7663066410445181, 'learning_rate': 1.1902577489151413e-06, 'epoch': 0.78} + 78%|███████▊ | 9538/12188 [1:09:49<5:54:02, 8.02s/it] 78%|███████▊ | 9539/12188 [1:09:56<5:35:07, 7.59s/it] {'loss': 0.2865, 'grad_norm': 0.76165944330163, 'learning_rate': 1.189397363327175e-06, 'epoch': 0.78} + 78%|███████▊ | 9539/12188 [1:09:56<5:35:07, 7.59s/it] 78%|███████▊ | 9540/12188 [1:10:03<5:30:20, 7.49s/it] {'loss': 0.3179, 'grad_norm': 0.7094536144078059, 'learning_rate': 1.1885372468378443e-06, 'epoch': 0.78} + 78%|███████▊ | 9540/12188 [1:10:03<5:30:20, 7.49s/it] 78%|███████▊ | 9541/12188 [1:10:10<5:22:23, 7.31s/it] {'loss': 0.3155, 'grad_norm': 0.7416343860459857, 'learning_rate': 1.1876773995078883e-06, 'epoch': 0.78} + 78%|███████▊ | 9541/12188 [1:10:10<5:22:23, 7.31s/it] 78%|███████▊ | 9542/12188 [1:10:17<5:14:50, 7.14s/it] {'loss': 0.2969, 'grad_norm': 0.7200250986599923, 'learning_rate': 1.1868178213980296e-06, 'epoch': 0.78} + 78%|███████▊ | 9542/12188 [1:10:17<5:14:50, 7.14s/it] 78%|███████▊ | 9543/12188 [1:10:24<5:16:25, 7.18s/it] {'loss': 0.2719, 'grad_norm': 0.6837374063455417, 'learning_rate': 1.1859585125689715e-06, 'epoch': 0.78} + 78%|███████▊ | 9543/12188 [1:10:24<5:16:25, 7.18s/it] 78%|███████▊ | 9544/12188 [1:10:31<5:16:28, 7.18s/it] {'loss': 0.2618, 'grad_norm': 0.6569531242541486, 'learning_rate': 1.1850994730813937e-06, 'epoch': 0.78} + 78%|███████▊ | 9544/12188 [1:10:31<5:16:28, 7.18s/it] 78%|███████▊ | 9545/12188 [1:10:39<5:23:19, 7.34s/it] {'loss': 0.3115, 'grad_norm': 0.7087149532167772, 'learning_rate': 1.1842407029959634e-06, 'epoch': 0.78} + 78%|███████▊ | 9545/12188 [1:10:39<5:23:19, 7.34s/it] 78%|███████▊ | 9546/12188 [1:10:45<5:14:56, 7.15s/it] {'loss': 0.3078, 'grad_norm': 0.9693731831225075, 'learning_rate': 1.1833822023733232e-06, 'epoch': 0.78} + 78%|███████▊ | 9546/12188 [1:10:45<5:14:56, 7.15s/it] 78%|███████▊ | 9547/12188 [1:10:52<5:12:13, 7.09s/it] {'loss': 0.3045, 'grad_norm': 0.6441756557316005, 'learning_rate': 1.1825239712741e-06, 'epoch': 0.78} + 78%|███████▊ | 9547/12188 [1:10:52<5:12:13, 7.09s/it] 78%|███████▊ | 9548/12188 [1:11:00<5:17:28, 7.22s/it] {'loss': 0.2776, 'grad_norm': 0.6354188624408107, 'learning_rate': 1.1816660097589016e-06, 'epoch': 0.78} + 78%|███████▊ | 9548/12188 [1:11:00<5:17:28, 7.22s/it] 78%|███████▊ | 9549/12188 [1:11:07<5:12:49, 7.11s/it] {'loss': 0.2865, 'grad_norm': 0.7530952722059109, 'learning_rate': 1.180808317888314e-06, 'epoch': 0.78} + 78%|█��█████▊ | 9549/12188 [1:11:07<5:12:49, 7.11s/it] 78%|███████▊ | 9550/12188 [1:11:13<5:07:20, 6.99s/it] {'loss': 0.3113, 'grad_norm': 0.7084901157532715, 'learning_rate': 1.1799508957229066e-06, 'epoch': 0.78} + 78%|███████▊ | 9550/12188 [1:11:13<5:07:20, 6.99s/it] 78%|███████▊ | 9551/12188 [1:11:21<5:13:15, 7.13s/it] {'loss': 0.2975, 'grad_norm': 0.7005974571266372, 'learning_rate': 1.1790937433232313e-06, 'epoch': 0.78} + 78%|███████▊ | 9551/12188 [1:11:21<5:13:15, 7.13s/it] 78%|███████▊ | 9552/12188 [1:11:28<5:16:40, 7.21s/it] {'loss': 0.274, 'grad_norm': 0.6356441288606516, 'learning_rate': 1.1782368607498174e-06, 'epoch': 0.78} + 78%|███████▊ | 9552/12188 [1:11:28<5:16:40, 7.21s/it] 78%|███████▊ | 9553/12188 [1:11:35<5:11:02, 7.08s/it] {'loss': 0.3287, 'grad_norm': 0.7201159154842803, 'learning_rate': 1.1773802480631758e-06, 'epoch': 0.78} + 78%|███████▊ | 9553/12188 [1:11:35<5:11:02, 7.08s/it] 78%|███████▊ | 9554/12188 [1:11:43<5:22:34, 7.35s/it] {'loss': 0.3462, 'grad_norm': 0.7272091857902244, 'learning_rate': 1.1765239053237981e-06, 'epoch': 0.78} + 78%|███████▊ | 9554/12188 [1:11:43<5:22:34, 7.35s/it] 78%|███████▊ | 9555/12188 [1:11:50<5:23:42, 7.38s/it] {'loss': 0.3048, 'grad_norm': 0.670226406921644, 'learning_rate': 1.1756678325921595e-06, 'epoch': 0.78} + 78%|███████▊ | 9555/12188 [1:11:51<5:23:42, 7.38s/it] 78%|███████▊ | 9556/12188 [1:11:58<5:25:41, 7.42s/it] {'loss': 0.306, 'grad_norm': 0.6642360082764814, 'learning_rate': 1.1748120299287158e-06, 'epoch': 0.78} + 78%|███████▊ | 9556/12188 [1:11:58<5:25:41, 7.42s/it] 78%|███████▊ | 9557/12188 [1:12:05<5:23:57, 7.39s/it] {'loss': 0.3254, 'grad_norm': 0.7324084746116774, 'learning_rate': 1.1739564973938993e-06, 'epoch': 0.78} + 78%|███████▊ | 9557/12188 [1:12:05<5:23:57, 7.39s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f447e937bf0> +[Try #0] Failed to fetch sample 4573768 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f447e937bf0> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Printable version'"}, {'from': 'gpt', 'value': '\nclick(x=0.857, y=0.428)\n'}]} + 78%|███████▊ | 9558/12188 [1:12:12<5:16:22, 7.22s/it] {'loss': 0.2937, 'grad_norm': 0.8029795081658178, 'learning_rate': 1.173101235048129e-06, 'epoch': 0.78} + 78%|███████▊ | 9558/12188 [1:12:12<5:16:22, 7.22s/it] 78%|███████▊ | 9559/12188 [1:12:20<5:27:33, 7.48s/it] {'loss': 0.3076, 'grad_norm': 0.6933923616129102, 'learning_rate': 1.1722462429518006e-06, 'epoch': 0.78} + 78%|███████▊ | 9559/12188 [1:12:20<5:27:33, 7.48s/it] 78%|████��██▊ | 9560/12188 [1:12:27<5:22:18, 7.36s/it] {'loss': 0.3008, 'grad_norm': 0.668708195886214, 'learning_rate': 1.171391521165292e-06, 'epoch': 0.78} + 78%|███████▊ | 9560/12188 [1:12:27<5:22:18, 7.36s/it] 78%|███████▊ | 9561/12188 [1:12:34<5:11:16, 7.11s/it] {'loss': 0.3521, 'grad_norm': 0.735302710110953, 'learning_rate': 1.170537069748965e-06, 'epoch': 0.78} + 78%|███████▊ | 9561/12188 [1:12:34<5:11:16, 7.11s/it] 78%|███████▊ | 9562/12188 [1:12:41<5:10:21, 7.09s/it] {'loss': 0.2928, 'grad_norm': 0.7321737197421424, 'learning_rate': 1.1696828887631557e-06, 'epoch': 0.78} + 78%|███████▊ | 9562/12188 [1:12:41<5:10:21, 7.09s/it] 78%|███████▊ | 9563/12188 [1:12:48<5:08:52, 7.06s/it] {'loss': 0.2955, 'grad_norm': 0.7154487768907141, 'learning_rate': 1.1688289782681889e-06, 'epoch': 0.78} + 78%|███████▊ | 9563/12188 [1:12:48<5:08:52, 7.06s/it][2025-08-17 18:41:11,362] [WARNING] [stage3.py:2118:step] 1 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time + 78%|███████▊ | 9564/12188 [1:12:57<5:40:56, 7.80s/it] {'loss': 0.2761, 'grad_norm': 0.7703276047921173, 'learning_rate': 1.1679753383243626e-06, 'epoch': 0.78} + 78%|███████▊ | 9564/12188 [1:12:57<5:40:56, 7.80s/it] 78%|███████▊ | 9565/12188 [1:13:08<6:17:49, 8.64s/it] {'loss': 0.3004, 'grad_norm': 0.673878144513089, 'learning_rate': 1.1671219689919627e-06, 'epoch': 0.78} + 78%|███████▊ | 9565/12188 [1:13:08<6:17:49, 8.64s/it] 78%|███████▊ | 9566/12188 [1:13:15<5:54:26, 8.11s/it] {'loss': 0.2869, 'grad_norm': 0.7039668937665268, 'learning_rate': 1.1662688703312503e-06, 'epoch': 0.78} + 78%|███████▊ | 9566/12188 [1:13:15<5:54:26, 8.11s/it] 78%|███████▊ | 9567/12188 [1:13:23<5:51:31, 8.05s/it] {'loss': 0.2991, 'grad_norm': 0.7647827275431048, 'learning_rate': 1.1654160424024718e-06, 'epoch': 0.78} + 78%|███████▊ | 9567/12188 [1:13:23<5:51:31, 8.05s/it] 79%|███████▊ | 9568/12188 [1:13:33<6:18:33, 8.67s/it] {'loss': 0.2682, 'grad_norm': 0.7630141987047323, 'learning_rate': 1.1645634852658504e-06, 'epoch': 0.79} + 79%|███████▊ | 9568/12188 [1:13:33<6:18:33, 8.67s/it] 79%|███████▊ | 9569/12188 [1:13:40<5:58:30, 8.21s/it] {'loss': 0.3007, 'grad_norm': 0.6368847503120773, 'learning_rate': 1.1637111989815953e-06, 'epoch': 0.79} + 79%|███████▊ | 9569/12188 [1:13:40<5:58:30, 8.21s/it] 79%|███████▊ | 9570/12188 [1:13:47<5:40:53, 7.81s/it] {'loss': 0.277, 'grad_norm': 0.7464328940369462, 'learning_rate': 1.1628591836098902e-06, 'epoch': 0.79} + 79%|███████▊ | 9570/12188 [1:13:47<5:40:53, 7.81s/it] 79%|███████▊ | 9571/12188 [1:13:54<5:36:21, 7.71s/it] {'loss': 0.3065, 'grad_norm': 0.6080051610608936, 'learning_rate': 1.162007439210906e-06, 'epoch': 0.79} + 79%|███████▊ | 9571/12188 [1:13:54<5:36:21, 7.71s/it] 79%|███████▊ | 9572/12188 [1:14:04<5:54:50, 8.14s/it] {'loss': 0.313, 'grad_norm': 0.7341045821291764, 'learning_rate': 1.161155965844789e-06, 'epoch': 0.79} + 79%|███████▊ | 9572/12188 [1:14:04<5:54:50, 8.14s/it] 79%|███████▊ | 9573/12188 [1:14:11<5:40:22, 7.81s/it] {'loss': 0.2805, 'grad_norm': 0.6337592222787742, 'learning_rate': 1.1603047635716701e-06, 'epoch': 0.79} + 79%|███████▊ | 9573/12188 [1:14:11<5:40:22, 7.81s/it] 79%|███████▊ | 9574/12188 [1:14:20<5:59:45, 8.26s/it] {'loss': 0.3042, 'grad_norm': 0.7330662729832812, 'learning_rate': 1.1594538324516614e-06, 'epoch': 0.79} + 79%|███████▊ | 9574/12188 [1:14:20<5:59:45, 8.26s/it] 79%|███████▊ | 9575/12188 [1:14:28<5:56:35, 8.19s/it] {'loss': 0.3004, 'grad_norm': 0.6693634646028594, 'learning_rate': 1.1586031725448515e-06, 'epoch': 0.79} + 79%|███████▊ | 9575/12188 [1:14:28<5:56:35, 8.19s/it] 79%|███████▊ | 9576/12188 [1:14:35<5:40:18, 7.82s/it] {'loss': 0.2923, 'grad_norm': 0.6746861486514295, 'learning_rate': 1.157752783911315e-06, 'epoch': 0.79} + 79%|███████▊ | 9576/12188 [1:14:35<5:40:18, 7.82s/it] 79%|███████▊ | 9577/12188 [1:14:42<5:36:58, 7.74s/it] {'loss': 0.2789, 'grad_norm': 0.6815858081018751, 'learning_rate': 1.1569026666111021e-06, 'epoch': 0.79} + 79%|███████▊ | 9577/12188 [1:14:42<5:36:58, 7.74s/it] 79%|███████▊ | 9578/12188 [1:14:49<5:25:54, 7.49s/it] {'loss': 0.2855, 'grad_norm': 0.7394440059884214, 'learning_rate': 1.1560528207042492e-06, 'epoch': 0.79} + 79%|███████▊ | 9578/12188 [1:14:49<5:25:54, 7.49s/it]Invalidate trace cache @ step 2: expected module 1, but got module 364 + 79%|███████▊ | 9579/12188 [1:14:56<5:16:52, 7.29s/it] {'loss': 0.6453, 'grad_norm': 0.5993192816594258, 'learning_rate': 1.1552032462507712e-06, 'epoch': 0.79} + 79%|███████▊ | 9579/12188 [1:14:56<5:16:52, 7.29s/it] 79%|███████▊ | 9580/12188 [1:15:04<5:17:43, 7.31s/it] {'loss': 0.3525, 'grad_norm': 0.6191756958040064, 'learning_rate': 1.1543539433106631e-06, 'epoch': 0.79} + 79%|███████▊ | 9580/12188 [1:15:04<5:17:43, 7.31s/it] 79%|███████▊ | 9581/12188 [1:15:11<5:13:32, 7.22s/it] {'loss': 0.2945, 'grad_norm': 0.6808883229411661, 'learning_rate': 1.1535049119439007e-06, 'epoch': 0.79} + 79%|███████▊ | 9581/12188 [1:15:11<5:13:32, 7.22s/it] 79%|███████▊ | 9582/12188 [1:15:17<5:10:01, 7.14s/it] {'loss': 0.2733, 'grad_norm': 0.6699051196553077, 'learning_rate': 1.1526561522104406e-06, 'epoch': 0.79} + 79%|███████▊ | 9582/12188 [1:15:17<5:10:01, 7.14s/it] 79%|███████▊ | 9583/12188 [1:15:24<5:05:39, 7.04s/it] {'loss': 0.3015, 'grad_norm': 0.8801298633030374, 'learning_rate': 1.1518076641702214e-06, 'epoch': 0.79} + 79%|███████▊ | 9583/12188 [1:15:24<5:05:39, 7.04s/it] 79%|███████▊ | 9584/12188 [1:15:32<5:13:56, 7.23s/it] {'loss': 0.2728, 'grad_norm': 0.7134631934537354, 'learning_rate': 1.1509594478831642e-06, 'epoch': 0.79} + 79%|███████▊ | 9584/12188 [1:15:32<5:13:56, 7.23s/it] 79%|███████▊ | 9585/12188 [1:15:39<5:12:48, 7.21s/it] {'loss': 0.3179, 'grad_norm': 0.7158844308637997, 'learning_rate': 1.1501115034091647e-06, 'epoch': 0.79} + 79%|███████▊ | 9585/12188 [1:15:39<5:12:48, 7.21s/it] 79%|███████▊ | 9586/12188 [1:15:47<5:16:52, 7.31s/it] {'loss': 0.3134, 'grad_norm': 0.7415499902566032, 'learning_rate': 1.149263830808106e-06, 'epoch': 0.79} + 79%|███████▊ | 9586/12188 [1:15:47<5:16:52, 7.31s/it] 79%|███████▊ | 9587/12188 [1:15:55<5:24:54, 7.49s/it] {'loss': 0.2881, 'grad_norm': 0.8060991717623939, 'learning_rate': 1.1484164301398504e-06, 'epoch': 0.79} + 79%|███████▊ | 9587/12188 [1:15:55<5:24:54, 7.49s/it] 79%|███████▊ | 9588/12188 [1:16:02<5:22:08, 7.43s/it] {'loss': 0.2619, 'grad_norm': 0.6895199003234622, 'learning_rate': 1.147569301464237e-06, 'epoch': 0.79} + 79%|███████▊ | 9588/12188 [1:16:02<5:22:08, 7.43s/it] 79%|███████▊ | 9589/12188 [1:16:09<5:19:06, 7.37s/it] {'loss': 0.3277, 'grad_norm': 0.9962710405300726, 'learning_rate': 1.1467224448410913e-06, 'epoch': 0.79} + 79%|███████▊ | 9589/12188 [1:16:09<5:19:06, 7.37s/it] 79%|███████▊ | 9590/12188 [1:16:16<5:14:38, 7.27s/it] {'loss': 0.2992, 'grad_norm': 0.8268005161459283, 'learning_rate': 1.1458758603302145e-06, 'epoch': 0.79} + 79%|███████▊ | 9590/12188 [1:16:16<5:14:38, 7.27s/it] 79%|███████▊ | 9591/12188 [1:16:23<5:11:35, 7.20s/it] {'loss': 0.2964, 'grad_norm': 0.6775982666155976, 'learning_rate': 1.1450295479913925e-06, 'epoch': 0.79} + 79%|███████▊ | 9591/12188 [1:16:23<5:11:35, 7.20s/it] 79%|███████▊ | 9592/12188 [1:16:33<5:46:19, 8.00s/it] {'loss': 0.3728, 'grad_norm': 0.700604773012843, 'learning_rate': 1.1441835078843915e-06, 'epoch': 0.79} + 79%|███████▊ | 9592/12188 [1:16:33<5:46:19, 8.00s/it] 79%|███████▊ | 9593/12188 [1:16:41<5:42:55, 7.93s/it] {'loss': 0.2657, 'grad_norm': 0.736130714002193, 'learning_rate': 1.1433377400689566e-06, 'epoch': 0.79} + 79%|███████▊ | 9593/12188 [1:16:41<5:42:55, 7.93s/it] 79%|███████▊ | 9594/12188 [1:16:48<5:31:06, 7.66s/it] {'loss': 0.2901, 'grad_norm': 0.7079197295691078, 'learning_rate': 1.1424922446048125e-06, 'epoch': 0.79} + 79%|███████▊ | 9594/12188 [1:16:48<5:31:06, 7.66s/it] 79%|███████▊ | 9595/12188 [1:16:58<5:59:02, 8.31s/it] {'loss': 0.257, 'grad_norm': 0.6508326455707649, 'learning_rate': 1.1416470215516707e-06, 'epoch': 0.79} + 79%|███████▊ | 9595/12188 [1:16:58<5:59:02, 8.31s/it] 79%|███████▊ | 9596/12188 [1:17:05<5:51:51, 8.14s/it] {'loss': 0.3288, 'grad_norm': 0.8100833227573343, 'learning_rate': 1.1408020709692152e-06, 'epoch': 0.79} + 79%|███████▊ | 9596/12188 [1:17:05<5:51:51, 8.14s/it] 79%|███████▊ | 9597/12188 [1:17:12<5:34:22, 7.74s/it] {'loss': 0.3002, 'grad_norm': 0.708226885548412, 'learning_rate': 1.1399573929171187e-06, 'epoch': 0.79} + 79%|███████▊ | 9597/12188 [1:17:12<5:34:22, 7.74s/it] 79%|███████▊ | 9598/12188 [1:17:19<5:26:07, 7.56s/it] {'loss': 0.3062, 'grad_norm': 0.647181427796152, 'learning_rate': 1.1391129874550278e-06, 'epoch': 0.79} + 79%|███████▊ | 9598/12188 [1:17:19<5:26:07, 7.56s/it] 79%|███████▉ | 9599/12188 [1:17:26<5:18:34, 7.38s/it] {'loss': 0.3453, 'grad_norm': 0.9709615496345413, 'learning_rate': 1.1382688546425768e-06, 'epoch': 0.79} + 79%|███████▉ | 9599/12188 [1:17:26<5:18:34, 7.38s/it] 79%|███████▉ | 9600/12188 [1:17:33<5:13:57, 7.28s/it] {'loss': 0.2702, 'grad_norm': 0.6234264724080593, 'learning_rate': 1.1374249945393727e-06, 'epoch': 0.79} + 79%|███████▉ | 9600/12188 [1:17:33<5:13:57, 7.28s/it] 79%|███████▉ | 9601/12188 [1:17:40<5:08:06, 7.15s/it] {'loss': 0.3346, 'grad_norm': 0.6976645788127996, 'learning_rate': 1.1365814072050108e-06, 'epoch': 0.79} + 79%|███████▉ | 9601/12188 [1:17:40<5:08:06, 7.15s/it] 79%|███████▉ | 9602/12188 [1:17:48<5:13:25, 7.27s/it] {'loss': 0.3223, 'grad_norm': 0.6663091475818247, 'learning_rate': 1.1357380926990636e-06, 'epoch': 0.79} + 79%|███████▉ | 9602/12188 [1:17:48<5:13:25, 7.27s/it] 79%|███████▉ | 9603/12188 [1:17:54<5:05:20, 7.09s/it] {'loss': 0.2922, 'grad_norm': 0.7201820501034318, 'learning_rate': 1.1348950510810825e-06, 'epoch': 0.79} + 79%|███████▉ | 9603/12188 [1:17:54<5:05:20, 7.09s/it] 79%|███████▉ | 9604/12188 [1:18:04<5:33:40, 7.75s/it] {'loss': 0.2983, 'grad_norm': 0.7490777800008356, 'learning_rate': 1.1340522824106032e-06, 'epoch': 0.79} + 79%|███████▉ | 9604/12188 [1:18:04<5:33:40, 7.75s/it] 79%|███████▉ | 9605/12188 [1:18:11<5:23:30, 7.51s/it] {'loss': 0.2769, 'grad_norm': 0.8471111119197429, 'learning_rate': 1.133209786747142e-06, 'epoch': 0.79} + 79%|███████▉ | 9605/12188 [1:18:11<5:23:30, 7.51s/it] 79%|███████▉ | 9606/12188 [1:18:17<5:13:56, 7.30s/it] {'loss': 0.2788, 'grad_norm': 0.734753515454393, 'learning_rate': 1.1323675641501919e-06, 'epoch': 0.79} + 79%|███████▉ | 9606/12188 [1:18:17<5:13:56, 7.30s/it] 79%|███████▉ | 9607/12188 [1:18:25<5:13:47, 7.29s/it] {'loss': 0.3096, 'grad_norm': 0.6764297272628402, 'learning_rate': 1.131525614679232e-06, 'epoch': 0.79} + 79%|███████▉ | 9607/12188 [1:18:25<5:13:47, 7.29s/it] 79%|███████▉ | 9608/12188 [1:18:32<5:18:54, 7.42s/it] {'loss': 0.3379, 'grad_norm': 0.668225661054991, 'learning_rate': 1.1306839383937179e-06, 'epoch': 0.79} + 79%|███████▉ | 9608/12188 [1:18:32<5:18:54, 7.42s/it] 79%|███████▉ | 9609/12188 [1:18:41<5:34:13, 7.78s/it] {'loss': 0.2873, 'grad_norm': 0.7188311963385298, 'learning_rate': 1.129842535353086e-06, 'epoch': 0.79} + 79%|███████▉ | 9609/12188 [1:18:41<5:34:13, 7.78s/it] 79%|███████▉ | 9610/12188 [1:18:48<5:27:05, 7.61s/it] {'loss': 0.2976, 'grad_norm': 0.8488591155965975, 'learning_rate': 1.1290014056167586e-06, 'epoch': 0.79} + 79%|███████▉ | 9610/12188 [1:18:48<5:27:05, 7.61s/it] 79%|███████▉ | 9611/12188 [1:18:56<5:22:51, 7.52s/it] {'loss': 0.3101, 'grad_norm': 0.7889555593915561, 'learning_rate': 1.1281605492441306e-06, 'epoch': 0.79} + 79%|███████▉ | 9611/12188 [1:18:56<5:22:51, 7.52s/it] 79%|███████▉ | 9612/12188 [1:19:03<5:23:04, 7.52s/it] {'loss': 0.2883, 'grad_norm': 0.7818326159633858, 'learning_rate': 1.1273199662945856e-06, 'epoch': 0.79} + 79%|███████▉ | 9612/12188 [1:19:03<5:23:04, 7.52s/it] 79%|███████▉ | 9613/12188 [1:19:11<5:21:40, 7.50s/it] {'loss': 0.2862, 'grad_norm': 0.6880137389730324, 'learning_rate': 1.1264796568274811e-06, 'epoch': 0.79} + 79%|███████▉ | 9613/12188 [1:19:11<5:21:40, 7.50s/it] 79%|███████▉ | 9614/12188 [1:19:17<5:12:32, 7.29s/it] {'loss': 0.2882, 'grad_norm': 0.6954698362464777, 'learning_rate': 1.1256396209021598e-06, 'epoch': 0.79} + 79%|███████▉ | 9614/12188 [1:19:17<5:12:32, 7.29s/it] 79%|███████▉ | 9615/12188 [1:19:24<5:06:03, 7.14s/it] {'loss': 0.3076, 'grad_norm': 0.7560182165262116, 'learning_rate': 1.1247998585779451e-06, 'epoch': 0.79} + 79%|███████▉ | 9615/12188 [1:19:24<5:06:03, 7.14s/it] 79%|███████▉ | 9616/12188 [1:19:32<5:14:14, 7.33s/it] {'loss': 0.2975, 'grad_norm': 0.6900567105347082, 'learning_rate': 1.1239603699141377e-06, 'epoch': 0.79} + 79%|███████▉ | 9616/12188 [1:19:32<5:14:14, 7.33s/it] 79%|███████▉ | 9617/12188 [1:19:39<5:15:09, 7.36s/it] {'loss': 0.3029, 'grad_norm': 0.7728564504956428, 'learning_rate': 1.1231211549700227e-06, 'epoch': 0.79} + 79%|███████▉ | 9617/12188 [1:19:39<5:15:09, 7.36s/it] 79%|███████▉ | 9618/12188 [1:19:48<5:34:47, 7.82s/it] {'loss': 0.2806, 'grad_norm': 0.6634796530616577, 'learning_rate': 1.1222822138048617e-06, 'epoch': 0.79} + 79%|███████▉ | 9618/12188 [1:19:48<5:34:47, 7.82s/it] 79%|███████▉ | 9619/12188 [1:19:55<5:21:45, 7.51s/it] {'loss': 0.3308, 'grad_norm': 0.6901813758868665, 'learning_rate': 1.1214435464779006e-06, 'epoch': 0.79} + 79%|███████▉ | 9619/12188 [1:19:55<5:21:45, 7.51s/it] 79%|███████▉ | 9620/12188 [1:20:02<5:16:40, 7.40s/it] {'loss': 0.2672, 'grad_norm': 0.6414720997310577, 'learning_rate': 1.1206051530483663e-06, 'epoch': 0.79} + 79%|███████▉ | 9620/12188 [1:20:02<5:16:40, 7.40s/it] 79%|███████▉ | 9621/12188 [1:20:09<5:09:16, 7.23s/it] {'loss': 0.3325, 'grad_norm': 0.6968771996115808, 'learning_rate': 1.1197670335754634e-06, 'epoch': 0.79} + 79%|███████▉ | 9621/12188 [1:20:09<5:09:16, 7.23s/it] 79%|███████▉ | 9622/12188 [1:20:16<5:08:40, 7.22s/it] {'loss': 0.3108, 'grad_norm': 0.7160687899750834, 'learning_rate': 1.1189291881183773e-06, 'epoch': 0.79} + 79%|███████▉ | 9622/12188 [1:20:16<5:08:40, 7.22s/it] 79%|███████▉ | 9623/12188 [1:20:23<5:06:42, 7.17s/it] {'loss': 0.3565, 'grad_norm': 0.7174421178921622, 'learning_rate': 1.1180916167362777e-06, 'epoch': 0.79} + 79%|███████▉ | 9623/12188 [1:20:23<5:06:42, 7.17s/it] 79%|███████▉ | 9624/12188 [1:20:30<5:06:54, 7.18s/it] {'loss': 0.2998, 'grad_norm': 1.0023055964435794, 'learning_rate': 1.1172543194883095e-06, 'epoch': 0.79} + 79%|███████▉ | 9624/12188 [1:20:30<5:06:54, 7.18s/it] 79%|███████▉ | 9625/12188 [1:20:38<5:14:54, 7.37s/it] {'loss': 0.3101, 'grad_norm': 0.7142564740686341, 'learning_rate': 1.1164172964336051e-06, 'epoch': 0.79} + 79%|███████▉ | 9625/12188 [1:20:38<5:14:54, 7.37s/it] 79%|███████▉ | 9626/12188 [1:20:45<5:11:56, 7.31s/it] {'loss': 0.3007, 'grad_norm': 0.7832361962179645, 'learning_rate': 1.1155805476312704e-06, 'epoch': 0.79} + 79%|███████▉ | 9626/12188 [1:20:45<5:11:56, 7.31s/it] 79%|███████▉ | 9627/12188 [1:20:52<5:08:10, 7.22s/it] {'loss': 0.3194, 'grad_norm': 0.7394870364968618, 'learning_rate': 1.114744073140397e-06, 'epoch': 0.79} + 79%|███████▉ | 9627/12188 [1:20:52<5:08:10, 7.22s/it] 79%|███████▉ | 9628/12188 [1:21:00<5:14:23, 7.37s/it] {'loss': 0.3123, 'grad_norm': 0.7105820926959094, 'learning_rate': 1.113907873020056e-06, 'epoch': 0.79} + 79%|███████▉ | 9628/12188 [1:21:00<5:14:23, 7.37s/it] 79%|███████▉ | 9629/12188 [1:21:07<5:09:59, 7.27s/it] {'loss': 0.2948, 'grad_norm': 0.6868488385048797, 'learning_rate': 1.1130719473292957e-06, 'epoch': 0.79} + 79%|███████▉ | 9629/12188 [1:21:07<5:09:59, 7.27s/it] 79%|███████▉ | 9630/12188 [1:21:14<5:06:20, 7.19s/it] {'loss': 0.3023, 'grad_norm': 0.6640233362843462, 'learning_rate': 1.1122362961271516e-06, 'epoch': 0.79} + 79%|███████▉ | 9630/12188 [1:21:14<5:06:20, 7.19s/it] 79%|███████▉ | 9631/12188 [1:21:23<5:27:14, 7.68s/it] {'loss': 0.2822, 'grad_norm': 0.7933732629800669, 'learning_rate': 1.1114009194726322e-06, 'epoch': 0.79} + 79%|███████▉ | 9631/12188 [1:21:23<5:27:14, 7.68s/it] 79%|███████▉ | 9632/12188 [1:21:30<5:21:05, 7.54s/it] {'loss': 0.2864, 'grad_norm': 0.667246267229541, 'learning_rate': 1.110565817424733e-06, 'epoch': 0.79} + 79%|███████▉ | 9632/12188 [1:21:30<5:21:05, 7.54s/it] 79%|███████▉ | 9633/12188 [1:21:38<5:17:41, 7.46s/it] {'loss': 0.2852, 'grad_norm': 0.7866296897359484, 'learning_rate': 1.109730990042428e-06, 'epoch': 0.79} + 79%|███████▉ | 9633/12188 [1:21:38<5:17:41, 7.46s/it] 79%|███████▉ | 9634/12188 [1:21:46<5:25:47, 7.65s/it] {'loss': 0.3399, 'grad_norm': 0.7036022507244026, 'learning_rate': 1.108896437384669e-06, 'epoch': 0.79} + 79%|███████▉ | 9634/12188 [1:21:46<5:25:47, 7.65s/it] 79%|███████▉ | 9635/12188 [1:21:52<5:10:45, 7.30s/it] {'loss': 0.3134, 'grad_norm': 0.6794852446045092, 'learning_rate': 1.108062159510393e-06, 'epoch': 0.79} + 79%|███████▉ | 9635/12188 [1:21:52<5:10:45, 7.30s/it] 79%|███████▉ | 9636/12188 [1:21:59<5:03:43, 7.14s/it] {'loss': 0.3213, 'grad_norm': 0.6825751327571795, 'learning_rate': 1.1072281564785148e-06, 'epoch': 0.79} + 79%|███████▉ | 9636/12188 [1:21:59<5:03:43, 7.14s/it] 79%|███████▉ | 9637/12188 [1:22:07<5:13:44, 7.38s/it] {'loss': 0.3154, 'grad_norm': 0.7254060269083571, 'learning_rate': 1.106394428347929e-06, 'epoch': 0.79} + 79%|███████▉ | 9637/12188 [1:22:07<5:13:44, 7.38s/it] 79%|███████▉ | 9638/12188 [1:22:15<5:18:37, 7.50s/it] {'loss': 0.2829, 'grad_norm': 0.6607859319258728, 'learning_rate': 1.1055609751775138e-06, 'epoch': 0.79} + 79%|███████▉ | 9638/12188 [1:22:15<5:18:37, 7.50s/it] 79%|███████▉ | 9639/12188 [1:22:21<5:08:33, 7.26s/it] {'loss': 0.3366, 'grad_norm': 0.7334292326628956, 'learning_rate': 1.1047277970261245e-06, 'epoch': 0.79} + 79%|███████▉ | 9639/12188 [1:22:21<5:08:33, 7.26s/it] 79%|███████▉ | 9640/12188 [1:22:28<5:06:58, 7.23s/it] {'loss': 0.3506, 'grad_norm': 0.7321685100489616, 'learning_rate': 1.1038948939526e-06, 'epoch': 0.79} + 79%|███████▉ | 9640/12188 [1:22:28<5:06:58, 7.23s/it] 79%|███████▉ | 9641/12188 [1:22:36<5:05:42, 7.20s/it] {'loss': 0.2744, 'grad_norm': 0.7448389856182153, 'learning_rate': 1.1030622660157597e-06, 'epoch': 0.79} + 79%|███████▉ | 9641/12188 [1:22:36<5:05:42, 7.20s/it] 79%|███████▉ | 9642/12188 [1:22:43<5:10:22, 7.31s/it] {'loss': 0.2894, 'grad_norm': 0.6792834921590607, 'learning_rate': 1.1022299132744001e-06, 'epoch': 0.79} + 79%|███████▉ | 9642/12188 [1:22:43<5:10:22, 7.31s/it] 79%|███████▉ | 9643/12188 [1:22:51<5:16:46, 7.47s/it] {'loss': 0.2781, 'grad_norm': 0.7078649760291994, 'learning_rate': 1.1013978357873034e-06, 'epoch': 0.79} + 79%|███████▉ | 9643/12188 [1:22:51<5:16:46, 7.47s/it] 79%|███████▉ | 9644/12188 [1:22:58<5:13:59, 7.41s/it] {'loss': 0.3854, 'grad_norm': 0.6628914769666017, 'learning_rate': 1.1005660336132263e-06, 'epoch': 0.79} + 79%|███████▉ | 9644/12188 [1:22:58<5:13:59, 7.41s/it] 79%|███████▉ | 9645/12188 [1:23:05<5:06:42, 7.24s/it] {'loss': 0.3007, 'grad_norm': 0.7726802502795505, 'learning_rate': 1.0997345068109104e-06, 'epoch': 0.79} + 79%|███████▉ | 9645/12188 [1:23:05<5:06:42, 7.24s/it] 79%|███████▉ | 9646/12188 [1:23:12<5:01:26, 7.12s/it] {'loss': 0.267, 'grad_norm': 0.6595926134940389, 'learning_rate': 1.0989032554390795e-06, 'epoch': 0.79} + 79%|███████▉ | 9646/12188 [1:23:12<5:01:26, 7.12s/it] 79%|███████▉ | 9647/12188 [1:23:22<5:37:17, 7.96s/it] {'loss': 0.307, 'grad_norm': 0.7806418853715403, 'learning_rate': 1.0980722795564319e-06, 'epoch': 0.79} + 79%|███████▉ | 9647/12188 [1:23:22<5:37:17, 7.96s/it] 79%|███████▉ | 9648/12188 [1:23:29<5:24:00, 7.65s/it] {'loss': 0.3115, 'grad_norm': 0.8020984070586159, 'learning_rate': 1.0972415792216518e-06, 'epoch': 0.79} + 79%|███████▉ | 9648/12188 [1:23:29<5:24:00, 7.65s/it] 79%|███████▉ | 9649/12188 [1:23:36<5:15:50, 7.46s/it] {'loss': 0.2651, 'grad_norm': 0.691690440127166, 'learning_rate': 1.0964111544934014e-06, 'epoch': 0.79} + 79%|███████▉ | 9649/12188 [1:23:36<5:15:50, 7.46s/it] 79%|███████▉ | 9650/12188 [1:23:43<5:08:21, 7.29s/it] {'loss': 0.3319, 'grad_norm': 0.7003450064553851, 'learning_rate': 1.0955810054303223e-06, 'epoch': 0.79} + 79%|███████▉ | 9650/12188 [1:23:43<5:08:21, 7.29s/it] 79%|███████▉ | 9651/12188 [1:23:52<5:40:02, 8.04s/it] {'loss': 0.2973, 'grad_norm': 0.6732345953659882, 'learning_rate': 1.0947511320910409e-06, 'epoch': 0.79} + 79%|███████▉ | 9651/12188 [1:23:53<5:40:02, 8.04s/it] 79%|███████▉ | 9652/12188 [1:23:59<5:24:02, 7.67s/it] {'loss': 0.2689, 'grad_norm': 0.6165510530489406, 'learning_rate': 1.093921534534159e-06, 'epoch': 0.79} + 79%|███████▉ | 9652/12188 [1:23:59<5:24:02, 7.67s/it] 79%|███████▉ | 9653/12188 [1:24:07<5:28:42, 7.78s/it] {'loss': 0.2785, 'grad_norm': 0.7020258272228573, 'learning_rate': 1.0930922128182642e-06, 'epoch': 0.79} + 79%|███████▉ | 9653/12188 [1:24:07<5:28:42, 7.78s/it] 79%|███████▉ | 9654/12188 [1:24:14<5:17:52, 7.53s/it] {'loss': 0.2921, 'grad_norm': 0.6081867175406243, 'learning_rate': 1.092263167001919e-06, 'epoch': 0.79} + 79%|███████▉ | 9654/12188 [1:24:14<5:17:52, 7.53s/it] 79%|███████▉ | 9655/12188 [1:24:23<5:28:04, 7.77s/it] {'loss': 0.2917, 'grad_norm': 0.6603779971246475, 'learning_rate': 1.0914343971436703e-06, 'epoch': 0.79} + 79%|███████▉ | 9655/12188 [1:24:23<5:28:04, 7.77s/it] 79%|███████▉ | 9656/12188 [1:24:30<5:23:05, 7.66s/it] {'loss': 0.2856, 'grad_norm': 0.6464052579368882, 'learning_rate': 1.0906059033020466e-06, 'epoch': 0.79} + 79%|███████▉ | 9656/12188 [1:24:30<5:23:05, 7.66s/it] 79%|███████▉ | 9657/12188 [1:24:37<5:14:48, 7.46s/it] {'loss': 0.2971, 'grad_norm': 0.6367161417174462, 'learning_rate': 1.0897776855355518e-06, 'epoch': 0.79} + 79%|███████▉ | 9657/12188 [1:24:37<5:14:48, 7.46s/it] 79%|███████▉ | 9658/12188 [1:24:44<5:09:38, 7.34s/it] {'loss': 0.2669, 'grad_norm': 0.6756396375477429, 'learning_rate': 1.0889497439026747e-06, 'epoch': 0.79} + 79%|███████▉ | 9658/12188 [1:24:44<5:09:38, 7.34s/it] 79%|███████▉ | 9659/12188 [1:24:51<5:00:41, 7.13s/it] {'loss': 0.2734, 'grad_norm': 0.7057180937742957, 'learning_rate': 1.088122078461884e-06, 'epoch': 0.79} + 79%|███████▉ | 9659/12188 [1:24:51<5:00:41, 7.13s/it] 79%|███████▉ | 9660/12188 [1:24:59<5:12:42, 7.42s/it] {'loss': 0.34, 'grad_norm': 0.7171233817355803, 'learning_rate': 1.0872946892716264e-06, 'epoch': 0.79} + 79%|███████▉ | 9660/12188 [1:24:59<5:12:42, 7.42s/it] 79%|███████▉ | 9661/12188 [1:25:07<5:25:40, 7.73s/it] {'loss': 0.337, 'grad_norm': 0.70969892002735, 'learning_rate': 1.0864675763903327e-06, 'epoch': 0.79} + 79%|███████▉ | 9661/12188 [1:25:07<5:25:40, 7.73s/it] 79%|███████▉ | 9662/12188 [1:25:14<5:18:32, 7.57s/it] {'loss': 0.3369, 'grad_norm': 0.869901671489642, 'learning_rate': 1.0856407398764096e-06, 'epoch': 0.79} + 79%|███████▉ | 9662/12188 [1:25:14<5:18:32, 7.57s/it] 79%|███████▉ | 9663/12188 [1:25:25<5:51:13, 8.35s/it] {'loss': 0.2679, 'grad_norm': 0.671226220348087, 'learning_rate': 1.08481417978825e-06, 'epoch': 0.79} + 79%|███████▉ | 9663/12188 [1:25:25<5:51:13, 8.35s/it] 79%|███████▉ | 9664/12188 [1:25:35<6:17:29, 8.97s/it] {'loss': 0.2902, 'grad_norm': 0.733161868407346, 'learning_rate': 1.0839878961842232e-06, 'epoch': 0.79} + 79%|███████▉ | 9664/12188 [1:25:35<6:17:29, 8.97s/it] 79%|███████▉ | 9665/12188 [1:25:45<6:24:18, 9.14s/it] {'loss': 0.2892, 'grad_norm': 0.6999938149780257, 'learning_rate': 1.083161889122678e-06, 'epoch': 0.79} + 79%|███████▉ | 9665/12188 [1:25:45<6:24:18, 9.14s/it] 79%|███████▉ | 9666/12188 [1:25:52<5:57:05, 8.50s/it] {'loss': 0.3044, 'grad_norm': 0.8170445810528847, 'learning_rate': 1.0823361586619485e-06, 'epoch': 0.79} + 79%|███████▉ | 9666/12188 [1:25:52<5:57:05, 8.50s/it] 79%|███████▉ | 9667/12188 [1:25:59<5:40:32, 8.10s/it] {'loss': 0.2974, 'grad_norm': 0.7042764366602295, 'learning_rate': 1.081510704860344e-06, 'epoch': 0.79} + 79%|███████▉ | 9667/12188 [1:25:59<5:40:32, 8.10s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7fba352c6b10> +[Try #0] Failed to fetch sample 4423630 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7fba352c6b10> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Language settings'"}, {'from': 'gpt', 'value': '\nclick(x=0.8875, y=0.493)\n'}]} + 79%|███████▉ | 9668/12188 [1:26:07<5:45:02, 8.22s/it] {'loss': 0.2473, 'grad_norm': 0.7379183772248418, 'learning_rate': 1.0806855277761579e-06, 'epoch': 0.79} + 79%|███████▉ | 9668/12188 [1:26:07<5:45:02, 8.22s/it] 79%|███████▉ | 9669/12188 [1:26:14<5:29:26, 7.85s/it] {'loss': 0.2993, 'grad_norm': 0.7500410434590674, 'learning_rate': 1.0798606274676643e-06, 'epoch': 0.79} + 79%|███████▉ | 9669/12188 [1:26:14<5:29:26, 7.85s/it] 79%|███████▉ | 9670/12188 [1:26:21<5:17:27, 7.56s/it] {'loss': 0.2817, 'grad_norm': 0.7105970930085642, 'learning_rate': 1.0790360039931142e-06, 'epoch': 0.79} + 79%|███████▉ | 9670/12188 [1:26:21<5:17:27, 7.56s/it] 79%|███████▉ | 9671/12188 [1:26:28<5:09:51, 7.39s/it] {'loss': 0.3287, 'grad_norm': 0.7052506218025753, 'learning_rate': 1.0782116574107427e-06, 'epoch': 0.79} + 79%|███████▉ | 9671/12188 [1:26:28<5:09:51, 7.39s/it] 79%|███████▉ | 9672/12188 [1:26:36<5:19:02, 7.61s/it] {'loss': 0.2996, 'grad_norm': 0.6981995014063488, 'learning_rate': 1.0773875877787621e-06, 'epoch': 0.79} + 79%|███████▉ | 9672/12188 [1:26:36<5:19:02, 7.61s/it] 79%|███████▉ | 9673/12188 [1:26:43<5:08:54, 7.37s/it] {'loss': 0.2903, 'grad_norm': 0.6888654153079027, 'learning_rate': 1.0765637951553675e-06, 'epoch': 0.79} + 79%|███████▉ | 9673/12188 [1:26:43<5:08:54, 7.37s/it] 79%|███████▉ | 9674/12188 [1:26:50<5:02:44, 7.23s/it] {'loss': 0.3131, 'grad_norm': 0.7922690748918657, 'learning_rate': 1.0757402795987359e-06, 'epoch': 0.79} + 79%|███████▉ | 9674/12188 [1:26:50<5:02:44, 7.23s/it] 79%|███████▉ | 9675/12188 [1:26:57<5:00:13, 7.17s/it] {'loss': 0.286, 'grad_norm': 0.8828637389576498, 'learning_rate': 1.07491704116702e-06, 'epoch': 0.79} + 79%|███████▉ | 9675/12188 [1:26:57<5:00:13, 7.17s/it] 79%|███████▉ | 9676/12188 [1:27:06<5:18:13, 7.60s/it] {'loss': 0.2613, 'grad_norm': 0.6181893881132039, 'learning_rate': 1.0740940799183574e-06, 'epoch': 0.79} + 79%|███████▉ | 9676/12188 [1:27:06<5:18:13, 7.60s/it] 79%|███████▉ | 9677/12188 [1:27:13<5:20:03, 7.65s/it] {'loss': 0.3034, 'grad_norm': 0.7878245225219478, 'learning_rate': 1.073271395910863e-06, 'epoch': 0.79} + 79%|███████▉ | 9677/12188 [1:27:13<5:20:03, 7.65s/it] 79%|███████▉ | 9678/12188 [1:27:21<5:14:12, 7.51s/it] {'loss': 0.2827, 'grad_norm': 0.6454530264699171, 'learning_rate': 1.0724489892026336e-06, 'epoch': 0.79} + 79%|███████▉ | 9678/12188 [1:27:21<5:14:12, 7.51s/it] 79%|███████▉ | 9679/12188 [1:27:28<5:15:42, 7.55s/it] {'loss': 0.3018, 'grad_norm': 0.6667697941905488, 'learning_rate': 1.071626859851747e-06, 'epoch': 0.79} + 79%|███████▉ | 9679/12188 [1:27:28<5:15:42, 7.55s/it] 79%|███████▉ | 9680/12188 [1:27:35<5:11:56, 7.46s/it] {'loss': 0.288, 'grad_norm': 0.6446193549550975, 'learning_rate': 1.070805007916259e-06, 'epoch': 0.79} + 79%|███████▉ | 9680/12188 [1:27:35<5:11:56, 7.46s/it] 79%|███████▉ | 9681/12188 [1:27:42<5:05:36, 7.31s/it] {'loss': 0.3024, 'grad_norm': 0.6495007156037393, 'learning_rate': 1.0699834334542086e-06, 'epoch': 0.79} + 79%|███████▉ | 9681/12188 [1:27:42<5:05:36, 7.31s/it] 79%|███████▉ | 9682/12188 [1:27:50<5:04:12, 7.28s/it] {'loss': 0.2931, 'grad_norm': 0.6785546445731079, 'learning_rate': 1.0691621365236154e-06, 'epoch': 0.79} + 79%|███████▉ | 9682/12188 [1:27:50<5:04:12, 7.28s/it] 79%|███████▉ | 9683/12188 [1:27:57<5:01:50, 7.23s/it] {'loss': 0.2706, 'grad_norm': 0.6681383966409612, 'learning_rate': 1.0683411171824754e-06, 'epoch': 0.79} + 79%|███████▉ | 9683/12188 [1:27:57<5:01:50, 7.23s/it] 79%|███████▉ | 9684/12188 [1:28:04<5:00:59, 7.21s/it] {'loss': 0.3156, 'grad_norm': 0.6187231342283733, 'learning_rate': 1.0675203754887703e-06, 'epoch': 0.79} + 79%|███████▉ | 9684/12188 [1:28:04<5:00:59, 7.21s/it] 79%|███████▉ | 9685/12188 [1:28:11<5:00:20, 7.20s/it] {'loss': 0.2989, 'grad_norm': 0.6518623346990626, 'learning_rate': 1.0666999115004572e-06, 'epoch': 0.79} + 79%|███████▉ | 9685/12188 [1:28:11<5:00:20, 7.20s/it] 79%|███████▉ | 9686/12188 [1:28:18<4:59:23, 7.18s/it] {'loss': 0.2827, 'grad_norm': 0.9841495842413076, 'learning_rate': 1.0658797252754766e-06, 'epoch': 0.79} + 79%|███████▉ | 9686/12188 [1:28:18<4:59:23, 7.18s/it] 79%|███████▉ | 9687/12188 [1:28:26<5:01:09, 7.22s/it] {'loss': 0.2974, 'grad_norm': 0.7088717682164004, 'learning_rate': 1.0650598168717507e-06, 'epoch': 0.79} + 79%|███████▉ | 9687/12188 [1:28:26<5:01:09, 7.22s/it] 79%|███████▉ | 9688/12188 [1:28:32<4:57:53, 7.15s/it] {'loss': 0.2664, 'grad_norm': 0.7341065697917314, 'learning_rate': 1.0642401863471774e-06, 'epoch': 0.79} + 79%|███████▉ | 9688/12188 [1:28:32<4:57:53, 7.15s/it] 79%|███████▉ | 9689/12188 [1:28:40<4:56:46, 7.13s/it] {'loss': 0.33, 'grad_norm': 0.7534556582570283, 'learning_rate': 1.0634208337596403e-06, 'epoch': 0.79} + 79%|███████▉ | 9689/12188 [1:28:40<4:56:46, 7.13s/it] 80%|███████▉ | 9690/12188 [1:28:47<4:57:33, 7.15s/it] {'loss': 0.303, 'grad_norm': 0.7532805179725515, 'learning_rate': 1.0626017591669973e-06, 'epoch': 0.8} + 80%|███████▉ | 9690/12188 [1:28:47<4:57:33, 7.15s/it] 80%|███████▉ | 9691/12188 [1:28:57<5:35:19, 8.06s/it] {'loss': 0.3067, 'grad_norm': 0.6552303849006605, 'learning_rate': 1.0617829626270942e-06, 'epoch': 0.8} + 80%|███████▉ | 9691/12188 [1:28:57<5:35:19, 8.06s/it] 80%|███████▉ | 9692/12188 [1:29:04<5:18:15, 7.65s/it] {'loss': 0.28, 'grad_norm': 0.7465083557138046, 'learning_rate': 1.0609644441977507e-06, 'epoch': 0.8} + 80%|███████▉ | 9692/12188 [1:29:04<5:18:15, 7.65s/it] 80%|███████▉ | 9693/12188 [1:29:11<5:10:40, 7.47s/it] {'loss': 0.3572, 'grad_norm': 0.748594285574528, 'learning_rate': 1.0601462039367683e-06, 'epoch': 0.8} + 80%|███████▉ | 9693/12188 [1:29:11<5:10:40, 7.47s/it] 80%|███████▉ | 9694/12188 [1:29:18<5:05:23, 7.35s/it] {'loss': 0.2815, 'grad_norm': 0.788036845522253, 'learning_rate': 1.0593282419019313e-06, 'epoch': 0.8} + 80%|███████▉ | 9694/12188 [1:29:18<5:05:23, 7.35s/it] 80%|███████▉ | 9695/12188 [1:29:28<5:43:04, 8.26s/it] {'loss': 0.2841, 'grad_norm': 0.6623159516768088, 'learning_rate': 1.058510558151004e-06, 'epoch': 0.8} + 80%|███████▉ | 9695/12188 [1:29:28<5:43:04, 8.26s/it] 80%|███████▉ | 9696/12188 [1:29:35<5:26:59, 7.87s/it] {'loss': 0.3332, 'grad_norm': 0.7489118829644695, 'learning_rate': 1.0576931527417273e-06, 'epoch': 0.8} + 80%|███████▉ | 9696/12188 [1:29:35<5:26:59, 7.87s/it] 80%|███████▉ | 9697/12188 [1:29:43<5:30:06, 7.95s/it] {'loss': 0.3024, 'grad_norm': 0.698726292461689, 'learning_rate': 1.0568760257318273e-06, 'epoch': 0.8} + 80%|███████▉ | 9697/12188 [1:29:43<5:30:06, 7.95s/it] 80%|███████▉ | 9698/12188 [1:29:50<5:15:26, 7.60s/it] {'loss': 0.3332, 'grad_norm': 0.6397241519608899, 'learning_rate': 1.0560591771790062e-06, 'epoch': 0.8} + 80%|███████▉ | 9698/12188 [1:29:50<5:15:26, 7.60s/it] 80%|███████▉ | 9699/12188 [1:29:57<5:09:52, 7.47s/it] {'loss': 0.3158, 'grad_norm': 0.752680867159014, 'learning_rate': 1.0552426071409494e-06, 'epoch': 0.8} + 80%|███████▉ | 9699/12188 [1:29:57<5:09:52, 7.47s/it] 80%|███████▉ | 9700/12188 [1:30:05<5:18:55, 7.69s/it] {'loss': 0.3108, 'grad_norm': 0.7987157795931096, 'learning_rate': 1.0544263156753238e-06, 'epoch': 0.8} + 80%|███████▉ | 9700/12188 [1:30:05<5:18:55, 7.69s/it] 80%|███████▉ | 9701/12188 [1:30:12<5:06:10, 7.39s/it] {'loss': 0.3388, 'grad_norm': 0.7782564202505178, 'learning_rate': 1.053610302839771e-06, 'epoch': 0.8} + 80%|███████▉ | 9701/12188 [1:30:12<5:06:10, 7.39s/it] 80%|███████▉ | 9702/12188 [1:30:21<5:24:38, 7.84s/it] {'loss': 0.2796, 'grad_norm': 0.6426458923844272, 'learning_rate': 1.0527945686919195e-06, 'epoch': 0.8} + 80%|███████▉ | 9702/12188 [1:30:21<5:24:38, 7.84s/it] 80%|███████▉ | 9703/12188 [1:30:28<5:11:41, 7.53s/it] {'loss': 0.2846, 'grad_norm': 0.6623357372305415, 'learning_rate': 1.0519791132893725e-06, 'epoch': 0.8} + 80%|███████▉ | 9703/12188 [1:30:28<5:11:41, 7.53s/it] 80%|███████▉ | 9704/12188 [1:30:35<5:02:40, 7.31s/it] {'loss': 0.3051, 'grad_norm': 0.6929786531561708, 'learning_rate': 1.0511639366897193e-06, 'epoch': 0.8} + 80%|███████▉ | 9704/12188 [1:30:35<5:02:40, 7.31s/it] 80%|███████▉ | 9705/12188 [1:30:42<4:59:03, 7.23s/it] {'loss': 0.3073, 'grad_norm': 0.6555037390926991, 'learning_rate': 1.0503490389505244e-06, 'epoch': 0.8} + 80%|███████▉ | 9705/12188 [1:30:42<4:59:03, 7.23s/it] 80%|███████▉ | 9706/12188 [1:30:49<4:59:22, 7.24s/it] {'loss': 0.3027, 'grad_norm': 0.7660649132045481, 'learning_rate': 1.0495344201293335e-06, 'epoch': 0.8} + 80%|███████▉ | 9706/12188 [1:30:49<4:59:22, 7.24s/it] 80%|███████▉ | 9707/12188 [1:30:56<4:58:53, 7.23s/it] {'loss': 0.2586, 'grad_norm': 0.6562603273878431, 'learning_rate': 1.0487200802836762e-06, 'epoch': 0.8} + 80%|███████▉ | 9707/12188 [1:30:56<4:58:53, 7.23s/it] 80%|███████▉ | 9708/12188 [1:31:03<5:00:12, 7.26s/it] {'loss': 0.2796, 'grad_norm': 0.7136175444693272, 'learning_rate': 1.0479060194710571e-06, 'epoch': 0.8} + 80%|███████▉ | 9708/12188 [1:31:03<5:00:12, 7.26s/it] 80%|███████▉ | 9709/12188 [1:31:10<4:56:55, 7.19s/it] {'loss': 0.2859, 'grad_norm': 0.7177127369910634, 'learning_rate': 1.0470922377489656e-06, 'epoch': 0.8} + 80%|███████▉ | 9709/12188 [1:31:10<4:56:55, 7.19s/it] 80%|███████▉ | 9710/12188 [1:31:17<4:53:31, 7.11s/it] {'loss': 0.3061, 'grad_norm': 0.6590590022644492, 'learning_rate': 1.0462787351748705e-06, 'epoch': 0.8} + 80%|███████▉ | 9710/12188 [1:31:17<4:53:31, 7.11s/it] 80%|███████▉ | 9711/12188 [1:31:24<4:52:26, 7.08s/it] {'loss': 0.3081, 'grad_norm': 0.7430890328813978, 'learning_rate': 1.045465511806218e-06, 'epoch': 0.8} + 80%|███████▉ | 9711/12188 [1:31:24<4:52:26, 7.08s/it] 80%|███████▉ | 9712/12188 [1:31:31<4:49:52, 7.02s/it] {'loss': 0.2829, 'grad_norm': 0.6915932012984729, 'learning_rate': 1.044652567700437e-06, 'epoch': 0.8} + 80%|███████▉ | 9712/12188 [1:31:31<4:49:52, 7.02s/it] 80%|███████▉ | 9713/12188 [1:31:38<4:52:00, 7.08s/it] {'loss': 0.3248, 'grad_norm': 0.6820506918777985, 'learning_rate': 1.0438399029149393e-06, 'epoch': 0.8} + 80%|███████▉ | 9713/12188 [1:31:38<4:52:00, 7.08s/it] 80%|███████▉ | 9714/12188 [1:31:45<4:51:06, 7.06s/it] {'loss': 0.2783, 'grad_norm': 0.6511126983495595, 'learning_rate': 1.0430275175071097e-06, 'epoch': 0.8} + 80%|███████▉ | 9714/12188 [1:31:45<4:51:06, 7.06s/it] 80%|███████▉ | 9715/12188 [1:31:56<5:38:19, 8.21s/it] {'loss': 0.2699, 'grad_norm': 0.8600290328424022, 'learning_rate': 1.0422154115343207e-06, 'epoch': 0.8} + 80%|███████▉ | 9715/12188 [1:31:56<5:38:19, 8.21s/it] 80%|███████▉ | 9716/12188 [1:32:04<5:29:26, 8.00s/it] {'loss': 0.2882, 'grad_norm': 0.6847340755619638, 'learning_rate': 1.0414035850539194e-06, 'epoch': 0.8} + 80%|███████▉ | 9716/12188 [1:32:04<5:29:26, 8.00s/it] 80%|██��████▉ | 9717/12188 [1:32:11<5:17:01, 7.70s/it] {'loss': 0.3033, 'grad_norm': 0.7312363803572416, 'learning_rate': 1.0405920381232382e-06, 'epoch': 0.8} + 80%|███████▉ | 9717/12188 [1:32:11<5:17:01, 7.70s/it] 80%|███████▉ | 9718/12188 [1:32:18<5:11:53, 7.58s/it] {'loss': 0.2836, 'grad_norm': 0.6554196538556682, 'learning_rate': 1.039780770799585e-06, 'epoch': 0.8} + 80%|███████▉ | 9718/12188 [1:32:18<5:11:53, 7.58s/it] 80%|███████▉ | 9719/12188 [1:32:26<5:17:50, 7.72s/it] {'loss': 0.3014, 'grad_norm': 0.6830131626699958, 'learning_rate': 1.0389697831402518e-06, 'epoch': 0.8} + 80%|███████▉ | 9719/12188 [1:32:26<5:17:50, 7.72s/it] 80%|███████▉ | 9720/12188 [1:32:33<5:10:40, 7.55s/it] {'loss': 0.3046, 'grad_norm': 0.6956710290113618, 'learning_rate': 1.0381590752025094e-06, 'epoch': 0.8} + 80%|███████▉ | 9720/12188 [1:32:33<5:10:40, 7.55s/it] 80%|███████▉ | 9721/12188 [1:32:40<5:03:03, 7.37s/it] {'loss': 0.3071, 'grad_norm': 0.7238205543691911, 'learning_rate': 1.037348647043606e-06, 'epoch': 0.8} + 80%|███████▉ | 9721/12188 [1:32:40<5:03:03, 7.37s/it] 80%|███████▉ | 9722/12188 [1:32:48<5:02:09, 7.35s/it] {'loss': 0.3051, 'grad_norm': 0.7460662999460426, 'learning_rate': 1.036538498720775e-06, 'epoch': 0.8} + 80%|███████▉ | 9722/12188 [1:32:48<5:02:09, 7.35s/it] 80%|███████▉ | 9723/12188 [1:32:55<4:58:48, 7.27s/it] {'loss': 0.3039, 'grad_norm': 0.6759811409598785, 'learning_rate': 1.035728630291229e-06, 'epoch': 0.8} + 80%|███████▉ | 9723/12188 [1:32:55<4:58:48, 7.27s/it] 80%|███████▉ | 9724/12188 [1:33:05<5:40:07, 8.28s/it] {'loss': 0.2984, 'grad_norm': 0.6811990365699983, 'learning_rate': 1.0349190418121563e-06, 'epoch': 0.8} + 80%|███████▉ | 9724/12188 [1:33:05<5:40:07, 8.28s/it] 80%|███████▉ | 9725/12188 [1:33:13<5:29:56, 8.04s/it] {'loss': 0.3394, 'grad_norm': 0.6908456411908239, 'learning_rate': 1.0341097333407307e-06, 'epoch': 0.8} + 80%|███████▉ | 9725/12188 [1:33:13<5:29:56, 8.04s/it] 80%|███████▉ | 9726/12188 [1:33:20<5:19:03, 7.78s/it] {'loss': 0.2837, 'grad_norm': 0.6911655693450823, 'learning_rate': 1.0333007049341053e-06, 'epoch': 0.8} + 80%|███████▉ | 9726/12188 [1:33:20<5:19:03, 7.78s/it] 80%|███████▉ | 9727/12188 [1:33:27<5:11:25, 7.59s/it] {'loss': 0.3057, 'grad_norm': 0.6789092634400771, 'learning_rate': 1.0324919566494097e-06, 'epoch': 0.8} + 80%|███████▉ | 9727/12188 [1:33:27<5:11:25, 7.59s/it] 80%|███████▉ | 9728/12188 [1:33:34<5:05:42, 7.46s/it] {'loss': 0.3197, 'grad_norm': 0.7388128327476113, 'learning_rate': 1.0316834885437594e-06, 'epoch': 0.8} + 80%|███████▉ | 9728/12188 [1:33:34<5:05:42, 7.46s/it] 80%|███████▉ | 9729/12188 [1:33:42<5:07:57, 7.51s/it] {'loss': 0.3152, 'grad_norm': 0.7292884806052073, 'learning_rate': 1.0308753006742439e-06, 'epoch': 0.8} + 80%|███████▉ | 9729/12188 [1:33:42<5:07:57, 7.51s/it] 80%|███████▉ | 9730/12188 [1:33:49<5:01:36, 7.36s/it] {'loss': 0.2904, 'grad_norm': 0.6879670056739093, 'learning_rate': 1.0300673930979383e-06, 'epoch': 0.8} + 80%|███████▉ | 9730/12188 [1:33:49<5:01:36, 7.36s/it] 80%|███████▉ | 9731/12188 [1:33:58<5:16:26, 7.73s/it] {'loss': 0.2659, 'grad_norm': 0.6319502062091653, 'learning_rate': 1.0292597658718962e-06, 'epoch': 0.8} + 80%|███████▉ | 9731/12188 [1:33:58<5:16:26, 7.73s/it] 80%|███████▉ | 9732/12188 [1:34:04<5:06:24, 7.49s/it] {'loss': 0.2863, 'grad_norm': 0.7951552401218481, 'learning_rate': 1.0284524190531497e-06, 'epoch': 0.8} + 80%|███████▉ | 9732/12188 [1:34:04<5:06:24, 7.49s/it] 80%|███████▉ | 9733/12188 [1:34:12<5:01:05, 7.36s/it] {'loss': 0.2983, 'grad_norm': 0.841256180801021, 'learning_rate': 1.0276453526987112e-06, 'epoch': 0.8} + 80%|███████▉ | 9733/12188 [1:34:12<5:01:05, 7.36s/it] 80%|███████▉ | 9734/12188 [1:34:20<5:11:04, 7.61s/it] {'loss': 0.3159, 'grad_norm': 0.7267228141893712, 'learning_rate': 1.0268385668655779e-06, 'epoch': 0.8} + 80%|███████▉ | 9734/12188 [1:34:20<5:11:04, 7.61s/it] 80%|███████▉ | 9735/12188 [1:34:27<5:11:23, 7.62s/it] {'loss': 0.2752, 'grad_norm': 0.6461241297753051, 'learning_rate': 1.0260320616107194e-06, 'epoch': 0.8} + 80%|███████▉ | 9735/12188 [1:34:27<5:11:23, 7.62s/it] 80%|███████▉ | 9736/12188 [1:34:34<5:02:11, 7.39s/it] {'loss': 0.3103, 'grad_norm': 0.6905544255990491, 'learning_rate': 1.0252258369910938e-06, 'epoch': 0.8} + 80%|███████▉ | 9736/12188 [1:34:34<5:02:11, 7.39s/it] 80%|███████▉ | 9737/12188 [1:34:41<4:54:56, 7.22s/it] {'loss': 0.3508, 'grad_norm': 0.6801312996012808, 'learning_rate': 1.0244198930636329e-06, 'epoch': 0.8} + 80%|███████▉ | 9737/12188 [1:34:41<4:54:56, 7.22s/it] 80%|███████▉ | 9738/12188 [1:34:48<4:50:08, 7.11s/it] {'loss': 0.2697, 'grad_norm': 0.6862616144605563, 'learning_rate': 1.0236142298852524e-06, 'epoch': 0.8} + 80%|███████▉ | 9738/12188 [1:34:48<4:50:08, 7.11s/it] 80%|███████▉ | 9739/12188 [1:34:55<4:51:48, 7.15s/it] {'loss': 0.3299, 'grad_norm': 0.7361527138373231, 'learning_rate': 1.0228088475128456e-06, 'epoch': 0.8} + 80%|███████▉ | 9739/12188 [1:34:55<4:51:48, 7.15s/it] 80%|███████▉ | 9740/12188 [1:35:02<4:44:03, 6.96s/it] {'loss': 0.3312, 'grad_norm': 0.691151808806186, 'learning_rate': 1.0220037460032877e-06, 'epoch': 0.8} + 80%|███████▉ | 9740/12188 [1:35:02<4:44:03, 6.96s/it] 80%|███████▉ | 9741/12188 [1:35:09<4:47:20, 7.05s/it] {'loss': 0.2748, 'grad_norm': 0.7222641393571314, 'learning_rate': 1.0211989254134358e-06, 'epoch': 0.8} + 80%|███████▉ | 9741/12188 [1:35:09<4:47:20, 7.05s/it] 80%|███████▉ | 9742/12188 [1:35:19<5:20:48, 7.87s/it] {'loss': 0.3343, 'grad_norm': 0.6630360571728401, 'learning_rate': 1.020394385800122e-06, 'epoch': 0.8} + 80%|███████▉ | 9742/12188 [1:35:19<5:20:48, 7.87s/it] 80%|███████▉ | 9743/12188 [1:35:25<5:07:04, 7.54s/it] {'loss': 0.2926, 'grad_norm': 0.6258425836827777, 'learning_rate': 1.0195901272201625e-06, 'epoch': 0.8} + 80%|███████▉ | 9743/12188 [1:35:25<5:07:04, 7.54s/it] 80%|███████▉ | 9744/12188 [1:35:34<5:13:35, 7.70s/it] {'loss': 0.3141, 'grad_norm': 0.6305658484180072, 'learning_rate': 1.0187861497303547e-06, 'epoch': 0.8} + 80%|███████▉ | 9744/12188 [1:35:34<5:13:35, 7.70s/it] 80%|███████▉ | 9745/12188 [1:35:41<5:10:25, 7.62s/it] {'loss': 0.3054, 'grad_norm': 0.7113479767628508, 'learning_rate': 1.0179824533874733e-06, 'epoch': 0.8} + 80%|███████▉ | 9745/12188 [1:35:41<5:10:25, 7.62s/it] 80%|███████▉ | 9746/12188 [1:35:49<5:13:06, 7.69s/it] {'loss': 0.2958, 'grad_norm': 0.703694630264181, 'learning_rate': 1.0171790382482717e-06, 'epoch': 0.8} + 80%|███████▉ | 9746/12188 [1:35:49<5:13:06, 7.69s/it] 80%|███████▉ | 9747/12188 [1:35:56<5:04:01, 7.47s/it] {'loss': 0.3153, 'grad_norm': 0.7581092566660904, 'learning_rate': 1.0163759043694887e-06, 'epoch': 0.8} + 80%|███████▉ | 9747/12188 [1:35:56<5:04:01, 7.47s/it] 80%|███████▉ | 9748/12188 [1:36:03<5:00:34, 7.39s/it] {'loss': 0.3186, 'grad_norm': 0.6707064729862992, 'learning_rate': 1.0155730518078378e-06, 'epoch': 0.8} + 80%|███████▉ | 9748/12188 [1:36:03<5:00:34, 7.39s/it] 80%|███████▉ | 9749/12188 [1:36:14<5:44:44, 8.48s/it] {'loss': 0.2884, 'grad_norm': 0.6809130719065786, 'learning_rate': 1.0147704806200182e-06, 'epoch': 0.8} + 80%|███████▉ | 9749/12188 [1:36:14<5:44:44, 8.48s/it] 80%|███████▉ | 9750/12188 [1:36:22<5:32:59, 8.20s/it] {'loss': 0.2676, 'grad_norm': 0.6590389507740514, 'learning_rate': 1.013968190862703e-06, 'epoch': 0.8} + 80%|███████▉ | 9750/12188 [1:36:22<5:32:59, 8.20s/it] 80%|████████ | 9751/12188 [1:36:30<5:40:31, 8.38s/it] {'loss': 0.2947, 'grad_norm': 0.6489121474698356, 'learning_rate': 1.013166182592551e-06, 'epoch': 0.8} + 80%|████████ | 9751/12188 [1:36:30<5:40:31, 8.38s/it] 80%|████████ | 9752/12188 [1:36:39<5:40:20, 8.38s/it] {'loss': 0.3535, 'grad_norm': 0.7200977102546422, 'learning_rate': 1.0123644558661972e-06, 'epoch': 0.8} + 80%|████████ | 9752/12188 [1:36:39<5:40:20, 8.38s/it] 80%|████████ | 9753/12188 [1:36:47<5:33:31, 8.22s/it] {'loss': 0.2934, 'grad_norm': 0.7723103388491903, 'learning_rate': 1.0115630107402586e-06, 'epoch': 0.8} + 80%|████████ | 9753/12188 [1:36:47<5:33:31, 8.22s/it] 80%|████████ | 9754/12188 [1:36:54<5:27:55, 8.08s/it] {'loss': 0.3064, 'grad_norm': 0.7459319688889009, 'learning_rate': 1.0107618472713338e-06, 'epoch': 0.8} + 80%|████████ | 9754/12188 [1:36:54<5:27:55, 8.08s/it] 80%|████████ | 9755/12188 [1:37:04<5:44:05, 8.49s/it] {'loss': 0.3157, 'grad_norm': 0.7017361125236685, 'learning_rate': 1.0099609655159964e-06, 'epoch': 0.8} + 80%|████████ | 9755/12188 [1:37:04<5:44:05, 8.49s/it] 80%|████████ | 9756/12188 [1:37:11<5:31:42, 8.18s/it] {'loss': 0.33, 'grad_norm': 0.7324281092465356, 'learning_rate': 1.0091603655308069e-06, 'epoch': 0.8} + 80%|████████ | 9756/12188 [1:37:11<5:31:42, 8.18s/it] 80%|████████ | 9757/12188 [1:37:18<5:20:12, 7.90s/it] {'loss': 0.2643, 'grad_norm': 0.716559584055515, 'learning_rate': 1.0083600473722987e-06, 'epoch': 0.8} + 80%|████████ | 9757/12188 [1:37:18<5:20:12, 7.90s/it] 80%|████████ | 9758/12188 [1:37:25<5:05:27, 7.54s/it] {'loss': 0.3452, 'grad_norm': 0.7103255845120597, 'learning_rate': 1.0075600110969914e-06, 'epoch': 0.8} + 80%|████████ | 9758/12188 [1:37:25<5:05:27, 7.54s/it] 80%|████████ | 9759/12188 [1:37:36<5:39:29, 8.39s/it] {'loss': 0.2946, 'grad_norm': 0.8175115105202304, 'learning_rate': 1.0067602567613832e-06, 'epoch': 0.8} + 80%|████████ | 9759/12188 [1:37:36<5:39:29, 8.39s/it] 80%|████████ | 9760/12188 [1:37:43<5:24:28, 8.02s/it] {'loss': 0.2888, 'grad_norm': 0.7094279332298987, 'learning_rate': 1.00596078442195e-06, 'epoch': 0.8} + 80%|████████ | 9760/12188 [1:37:43<5:24:28, 8.02s/it] 80%|████████ | 9761/12188 [1:37:50<5:20:12, 7.92s/it] {'loss': 0.2917, 'grad_norm': 0.7292073297580015, 'learning_rate': 1.0051615941351472e-06, 'epoch': 0.8} + 80%|████████ | 9761/12188 [1:37:50<5:20:12, 7.92s/it] 80%|████████ | 9762/12188 [1:37:57<5:08:30, 7.63s/it] {'loss': 0.3051, 'grad_norm': 0.8867568727181516, 'learning_rate': 1.0043626859574167e-06, 'epoch': 0.8} + 80%|████████ | 9762/12188 [1:37:57<5:08:30, 7.63s/it] 80%|████████ | 9763/12188 [1:38:04<5:02:12, 7.48s/it] {'loss': 0.3091, 'grad_norm': 0.6566991392409652, 'learning_rate': 1.0035640599451719e-06, 'epoch': 0.8} + 80%|████████ | 9763/12188 [1:38:04<5:02:12, 7.48s/it] 80%|████████ | 9764/12188 [1:38:11<4:55:20, 7.31s/it] {'loss': 0.3019, 'grad_norm': 0.6280236108057966, 'learning_rate': 1.0027657161548144e-06, 'epoch': 0.8} + 80%|████████ | 9764/12188 [1:38:11<4:55:20, 7.31s/it] 80%|████████ | 9765/12188 [1:38:19<4:56:15, 7.34s/it] {'loss': 0.315, 'grad_norm': 1.0925294907366019, 'learning_rate': 1.001967654642718e-06, 'epoch': 0.8} + 80%|████████ | 9765/12188 [1:38:19<4:56:15, 7.34s/it] 80%|████████ | 9766/12188 [1:38:26<4:53:27, 7.27s/it] {'loss': 0.2989, 'grad_norm': 0.9080477333923538, 'learning_rate': 1.0011698754652427e-06, 'epoch': 0.8} + 80%|████████ | 9766/12188 [1:38:26<4:53:27, 7.27s/it] 80%|████████ | 9767/12188 [1:38:33<4:52:17, 7.24s/it] {'loss': 0.3237, 'grad_norm': 0.6806791517242843, 'learning_rate': 1.0003723786787272e-06, 'epoch': 0.8} + 80%|████████ | 9767/12188 [1:38:33<4:52:17, 7.24s/it] 80%|████████ | 9768/12188 [1:38:40<4:51:35, 7.23s/it] {'loss': 0.2868, 'grad_norm': 0.7621514496859785, 'learning_rate': 9.995751643394879e-07, 'epoch': 0.8} + 80%|████████ | 9768/12188 [1:38:40<4:51:35, 7.23s/it] 80%|████████ | 9769/12188 [1:38:49<5:03:38, 7.53s/it] {'loss': 0.3308, 'grad_norm': 0.699591762040652, 'learning_rate': 9.987782325038242e-07, 'epoch': 0.8} + 80%|████████ | 9769/12188 [1:38:49<5:03:38, 7.53s/it] 80%|████████ | 9770/12188 [1:38:55<4:54:39, 7.31s/it] {'loss': 0.3174, 'grad_norm': 0.7437175704568343, 'learning_rate': 9.979815832280115e-07, 'epoch': 0.8} + 80%|████████ | 9770/12188 [1:38:55<4:54:39, 7.31s/it] 80%|████████ | 9771/12188 [1:39:02<4:49:24, 7.18s/it] {'loss': 0.3402, 'grad_norm': 0.7265005103162883, 'learning_rate': 9.971852165683105e-07, 'epoch': 0.8} + 80%|████████ | 9771/12188 [1:39:02<4:49:24, 7.18s/it] 80%|████████ | 9772/12188 [1:39:09<4:45:45, 7.10s/it] {'loss': 0.3318, 'grad_norm': 0.6999851254031201, 'learning_rate': 9.963891325809594e-07, 'epoch': 0.8} + 80%|████████ | 9772/12188 [1:39:09<4:45:45, 7.10s/it] 80%|████████ | 9773/12188 [1:39:17<4:59:58, 7.45s/it] {'loss': 0.2958, 'grad_norm': 0.7205715110463815, 'learning_rate': 9.955933313221754e-07, 'epoch': 0.8} + 80%|████████ | 9773/12188 [1:39:17<4:59:58, 7.45s/it] 80%|████████ | 9774/12188 [1:39:24<4:53:24, 7.29s/it] {'loss': 0.3198, 'grad_norm': 0.705778142913246, 'learning_rate': 9.947978128481556e-07, 'epoch': 0.8} + 80%|████████ | 9774/12188 [1:39:24<4:53:24, 7.29s/it] 80%|████████ | 9775/12188 [1:39:31<4:45:27, 7.10s/it] {'loss': 0.3035, 'grad_norm': 0.731759531252536, 'learning_rate': 9.940025772150808e-07, 'epoch': 0.8} + 80%|████████ | 9775/12188 [1:39:31<4:45:27, 7.10s/it] 80%|████████ | 9776/12188 [1:39:38<4:39:31, 6.95s/it] {'loss': 0.308, 'grad_norm': 0.6596676324798115, 'learning_rate': 9.932076244791062e-07, 'epoch': 0.8} + 80%|████████ | 9776/12188 [1:39:38<4:39:31, 6.95s/it] 80%|████████ | 9777/12188 [1:39:45<4:41:21, 7.00s/it] {'loss': 0.2988, 'grad_norm': 0.7233696228057126, 'learning_rate': 9.924129546963735e-07, 'epoch': 0.8} + 80%|████████ | 9777/12188 [1:39:45<4:41:21, 7.00s/it] 80%|████████ | 9778/12188 [1:39:52<4:40:06, 6.97s/it] {'loss': 0.2711, 'grad_norm': 0.7561149816515415, 'learning_rate': 9.916185679229973e-07, 'epoch': 0.8} + 80%|████████ | 9778/12188 [1:39:52<4:40:06, 6.97s/it] 80%|████████ | 9779/12188 [1:39:59<4:44:23, 7.08s/it] {'loss': 0.3393, 'grad_norm': 0.6980271607191585, 'learning_rate': 9.908244642150783e-07, 'epoch': 0.8} + 80%|████████ | 9779/12188 [1:39:59<4:44:23, 7.08s/it] 80%|████████ | 9780/12188 [1:40:07<4:50:49, 7.25s/it] {'loss': 0.2953, 'grad_norm': 1.0591153948226608, 'learning_rate': 9.900306436286962e-07, 'epoch': 0.8} + 80%|████████ | 9780/12188 [1:40:07<4:50:49, 7.25s/it] 80%|████████ | 9781/12188 [1:40:14<4:50:09, 7.23s/it] {'loss': 0.264, 'grad_norm': 0.6753955976136956, 'learning_rate': 9.892371062199051e-07, 'epoch': 0.8} + 80%|████████ | 9781/12188 [1:40:14<4:50:09, 7.23s/it] 80%|████████ | 9782/12188 [1:40:21<4:51:52, 7.28s/it] {'loss': 0.2871, 'grad_norm': 0.6872355797388457, 'learning_rate': 9.88443852044748e-07, 'epoch': 0.8} + 80%|████████ | 9782/12188 [1:40:21<4:51:52, 7.28s/it] 80%|████████ | 9783/12188 [1:40:30<5:07:26, 7.67s/it] {'loss': 0.301, 'grad_norm': 0.8323191142725243, 'learning_rate': 9.876508811592395e-07, 'epoch': 0.8} + 80%|████████ | 9783/12188 [1:40:30<5:07:26, 7.67s/it] 80%|████████ | 9784/12188 [1:40:37<4:56:53, 7.41s/it] {'loss': 0.2998, 'grad_norm': 0.7195435139664191, 'learning_rate': 9.868581936193794e-07, 'epoch': 0.8} + 80%|████████ | 9784/12188 [1:40:37<4:56:53, 7.41s/it] 80%|████████ | 9785/12188 [1:40:43<4:51:37, 7.28s/it] {'loss': 0.3412, 'grad_norm': 0.7074759420834195, 'learning_rate': 9.860657894811476e-07, 'epoch': 0.8} + 80%|████████ | 9785/12188 [1:40:44<4:51:37, 7.28s/it] 80%|████████ | 9786/12188 [1:40:52<5:04:52, 7.62s/it] {'loss': 0.3054, 'grad_norm': 0.7110081082637354, 'learning_rate': 9.852736688004988e-07, 'epoch': 0.8} + 80%|████████ | 9786/12188 [1:40:52<5:04:52, 7.62s/it] 80%|████████ | 9787/12188 [1:40:59<4:59:21, 7.48s/it] {'loss': 0.3092, 'grad_norm': 0.8953186980153235, 'learning_rate': 9.844818316333754e-07, 'epoch': 0.8} + 80%|████████ | 9787/12188 [1:40:59<4:59:21, 7.48s/it] 80%|████████ | 9788/12188 [1:41:06<4:55:26, 7.39s/it] {'loss': 0.2654, 'grad_norm': 0.7050655151403508, 'learning_rate': 9.836902780356932e-07, 'epoch': 0.8} + 80%|████████ | 9788/12188 [1:41:06<4:55:26, 7.39s/it] 80%|████████ | 9789/12188 [1:41:14<4:58:44, 7.47s/it] {'loss': 0.2896, 'grad_norm': 0.7524818726373084, 'learning_rate': 9.8289900806335e-07, 'epoch': 0.8} + 80%|████████ | 9789/12188 [1:41:14<4:58:44, 7.47s/it] 80%|████████ | 9790/12188 [1:41:21<4:53:23, 7.34s/it] {'loss': 0.2994, 'grad_norm': 0.6950350403705858, 'learning_rate': 9.821080217722262e-07, 'epoch': 0.8} + 80%|████████ | 9790/12188 [1:41:21<4:53:23, 7.34s/it] 80%|████████ | 9791/12188 [1:41:31<5:23:52, 8.11s/it] {'loss': 0.3016, 'grad_norm': 0.6470198952528978, 'learning_rate': 9.81317319218177e-07, 'epoch': 0.8} + 80%|████████ | 9791/12188 [1:41:31<5:23:52, 8.11s/it] 80%|████████ | 9792/12188 [1:41:39<5:27:21, 8.20s/it] {'loss': 0.2856, 'grad_norm': 0.6838308419005166, 'learning_rate': 9.805269004570435e-07, 'epoch': 0.8} + 80%|████████ | 9792/12188 [1:41:39<5:27:21, 8.20s/it] 80%|████████ | 9793/12188 [1:41:46<5:13:30, 7.85s/it] {'loss': 0.3389, 'grad_norm': 0.7965745518742563, 'learning_rate': 9.797367655446415e-07, 'epoch': 0.8} + 80%|████████ | 9793/12188 [1:41:46<5:13:30, 7.85s/it] 80%|████████ | 9794/12188 [1:41:55<5:29:37, 8.26s/it] {'loss': 0.2707, 'grad_norm': 0.6563726677985214, 'learning_rate': 9.789469145367696e-07, 'epoch': 0.8} + 80%|████████ | 9794/12188 [1:41:56<5:29:37, 8.26s/it] 80%|████████ | 9795/12188 [1:42:03<5:20:32, 8.04s/it] {'loss': 0.3275, 'grad_norm': 0.6478316969374982, 'learning_rate': 9.78157347489208e-07, 'epoch': 0.8} + 80%|████████ | 9795/12188 [1:42:03<5:20:32, 8.04s/it] 80%|████████ | 9796/12188 [1:42:10<5:10:43, 7.79s/it] {'loss': 0.2956, 'grad_norm': 0.6528130855580764, 'learning_rate': 9.773680644577116e-07, 'epoch': 0.8} + 80%|████████ | 9796/12188 [1:42:10<5:10:43, 7.79s/it] 80%|████████ | 9797/12188 [1:42:17<5:03:04, 7.61s/it] {'loss': 0.3274, 'grad_norm': 0.6895641087024712, 'learning_rate': 9.765790654980195e-07, 'epoch': 0.8} + 80%|████████ | 9797/12188 [1:42:17<5:03:04, 7.61s/it] 80%|████████ | 9798/12188 [1:42:25<5:03:56, 7.63s/it] {'loss': 0.2996, 'grad_norm': 0.8196761825169925, 'learning_rate': 9.75790350665851e-07, 'epoch': 0.8} + 80%|████████ | 9798/12188 [1:42:25<5:03:56, 7.63s/it] 80%|████████ | 9799/12188 [1:42:32<4:58:51, 7.51s/it] {'loss': 0.3097, 'grad_norm': 0.685141756807335, 'learning_rate': 9.750019200169014e-07, 'epoch': 0.8} + 80%|████���███ | 9799/12188 [1:42:32<4:58:51, 7.51s/it] 80%|████████ | 9800/12188 [1:42:39<4:50:21, 7.30s/it] {'loss': 0.3302, 'grad_norm': 0.6889135550307863, 'learning_rate': 9.742137736068508e-07, 'epoch': 0.8} + 80%|████████ | 9800/12188 [1:42:39<4:50:21, 7.30s/it] 80%|████████ | 9801/12188 [1:42:46<4:44:02, 7.14s/it] {'loss': 0.3068, 'grad_norm': 0.7059144562459329, 'learning_rate': 9.734259114913557e-07, 'epoch': 0.8} + 80%|████████ | 9801/12188 [1:42:46<4:44:02, 7.14s/it] 80%|████████ | 9802/12188 [1:42:53<4:41:33, 7.08s/it] {'loss': 0.2966, 'grad_norm': 0.7223391291343132, 'learning_rate': 9.72638333726052e-07, 'epoch': 0.8} + 80%|████████ | 9802/12188 [1:42:53<4:41:33, 7.08s/it] 80%|████████ | 9803/12188 [1:43:00<4:44:52, 7.17s/it] {'loss': 0.2635, 'grad_norm': 0.6691725662348752, 'learning_rate': 9.718510403665599e-07, 'epoch': 0.8} + 80%|████████ | 9803/12188 [1:43:00<4:44:52, 7.17s/it] 80%|████████ | 9804/12188 [1:43:08<4:55:37, 7.44s/it] {'loss': 0.3324, 'grad_norm': 0.7167795288995025, 'learning_rate': 9.710640314684744e-07, 'epoch': 0.8} + 80%|████████ | 9804/12188 [1:43:08<4:55:37, 7.44s/it] 80%|████████ | 9805/12188 [1:43:15<4:49:54, 7.30s/it] {'loss': 0.2543, 'grad_norm': 0.6387484427288932, 'learning_rate': 9.702773070873755e-07, 'epoch': 0.8} + 80%|████████ | 9805/12188 [1:43:15<4:49:54, 7.30s/it] 80%|████████ | 9806/12188 [1:43:26<5:25:58, 8.21s/it] {'loss': 0.3054, 'grad_norm': 0.6481747222188106, 'learning_rate': 9.694908672788178e-07, 'epoch': 0.8} + 80%|████████ | 9806/12188 [1:43:26<5:25:58, 8.21s/it] 80%|████████ | 9807/12188 [1:43:33<5:20:35, 8.08s/it] {'loss': 0.3419, 'grad_norm': 0.6796133006510948, 'learning_rate': 9.687047120983396e-07, 'epoch': 0.8} + 80%|████████ | 9807/12188 [1:43:33<5:20:35, 8.08s/it] 80%|████████ | 9808/12188 [1:43:41<5:17:06, 7.99s/it] {'loss': 0.2962, 'grad_norm': 0.7343041708307997, 'learning_rate': 9.679188416014585e-07, 'epoch': 0.8} + 80%|████████ | 9808/12188 [1:43:41<5:17:06, 7.99s/it] 80%|████████ | 9809/12188 [1:43:48<5:04:07, 7.67s/it] {'loss': 0.2848, 'grad_norm': 0.6893584309659683, 'learning_rate': 9.671332558436697e-07, 'epoch': 0.8} + 80%|████████ | 9809/12188 [1:43:48<5:04:07, 7.67s/it] 80%|████████ | 9810/12188 [1:43:55<4:55:34, 7.46s/it] {'loss': 0.2922, 'grad_norm': 0.6793687699884136, 'learning_rate': 9.663479548804527e-07, 'epoch': 0.8} + 80%|████████ | 9810/12188 [1:43:55<4:55:34, 7.46s/it] 80%|████████ | 9811/12188 [1:44:03<5:02:07, 7.63s/it] {'loss': 0.3048, 'grad_norm': 0.7364870950316772, 'learning_rate': 9.65562938767261e-07, 'epoch': 0.8} + 80%|████████ | 9811/12188 [1:44:03<5:02:07, 7.63s/it] 81%|████████ | 9812/12188 [1:44:13<5:30:52, 8.36s/it] {'loss': 0.2861, 'grad_norm': 0.6809929878339055, 'learning_rate': 9.64778207559533e-07, 'epoch': 0.81} + 81%|████████ | 9812/12188 [1:44:13<5:30:52, 8.36s/it] 81%|████████ | 9813/12188 [1:44:20<5:18:22, 8.04s/it] {'loss': 0.3133, 'grad_norm': 0.7160690586838426, 'learning_rate': 9.63993761312686e-07, 'epoch': 0.81} + 81%|████████ | 9813/12188 [1:44:20<5:18:22, 8.04s/it] 81%|████████ | 9814/12188 [1:44:27<5:04:32, 7.70s/it] {'loss': 0.3424, 'grad_norm': 0.6466841562524096, 'learning_rate': 9.63209600082114e-07, 'epoch': 0.81} + 81%|████████ | 9814/12188 [1:44:27<5:04:32, 7.70s/it] 81%|████████ | 9815/12188 [1:44:35<5:01:53, 7.63s/it] {'loss': 0.3226, 'grad_norm': 0.719344173188712, 'learning_rate': 9.624257239231955e-07, 'epoch': 0.81} + 81%|████████ | 9815/12188 [1:44:35<5:01:53, 7.63s/it] 81%|████████ | 9816/12188 [1:44:44<5:22:41, 8.16s/it] {'loss': 0.28, 'grad_norm': 0.6558356641966975, 'learning_rate': 9.616421328912862e-07, 'epoch': 0.81} + 81%|████████ | 9816/12188 [1:44:44<5:22:41, 8.16s/it] 81%|████████ | 9817/12188 [1:44:51<5:11:49, 7.89s/it] {'loss': 0.3131, 'grad_norm': 0.7098968242875349, 'learning_rate': 9.608588270417196e-07, 'epoch': 0.81} + 81%|████████ | 9817/12188 [1:44:51<5:11:49, 7.89s/it] 81%|████████ | 9818/12188 [1:44:59<5:07:33, 7.79s/it] {'loss': 0.3156, 'grad_norm': 0.6844156285407984, 'learning_rate': 9.600758064298144e-07, 'epoch': 0.81} + 81%|████████ | 9818/12188 [1:44:59<5:07:33, 7.79s/it] 81%|████████ | 9819/12188 [1:45:10<5:39:57, 8.61s/it] {'loss': 0.3178, 'grad_norm': 0.7284685029721311, 'learning_rate': 9.59293071110864e-07, 'epoch': 0.81} + 81%|████████ | 9819/12188 [1:45:10<5:39:57, 8.61s/it] 81%|████████ | 9820/12188 [1:45:16<5:17:58, 8.06s/it] {'loss': 0.2838, 'grad_norm': 0.7212034662941212, 'learning_rate': 9.58510621140145e-07, 'epoch': 0.81} + 81%|████████ | 9820/12188 [1:45:16<5:17:58, 8.06s/it] 81%|████████ | 9821/12188 [1:45:23<5:00:40, 7.62s/it] {'loss': 0.3164, 'grad_norm': 0.7437386228851424, 'learning_rate': 9.577284565729139e-07, 'epoch': 0.81} + 81%|████████ | 9821/12188 [1:45:23<5:00:40, 7.62s/it] 81%|████████ | 9822/12188 [1:45:30<4:49:58, 7.35s/it] {'loss': 0.3057, 'grad_norm': 0.7196098185849947, 'learning_rate': 9.569465774644038e-07, 'epoch': 0.81} + 81%|████████ | 9822/12188 [1:45:30<4:49:58, 7.35s/it] 81%|████████ | 9823/12188 [1:45:36<4:41:59, 7.15s/it] {'loss': 0.2954, 'grad_norm': 0.7792155867896248, 'learning_rate': 9.561649838698317e-07, 'epoch': 0.81} + 81%|████████ | 9823/12188 [1:45:36<4:41:59, 7.15s/it] 81%|████████ | 9824/12188 [1:45:43<4:40:39, 7.12s/it] {'loss': 0.3113, 'grad_norm': 0.7217605930956181, 'learning_rate': 9.553836758443908e-07, 'epoch': 0.81} + 81%|████████ | 9824/12188 [1:45:43<4:40:39, 7.12s/it] 81%|████████ | 9825/12188 [1:45:50<4:37:14, 7.04s/it] {'loss': 0.2928, 'grad_norm': 0.6278993455399428, 'learning_rate': 9.546026534432563e-07, 'epoch': 0.81} + 81%|████████ | 9825/12188 [1:45:50<4:37:14, 7.04s/it] 81%|████████ | 9826/12188 [1:45:57<4:39:18, 7.10s/it] {'loss': 0.2959, 'grad_norm': 0.7425513582935067, 'learning_rate': 9.53821916721584e-07, 'epoch': 0.81} + 81%|████████ | 9826/12188 [1:45:57<4:39:18, 7.10s/it] 81%|████████ | 9827/12188 [1:46:05<4:40:42, 7.13s/it] {'loss': 0.2818, 'grad_norm': 0.7999019379598489, 'learning_rate': 9.530414657345061e-07, 'epoch': 0.81} + 81%|████████ | 9827/12188 [1:46:05<4:40:42, 7.13s/it] 81%|████████ | 9828/12188 [1:46:12<4:48:41, 7.34s/it] {'loss': 0.3447, 'grad_norm': 0.9870979950903441, 'learning_rate': 9.522613005371389e-07, 'epoch': 0.81} + 81%|████████ | 9828/12188 [1:46:12<4:48:41, 7.34s/it] 81%|████████ | 9829/12188 [1:46:19<4:43:23, 7.21s/it] {'loss': 0.2856, 'grad_norm': 0.6895303205989711, 'learning_rate': 9.514814211845747e-07, 'epoch': 0.81} + 81%|████████ | 9829/12188 [1:46:19<4:43:23, 7.21s/it] 81%|████████ | 9830/12188 [1:46:27<4:47:17, 7.31s/it] {'loss': 0.2869, 'grad_norm': 0.6956771369815642, 'learning_rate': 9.507018277318896e-07, 'epoch': 0.81} + 81%|████████ | 9830/12188 [1:46:27<4:47:17, 7.31s/it] 81%|████████ | 9831/12188 [1:46:34<4:41:00, 7.15s/it] {'loss': 0.3447, 'grad_norm': 0.9245295892625602, 'learning_rate': 9.49922520234135e-07, 'epoch': 0.81} + 81%|████████ | 9831/12188 [1:46:34<4:41:00, 7.15s/it] 81%|████████ | 9832/12188 [1:46:41<4:46:45, 7.30s/it] {'loss': 0.2959, 'grad_norm': 0.7074187552951492, 'learning_rate': 9.491434987463444e-07, 'epoch': 0.81} + 81%|████████ | 9832/12188 [1:46:41<4:46:45, 7.30s/it] 81%|████████ | 9833/12188 [1:46:49<4:46:03, 7.29s/it] {'loss': 0.3102, 'grad_norm': 0.6812669031564845, 'learning_rate': 9.483647633235315e-07, 'epoch': 0.81} + 81%|████████ | 9833/12188 [1:46:49<4:46:03, 7.29s/it] 81%|████████ | 9834/12188 [1:46:56<4:49:16, 7.37s/it] {'loss': 0.3327, 'grad_norm': 0.9703420755628027, 'learning_rate': 9.47586314020691e-07, 'epoch': 0.81} + 81%|████████ | 9834/12188 [1:46:56<4:49:16, 7.37s/it] 81%|████████ | 9835/12188 [1:47:03<4:44:59, 7.27s/it] {'loss': 0.313, 'grad_norm': 0.7958605027791359, 'learning_rate': 9.468081508927923e-07, 'epoch': 0.81} + 81%|████████ | 9835/12188 [1:47:03<4:44:59, 7.27s/it] 81%|████████ | 9836/12188 [1:47:10<4:44:04, 7.25s/it] {'loss': 0.3255, 'grad_norm': 0.7759277178364582, 'learning_rate': 9.460302739947924e-07, 'epoch': 0.81} + 81%|████████ | 9836/12188 [1:47:10<4:44:04, 7.25s/it] 81%|████████ | 9837/12188 [1:47:17<4:40:23, 7.16s/it] {'loss': 0.2878, 'grad_norm': 0.6975689778285528, 'learning_rate': 9.452526833816194e-07, 'epoch': 0.81} + 81%|████████ | 9837/12188 [1:47:17<4:40:23, 7.16s/it] 81%|████████ | 9838/12188 [1:47:28<5:15:39, 8.06s/it] {'loss': 0.2918, 'grad_norm': 0.6620731945567515, 'learning_rate': 9.444753791081874e-07, 'epoch': 0.81} + 81%|████████ | 9838/12188 [1:47:28<5:15:39, 8.06s/it] 81%|████████ | 9839/12188 [1:47:34<5:02:33, 7.73s/it] {'loss': 0.2874, 'grad_norm': 0.7303074673590996, 'learning_rate': 9.436983612293898e-07, 'epoch': 0.81} + 81%|████████ | 9839/12188 [1:47:34<5:02:33, 7.73s/it] 81%|████████ | 9840/12188 [1:47:42<5:02:38, 7.73s/it] {'loss': 0.2557, 'grad_norm': 0.6660505469852952, 'learning_rate': 9.429216298000959e-07, 'epoch': 0.81} + 81%|████████ | 9840/12188 [1:47:42<5:02:38, 7.73s/it] 81%|████████ | 9841/12188 [1:47:50<4:58:51, 7.64s/it] {'loss': 0.3294, 'grad_norm': 0.717309954091727, 'learning_rate': 9.421451848751595e-07, 'epoch': 0.81} + 81%|████████ | 9841/12188 [1:47:50<4:58:51, 7.64s/it] 81%|████████ | 9842/12188 [1:47:57<4:49:47, 7.41s/it] {'loss': 0.3145, 'grad_norm': 0.7040660663612565, 'learning_rate': 9.413690265094094e-07, 'epoch': 0.81} + 81%|████████ | 9842/12188 [1:47:57<4:49:47, 7.41s/it] 81%|████████ | 9843/12188 [1:48:04<4:45:29, 7.30s/it] {'loss': 0.3258, 'grad_norm': 0.7458908649745646, 'learning_rate': 9.405931547576591e-07, 'epoch': 0.81} + 81%|████████ | 9843/12188 [1:48:04<4:45:29, 7.30s/it] 81%|████████ | 9844/12188 [1:48:11<4:42:30, 7.23s/it] {'loss': 0.273, 'grad_norm': 0.8413307116599421, 'learning_rate': 9.398175696746986e-07, 'epoch': 0.81} + 81%|████████ | 9844/12188 [1:48:11<4:42:30, 7.23s/it] 81%|████████ | 9845/12188 [1:48:18<4:41:45, 7.22s/it] {'loss': 0.3069, 'grad_norm': 0.6743870438271684, 'learning_rate': 9.39042271315297e-07, 'epoch': 0.81} + 81%|████████ | 9845/12188 [1:48:18<4:41:45, 7.22s/it] 81%|████████ | 9846/12188 [1:48:27<5:00:23, 7.70s/it] {'loss': 0.3375, 'grad_norm': 0.6947556637756624, 'learning_rate': 9.382672597342063e-07, 'epoch': 0.81} + 81%|████████ | 9846/12188 [1:48:27<5:00:23, 7.70s/it] 81%|████████ | 9847/12188 [1:48:34<4:51:33, 7.47s/it] {'loss': 0.2917, 'grad_norm': 0.672092148224181, 'learning_rate': 9.374925349861552e-07, 'epoch': 0.81} + 81%|████████ | 9847/12188 [1:48:34<4:51:33, 7.47s/it] 81%|████████ | 9848/12188 [1:48:41<4:46:04, 7.34s/it] {'loss': 0.338, 'grad_norm': 0.6860567183996107, 'learning_rate': 9.367180971258543e-07, 'epoch': 0.81} + 81%|████████ | 9848/12188 [1:48:41<4:46:04, 7.34s/it] 81%|████████ | 9849/12188 [1:48:48<4:45:06, 7.31s/it] {'loss': 0.3064, 'grad_norm': 0.7367611415321796, 'learning_rate': 9.35943946207995e-07, 'epoch': 0.81} + 81%|████████ | 9849/12188 [1:48:48<4:45:06, 7.31s/it] 81%|████████ | 9850/12188 [1:48:55<4:48:00, 7.39s/it] {'loss': 0.3013, 'grad_norm': 0.7073835079459354, 'learning_rate': 9.351700822872428e-07, 'epoch': 0.81} + 81%|████████ | 9850/12188 [1:48:55<4:48:00, 7.39s/it] 81%|████████ | 9851/12188 [1:49:04<4:59:39, 7.69s/it] {'loss': 0.3242, 'grad_norm': 0.6490301534496024, 'learning_rate': 9.343965054182491e-07, 'epoch': 0.81} + 81%|████████ | 9851/12188 [1:49:04<4:59:39, 7.69s/it] 81%|████████ | 9852/12188 [1:49:11<4:53:40, 7.54s/it] {'loss': 0.2992, 'grad_norm': 0.7157015323278011, 'learning_rate': 9.336232156556435e-07, 'epoch': 0.81} + 81%|████████ | 9852/12188 [1:49:11<4:53:40, 7.54s/it] 81%|████████ | 9853/12188 [1:49:18<4:50:53, 7.47s/it] {'loss': 0.278, 'grad_norm': 0.7316506111072479, 'learning_rate': 9.328502130540324e-07, 'epoch': 0.81} + 81%|████████ | 9853/12188 [1:49:18<4:50:53, 7.47s/it] 81%|████████ | 9854/12188 [1:49:25<4:45:04, 7.33s/it] {'loss': 0.3066, 'grad_norm': 0.7172763416520466, 'learning_rate': 9.320774976680058e-07, 'epoch': 0.81} + 81%|████████ | 9854/12188 [1:49:25<4:45:04, 7.33s/it] 81%|████████ | 9855/12188 [1:49:32<4:43:08, 7.28s/it] {'loss': 0.2985, 'grad_norm': 0.7223924647716998, 'learning_rate': 9.313050695521292e-07, 'epoch': 0.81} + 81%|████████ | 9855/12188 [1:49:33<4:43:08, 7.28s/it] 81%|████████ | 9856/12188 [1:49:42<5:05:29, 7.86s/it] {'loss': 0.2617, 'grad_norm': 0.6851662807725613, 'learning_rate': 9.305329287609533e-07, 'epoch': 0.81} + 81%|████████ | 9856/12188 [1:49:42<5:05:29, 7.86s/it] 81%|████████ | 9857/12188 [1:49:49<5:01:48, 7.77s/it] {'loss': 0.2811, 'grad_norm': 0.6681299895446859, 'learning_rate': 9.297610753490027e-07, 'epoch': 0.81} + 81%|████████ | 9857/12188 [1:49:49<5:01:48, 7.77s/it] 81%|████████ | 9858/12188 [1:49:56<4:50:22, 7.48s/it] {'loss': 0.2927, 'grad_norm': 0.6649945691432954, 'learning_rate': 9.289895093707862e-07, 'epoch': 0.81} + 81%|████████ | 9858/12188 [1:49:56<4:50:22, 7.48s/it] 81%|████████ | 9859/12188 [1:50:03<4:43:48, 7.31s/it] {'loss': 0.3014, 'grad_norm': 0.7004445350191841, 'learning_rate': 9.282182308807907e-07, 'epoch': 0.81} + 81%|████████ | 9859/12188 [1:50:03<4:43:48, 7.31s/it] 81%|████████ | 9860/12188 [1:50:10<4:38:38, 7.18s/it] {'loss': 0.3512, 'grad_norm': 0.7171687005866211, 'learning_rate': 9.274472399334805e-07, 'epoch': 0.81} + 81%|████████ | 9860/12188 [1:50:10<4:38:38, 7.18s/it] 81%|████████ | 9861/12188 [1:50:17<4:40:34, 7.23s/it] {'loss': 0.3, 'grad_norm': 0.7877707717277538, 'learning_rate': 9.266765365833031e-07, 'epoch': 0.81} + 81%|████████ | 9861/12188 [1:50:17<4:40:34, 7.23s/it] 81%|████████ | 9862/12188 [1:50:25<4:41:08, 7.25s/it] {'loss': 0.2935, 'grad_norm': 0.6831735686691142, 'learning_rate': 9.259061208846859e-07, 'epoch': 0.81} + 81%|████████ | 9862/12188 [1:50:25<4:41:08, 7.25s/it] 81%|████████ | 9863/12188 [1:50:32<4:38:01, 7.17s/it] {'loss': 0.2576, 'grad_norm': 0.6926548792955355, 'learning_rate': 9.251359928920312e-07, 'epoch': 0.81} + 81%|████████ | 9863/12188 [1:50:32<4:38:01, 7.17s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7fbb850764d0> +[Try #0] Failed to fetch sample 4408092 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7fbb850764d0> +Problematic sample: {'image': '20240823_021250_before_screenshot_sub0.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Picture 1 of 18'"}, {'from': 'gpt', 'value': '\nclick(x=0.245, y=0.43)\n'}]} + 81%|████████ | 9864/12188 [1:50:40<4:48:55, 7.46s/it] {'loss': 0.3123, 'grad_norm': 0.7440237065165932, 'learning_rate': 9.243661526597275e-07, 'epoch': 0.81} + 81%|████████ | 9864/12188 [1:50:40<4:48:55, 7.46s/it] 81%|████████ | 9865/12188 [1:50:47<4:42:00, 7.28s/it] {'loss': 0.2947, 'grad_norm': 0.6703671856287735, 'learning_rate': 9.235966002421365e-07, 'epoch': 0.81} + 81%|████████ | 9865/12188 [1:50:47<4:42:00, 7.28s/it] 81%|████████ | 9866/12188 [1:50:54<4:43:17, 7.32s/it] {'loss': 0.3001, 'grad_norm': 0.7255419388336735, 'learning_rate': 9.228273356936046e-07, 'epoch': 0.81} + 81%|████████ | 9866/12188 [1:50:54<4:43:17, 7.32s/it] 81%|████████ | 9867/12188 [1:51:01<4:37:49, 7.18s/it] {'loss': 0.3168, 'grad_norm': 0.8571055350460317, 'learning_rate': 9.220583590684567e-07, 'epoch': 0.81} + 81%|████████ | 9867/12188 [1:51:01<4:37:49, 7.18s/it] 81%|████████ | 9868/12188 [1:51:08<4:32:46, 7.05s/it] {'loss': 0.3125, 'grad_norm': 0.6783254430930901, 'learning_rate': 9.21289670420995e-07, 'epoch': 0.81} + 81%|████████ | 9868/12188 [1:51:08<4:32:46, 7.05s/it] 81%|████████ | 9869/12188 [1:51:15<4:37:58, 7.19s/it] {'loss': 0.2761, 'grad_norm': 0.780027348645489, 'learning_rate': 9.205212698055049e-07, 'epoch': 0.81} + 81%|████████ | 9869/12188 [1:51:15<4:37:58, 7.19s/it] 81%|████████ | 9870/12188 [1:51:23<4:43:58, 7.35s/it] {'loss': 0.3425, 'grad_norm': 0.6556621257300823, 'learning_rate': 9.197531572762475e-07, 'epoch': 0.81} + 81%|████████ | 9870/12188 [1:51:23<4:43:58, 7.35s/it] 81%|████████ | 9871/12188 [1:51:30<4:47:39, 7.45s/it] {'loss': 0.3224, 'grad_norm': 0.6986245338494164, 'learning_rate': 9.189853328874676e-07, 'epoch': 0.81} + 81%|████████ | 9871/12188 [1:51:30<4:47:39, 7.45s/it] 81%|████████ | 9872/12188 [1:51:38<4:44:02, 7.36s/it] {'loss': 0.3106, 'grad_norm': 0.6812308031330732, 'learning_rate': 9.182177966933869e-07, 'epoch': 0.81} + 81%|████████ | 9872/12188 [1:51:38<4:44:02, 7.36s/it] 81%|████████ | 9873/12188 [1:51:45<4:45:57, 7.41s/it] {'loss': 0.304, 'grad_norm': 0.7546753668351757, 'learning_rate': 9.174505487482066e-07, 'epoch': 0.81} + 81%|████████ | 9873/12188 [1:51:45<4:45:57, 7.41s/it] 81%|████████ | 9874/12188 [1:51:52<4:39:46, 7.25s/it] {'loss': 0.3117, 'grad_norm': 0.6545160176254974, 'learning_rate': 9.166835891061088e-07, 'epoch': 0.81} + 81%|████████ | 9874/12188 [1:51:52<4:39:46, 7.25s/it] 81%|████████ | 9875/12188 [1:51:59<4:40:09, 7.27s/it] {'loss': 0.3069, 'grad_norm': 0.7390978281438431, 'learning_rate': 9.159169178212574e-07, 'epoch': 0.81} + 81%|████████ | 9875/12188 [1:51:59<4:40:09, 7.27s/it] 81%|████████ | 9876/12188 [1:52:06<4:34:10, 7.12s/it] {'loss': 0.2822, 'grad_norm': 0.8205958777880177, 'learning_rate': 9.151505349477901e-07, 'epoch': 0.81} + 81%|████████ | 9876/12188 [1:52:06<4:34:10, 7.12s/it] 81%|████████ | 9877/12188 [1:52:13<4:33:22, 7.10s/it] {'loss': 0.2976, 'grad_norm': 0.6660293619681592, 'learning_rate': 9.143844405398306e-07, 'epoch': 0.81} + 81%|████████ | 9877/12188 [1:52:13<4:33:22, 7.10s/it] 81%|████████ | 9878/12188 [1:52:21<4:37:03, 7.20s/it] {'loss': 0.3019, 'grad_norm': 0.6099372105089564, 'learning_rate': 9.136186346514769e-07, 'epoch': 0.81} + 81%|████████ | 9878/12188 [1:52:21<4:37:03, 7.20s/it] 81%|████████ | 9879/12188 [1:52:28<4:36:45, 7.19s/it] {'loss': 0.3162, 'grad_norm': 0.6860702532859134, 'learning_rate': 9.128531173368094e-07, 'epoch': 0.81} + 81%|████████ | 9879/12188 [1:52:28<4:36:45, 7.19s/it] 81%|████████ | 9880/12188 [1:52:35<4:40:55, 7.30s/it] {'loss': 0.3108, 'grad_norm': 0.8590212415022558, 'learning_rate': 9.120878886498897e-07, 'epoch': 0.81} + 81%|████████ | 9880/12188 [1:52:35<4:40:55, 7.30s/it] 81%|████████ | 9881/12188 [1:52:43<4:48:04, 7.49s/it] {'loss': 0.3263, 'grad_norm': 0.7603906380285638, 'learning_rate': 9.113229486447545e-07, 'epoch': 0.81} + 81%|████████ | 9881/12188 [1:52:43<4:48:04, 7.49s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7fd565bcb420> +[Try #0] Failed to fetch sample 1072490 in VC:s3://gui/aguvis/aguvis-stage2/amex/images. Exception: cannot identify image file <_io.BytesIO object at 0x7fd565bcb420> +Problematic sample: {'image': 'bf3459bcaf434803a580fcd36cbe71aestep0.png', 'conversations': [{'from': 'human', 'value': '\nPlease generate the next move according to the UI screenshot, the task and previous operations.\n\nTask:\nOpen AP News. Share the link of the first article in the "Business" category\n\nPrevious operations:\nNone'}, {'from': 'gpt', 'value': "\nThe goal is to open the AP News app and find the first article in the 'Business' category. Starting by launching the AP News app is the logical first step.\n\n\nTap on the AP News app to open it.\n\n\nterminate(status='success')\n"}]} + 81%|████████ | 9882/12188 [1:52:51<4:56:55, 7.73s/it] {'loss': 0.3142, 'grad_norm': 0.7054667666563984, 'learning_rate': 9.105582973754252e-07, 'epoch': 0.81} + 81%|████████ | 9882/12188 [1:52:52<4:56:55, 7.73s/it] 81%|████████ | 9883/12188 [1:52:58<4:46:40, 7.46s/it] {'loss': 0.3142, 'grad_norm': 0.7248225064392722, 'learning_rate': 9.097939348958968e-07, 'epoch': 0.81} + 81%|████████ | 9883/12188 [1:52:58<4:46:40, 7.46s/it] 81%|████████ | 9884/12188 [1:53:05<4:39:09, 7.27s/it] {'loss': 0.3522, 'grad_norm': 0.658489341883135, 'learning_rate': 9.090298612601511e-07, 'epoch': 0.81} + 81%|████████ | 9884/12188 [1:53:05<4:39:09, 7.27s/it] 81%|████████ | 9885/12188 [1:53:13<4:41:02, 7.32s/it] {'loss': 0.2932, 'grad_norm': 0.7097749804285083, 'learning_rate': 9.082660765221424e-07, 'epoch': 0.81} + 81%|████████ | 9885/12188 [1:53:13<4:41:02, 7.32s/it] 81%|████████ | 9886/12188 [1:53:19<4:33:34, 7.13s/it] {'loss': 0.333, 'grad_norm': 0.7008237798854138, 'learning_rate': 9.075025807358107e-07, 'epoch': 0.81} + 81%|████████ | 9886/12188 [1:53:19<4:33:34, 7.13s/it] 81%|████████ | 9887/12188 [1:53:27<4:44:39, 7.42s/it] {'loss': 0.2997, 'grad_norm': 0.9484944051553038, 'learning_rate': 9.067393739550707e-07, 'epoch': 0.81} + 81%|████████ | 9887/12188 [1:53:27<4:44:39, 7.42s/it] 81%|████████ | 9888/12188 [1:53:37<5:06:04, 7.98s/it] {'loss': 0.3213, 'grad_norm': 0.7391896560312154, 'learning_rate': 9.059764562338208e-07, 'epoch': 0.81} + 81%|████████ | 9888/12188 [1:53:37<5:06:04, 7.98s/it] 81%|████████ | 9889/12188 [1:53:45<5:05:37, 7.98s/it] {'loss': 0.288, 'grad_norm': 0.711461187236298, 'learning_rate': 9.052138276259348e-07, 'epoch': 0.81} + 81%|████████ | 9889/12188 [1:53:45<5:05:37, 7.98s/it] 81%|████████ | 9890/12188 [1:53:52<4:57:13, 7.76s/it] {'loss': 0.2761, 'grad_norm': 0.6671805054266858, 'learning_rate': 9.044514881852706e-07, 'epoch': 0.81} + 81%|████████ | 9890/12188 [1:53:52<4:57:13, 7.76s/it] 81%|████████ | 9891/12188 [1:53:59<4:45:18, 7.45s/it] {'loss': 0.3041, 'grad_norm': 0.7388556586752176, 'learning_rate': 9.036894379656613e-07, 'epoch': 0.81} + 81%|████████ | 9891/12188 [1:53:59<4:45:18, 7.45s/it] 81%|████████ | 9892/12188 [1:54:06<4:44:15, 7.43s/it] {'loss': 0.2976, 'grad_norm': 0.7704781022605657, 'learning_rate': 9.029276770209228e-07, 'epoch': 0.81} + 81%|████████ | 9892/12188 [1:54:06<4:44:15, 7.43s/it] 81%|████████ | 9893/12188 [1:54:13<4:41:24, 7.36s/it] {'loss': 0.3117, 'grad_norm': 0.6979649702597693, 'learning_rate': 9.021662054048502e-07, 'epoch': 0.81} + 81%|████████ | 9893/12188 [1:54:13<4:41:24, 7.36s/it] 81%|████████ | 9894/12188 [1:54:22<4:52:53, 7.66s/it] {'loss': 0.292, 'grad_norm': 0.68914797568284, 'learning_rate': 9.014050231712157e-07, 'epoch': 0.81} + 81%|████████ | 9894/12188 [1:54:22<4:52:53, 7.66s/it] 81%|████████ | 9895/12188 [1:54:29<4:51:48, 7.64s/it] {'loss': 0.302, 'grad_norm': 0.6217146797714456, 'learning_rate': 9.006441303737746e-07, 'epoch': 0.81} + 81%|████████ | 9895/12188 [1:54:29<4:51:48, 7.64s/it] 81%|████████ | 9896/12188 [1:54:36<4:45:57, 7.49s/it] {'loss': 0.2756, 'grad_norm': 0.6392786375343115, 'learning_rate': 8.998835270662576e-07, 'epoch': 0.81} + 81%|████████ | 9896/12188 [1:54:36<4:45:57, 7.49s/it] 81%|████████ | 9897/12188 [1:54:44<4:49:27, 7.58s/it] {'loss': 0.3099, 'grad_norm': 1.1686975315029873, 'learning_rate': 8.991232133023798e-07, 'epoch': 0.81} + 81%|████████ | 9897/12188 [1:54:44<4:49:27, 7.58s/it] 81%|████████ | 9898/12188 [1:54:51<4:40:39, 7.35s/it] {'loss': 0.2938, 'grad_norm': 0.728643639357711, 'learning_rate': 8.983631891358308e-07, 'epoch': 0.81} + 81%|████████ | 9898/12188 [1:54:51<4:40:39, 7.35s/it] 81%|████████ | 9899/12188 [1:54:57<4:31:23, 7.11s/it] {'loss': 0.2738, 'grad_norm': 0.8272818163655001, 'learning_rate': 8.97603454620285e-07, 'epoch': 0.81} + 81%|████████ | 9899/12188 [1:54:57<4:31:23, 7.11s/it] 81%|████████ | 9900/12188 [1:55:04<4:29:53, 7.08s/it] {'loss': 0.309, 'grad_norm': 0.6824884633625767, 'learning_rate': 8.968440098093922e-07, 'epoch': 0.81} + 81%|████████ | 9900/12188 [1:55:04<4:29:53, 7.08s/it] 81%|████████ | 9901/12188 [1:55:12<4:32:31, 7.15s/it] {'loss': 0.2889, 'grad_norm': 0.8406569236029452, 'learning_rate': 8.960848547567819e-07, 'epoch': 0.81} + 81%|████████ | 9901/12188 [1:55:12<4:32:31, 7.15s/it] 81%|████████ | 9902/12188 [1:55:21<4:52:16, 7.67s/it] {'loss': 0.2996, 'grad_norm': 0.6826955907718023, 'learning_rate': 8.95325989516066e-07, 'epoch': 0.81} + 81%|████████ | 9902/12188 [1:55:21<4:52:16, 7.67s/it] 81%|████████▏ | 9903/12188 [1:55:28<4:49:22, 7.60s/it] {'loss': 0.2625, 'grad_norm': 0.677333772562814, 'learning_rate': 8.94567414140835e-07, 'epoch': 0.81} + 81%|████████▏ | 9903/12188 [1:55:28<4:49:22, 7.60s/it] 81%|████████▏ | 9904/12188 [1:55:36<4:57:40, 7.82s/it] {'loss': 0.2882, 'grad_norm': 0.6481731982853305, 'learning_rate': 8.938091286846562e-07, 'epoch': 0.81} + 81%|████████▏ | 9904/12188 [1:55:36<4:57:40, 7.82s/it] 81%|████████▏ | 9905/12188 [1:55:43<4:46:24, 7.53s/it] {'loss': 0.2928, 'grad_norm': 0.7414621503123551, 'learning_rate': 8.930511332010794e-07, 'epoch': 0.81} + 81%|████████▏ | 9905/12188 [1:55:43<4:46:24, 7.53s/it] 81%|████████▏ | 9906/12188 [1:55:50<4:40:09, 7.37s/it] {'loss': 0.3072, 'grad_norm': 0.7275096625833021, 'learning_rate': 8.922934277436346e-07, 'epoch': 0.81} + 81%|████████▏ | 9906/12188 [1:55:50<4:40:09, 7.37s/it] 81%|████████▏ | 9907/12188 [1:55:57<4:35:47, 7.25s/it] {'loss': 0.3029, 'grad_norm': 0.6796553651016553, 'learning_rate': 8.915360123658273e-07, 'epoch': 0.81} + 81%|████████▏ | 9907/12188 [1:55:57<4:35:47, 7.25s/it] 81%|████████▏ | 9908/12188 [1:56:04<4:32:48, 7.18s/it] {'loss': 0.296, 'grad_norm': 0.7408342001434438, 'learning_rate': 8.90778887121147e-07, 'epoch': 0.81} + 81%|████████▏ | 9908/12188 [1:56:04<4:32:48, 7.18s/it] 81%|████████▏ | 9909/12188 [1:56:11<4:32:15, 7.17s/it] {'loss': 0.3074, 'grad_norm': 0.6336706381750581, 'learning_rate': 8.900220520630586e-07, 'epoch': 0.81} + 81%|████████▏ | 9909/12188 [1:56:11<4:32:15, 7.17s/it] 81%|████████▏ | 9910/12188 [1:56:19<4:32:39, 7.18s/it] {'loss': 0.3174, 'grad_norm': 0.7411025171443586, 'learning_rate': 8.892655072450101e-07, 'epoch': 0.81} + 81%|████████▏ | 9910/12188 [1:56:19<4:32:39, 7.18s/it] 81%|████████▏ | 9911/12188 [1:56:26<4:30:48, 7.14s/it] {'loss': 0.3298, 'grad_norm': 0.6784130267520196, 'learning_rate': 8.885092527204281e-07, 'epoch': 0.81} + 81%|████████▏ | 9911/12188 [1:56:26<4:30:48, 7.14s/it] 81%|████████▏ | 9912/12188 [1:56:33<4:29:37, 7.11s/it] {'loss': 0.2731, 'grad_norm': 0.6137762171306191, 'learning_rate': 8.87753288542717e-07, 'epoch': 0.81} + 81%|████████▏ | 9912/12188 [1:56:33<4:29:37, 7.11s/it] 81%|████████▏ | 9913/12188 [1:56:40<4:34:05, 7.23s/it] {'loss': 0.2933, 'grad_norm': 0.8206709166171217, 'learning_rate': 8.869976147652603e-07, 'epoch': 0.81} + 81%|████████▏ | 9913/12188 [1:56:40<4:34:05, 7.23s/it] 81%|████████▏ | 9914/12188 [1:56:48<4:43:28, 7.48s/it] {'loss': 0.3228, 'grad_norm': 0.6911130162651796, 'learning_rate': 8.86242231441426e-07, 'epoch': 0.81} + 81%|████████▏ | 9914/12188 [1:56:48<4:43:28, 7.48s/it] 81%|████████▏ | 9915/12188 [1:56:56<4:50:23, 7.67s/it] {'loss': 0.3027, 'grad_norm': 0.734282301224056, 'learning_rate': 8.854871386245539e-07, 'epoch': 0.81} + 81%|████████▏ | 9915/12188 [1:56:56<4:50:23, 7.67s/it] 81%|████████▏ | 9916/12188 [1:57:04<4:45:53, 7.55s/it] {'loss': 0.2883, 'grad_norm': 0.768857852188212, 'learning_rate': 8.847323363679716e-07, 'epoch': 0.81} + 81%|████████▏ | 9916/12188 [1:57:04<4:45:53, 7.55s/it] 81%|████████▏ | 9917/12188 [1:57:11<4:47:11, 7.59s/it] {'loss': 0.336, 'grad_norm': 0.679680829598957, 'learning_rate': 8.839778247249781e-07, 'epoch': 0.81} + 81%|████████▏ | 9917/12188 [1:57:11<4:47:11, 7.59s/it] 81%|████████▏ | 9918/12188 [1:57:18<4:39:48, 7.40s/it] {'loss': 0.2689, 'grad_norm': 0.6807649305761625, 'learning_rate': 8.832236037488584e-07, 'epoch': 0.81} + 81%|████████▏ | 9918/12188 [1:57:18<4:39:48, 7.40s/it] 81%|████████▏ | 9919/12188 [1:57:26<4:38:52, 7.37s/it] {'loss': 0.3684, 'grad_norm': 0.6770006535684625, 'learning_rate': 8.824696734928745e-07, 'epoch': 0.81} + 81%|████████▏ | 9919/12188 [1:57:26<4:38:52, 7.37s/it] 81%|████████▏ | 9920/12188 [1:57:33<4:34:54, 7.27s/it] {'loss': 0.3038, 'grad_norm': 0.6626749203148308, 'learning_rate': 8.817160340102659e-07, 'epoch': 0.81} + 81%|████████▏ | 9920/12188 [1:57:33<4:34:54, 7.27s/it] 81%|████████▏ | 9921/12188 [1:57:39<4:28:35, 7.11s/it] {'loss': 0.3032, 'grad_norm': 0.6823917293015234, 'learning_rate': 8.809626853542558e-07, 'epoch': 0.81} + 81%|████████▏ | 9921/12188 [1:57:39<4:28:35, 7.11s/it] 81%|████████▏ | 9922/12188 [1:57:46<4:24:27, 7.00s/it] {'loss': 0.3231, 'grad_norm': 0.8178245280121827, 'learning_rate': 8.802096275780414e-07, 'epoch': 0.81} + 81%|████████▏ | 9922/12188 [1:57:46<4:24:27, 7.00s/it] 81%|████████▏ | 9923/12188 [1:57:53<4:22:13, 6.95s/it] {'loss': 0.3289, 'grad_norm': 0.639351637384253, 'learning_rate': 8.794568607348042e-07, 'epoch': 0.81} + 81%|████████▏ | 9923/12188 [1:57:53<4:22:13, 6.95s/it] 81%|████████▏ | 9924/12188 [1:58:00<4:23:21, 6.98s/it] {'loss': 0.3344, 'grad_norm': 0.7451168787616431, 'learning_rate': 8.787043848777049e-07, 'epoch': 0.81} + 81%|████████▏ | 9924/12188 [1:58:00<4:23:21, 6.98s/it] 81%|████████▏ | 9925/12188 [1:58:07<4:26:55, 7.08s/it] {'loss': 0.3038, 'grad_norm': 0.7479108138895423, 'learning_rate': 8.779522000598794e-07, 'epoch': 0.81} + 81%|████████▏ | 9925/12188 [1:58:07<4:26:55, 7.08s/it] 81%|████████▏ | 9926/12188 [1:58:15<4:31:22, 7.20s/it] {'loss': 0.2921, 'grad_norm': 0.6805849312540139, 'learning_rate': 8.772003063344481e-07, 'epoch': 0.81} + 81%|████████▏ | 9926/12188 [1:58:15<4:31:22, 7.20s/it] 81%|████████▏ | 9927/12188 [1:58:22<4:31:22, 7.20s/it] {'loss': 0.2982, 'grad_norm': 0.7070605704215939, 'learning_rate': 8.764487037545071e-07, 'epoch': 0.81} + 81%|████████▏ | 9927/12188 [1:58:22<4:31:22, 7.20s/it] 81%|████████▏ | 9928/12188 [1:58:29<4:32:50, 7.24s/it] {'loss': 0.335, 'grad_norm': 0.809582963118306, 'learning_rate': 8.756973923731326e-07, 'epoch': 0.81} + 81%|████████▏ | 9928/12188 [1:58:29<4:32:50, 7.24s/it] 81%|████████▏ | 9929/12188 [1:58:37<4:34:22, 7.29s/it] {'loss': 0.291, 'grad_norm': 0.6360728126724817, 'learning_rate': 8.749463722433838e-07, 'epoch': 0.81} + 81%|████████▏ | 9929/12188 [1:58:37<4:34:22, 7.29s/it] 81%|████████▏ | 9930/12188 [1:58:44<4:32:19, 7.24s/it] {'loss': 0.311, 'grad_norm': 0.6553919895057515, 'learning_rate': 8.74195643418293e-07, 'epoch': 0.81} + 81%|████████▏ | 9930/12188 [1:58:44<4:32:19, 7.24s/it] 81%|████████▏ | 9931/12188 [1:58:50<4:25:02, 7.05s/it] {'loss': 0.2944, 'grad_norm': 0.7368048887751896, 'learning_rate': 8.734452059508786e-07, 'epoch': 0.81} + 81%|████████▏ | 9931/12188 [1:58:50<4:25:02, 7.05s/it] 81%|████████▏ | 9932/12188 [1:58:57<4:22:31, 6.98s/it] {'loss': 0.2753, 'grad_norm': 0.7350025076938042, 'learning_rate': 8.726950598941325e-07, 'epoch': 0.81} + 81%|████████▏ | 9932/12188 [1:58:57<4:22:31, 6.98s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f152ccb7c40> +[Try #0] Failed to fetch sample 4804801 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f152ccb7c40> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Download QR code'"}, {'from': 'gpt', 'value': '\nclick(x=0.8595, y=0.469)\n'}]} + 81%|████████▏ | 9933/12188 [1:59:04<4:24:28, 7.04s/it] {'loss': 0.2963, 'grad_norm': 0.6843042222402481, 'learning_rate': 8.719452053010308e-07, 'epoch': 0.81} + 81%|████████▏ | 9933/12188 [1:59:04<4:24:28, 7.04s/it] 82%|████████▏ | 9934/12188 [1:59:12<4:27:22, 7.12s/it] {'loss': 0.3402, 'grad_norm': 0.6986262444140938, 'learning_rate': 8.711956422245271e-07, 'epoch': 0.82} + 82%|████████▏ | 9934/12188 [1:59:12<4:27:22, 7.12s/it] 82%|████████▏ | 9935/12188 [1:59:19<4:23:50, 7.03s/it] {'loss': 0.2738, 'grad_norm': 0.714887100739989, 'learning_rate': 8.704463707175526e-07, 'epoch': 0.82} + 82%|████████▏ | 9935/12188 [1:59:19<4:23:50, 7.03s/it] 82%|████████▏ | 9936/12188 [1:59:25<4:22:49, 7.00s/it] {'loss': 0.3228, 'grad_norm': 0.6962748875407442, 'learning_rate': 8.696973908330209e-07, 'epoch': 0.82} + 82%|████████▏ | 9936/12188 [1:59:25<4:22:49, 7.00s/it] 82%|████████▏ | 9937/12188 [1:59:32<4:21:04, 6.96s/it] {'loss': 0.294, 'grad_norm': 0.8817106829733244, 'learning_rate': 8.689487026238247e-07, 'epoch': 0.82} + 82%|████████▏ | 9937/12188 [1:59:32<4:21:04, 6.96s/it] 82%|████████▏ | 9938/12188 [1:59:40<4:32:39, 7.27s/it] {'loss': 0.3068, 'grad_norm': 0.6780573148245364, 'learning_rate': 8.682003061428334e-07, 'epoch': 0.82} + 82%|████████▏ | 9938/12188 [1:59:40<4:32:39, 7.27s/it] 82%|████████▏ | 9939/12188 [1:59:47<4:26:53, 7.12s/it] {'loss': 0.3074, 'grad_norm': 0.7113575215364757, 'learning_rate': 8.674522014428988e-07, 'epoch': 0.82} + 82%|████████▏ | 9939/12188 [1:59:47<4:26:53, 7.12s/it] 82%|████████▏ | 9940/12188 [1:59:55<4:33:55, 7.31s/it] {'loss': 0.3185, 'grad_norm': 0.7277756280233242, 'learning_rate': 8.667043885768505e-07, 'epoch': 0.82} + 82%|████████▏ | 9940/12188 [1:59:55<4:33:55, 7.31s/it] 82%|████████▏ | 9941/12188 [2:00:02<4:27:50, 7.15s/it] {'loss': 0.2946, 'grad_norm': 0.7787673477250345, 'learning_rate': 8.659568675974967e-07, 'epoch': 0.82} + 82%|████████▏ | 9941/12188 [2:00:02<4:27:50, 7.15s/it] 82%|████████▏ | 9942/12188 [2:00:09<4:26:02, 7.11s/it] {'loss': 0.2877, 'grad_norm': 0.7514232969988364, 'learning_rate': 8.652096385576281e-07, 'epoch': 0.82} + 82%|████████▏ | 9942/12188 [2:00:09<4:26:02, 7.11s/it] 82%|████████▏ | 9943/12188 [2:00:16<4:28:31, 7.18s/it] {'loss': 0.3074, 'grad_norm': 0.734604795464531, 'learning_rate': 8.644627015100105e-07, 'epoch': 0.82} + 82%|████████▏ | 9943/12188 [2:00:16<4:28:31, 7.18s/it] 82%|████████▏ | 9944/12188 [2:00:24<4:41:59, 7.54s/it] {'loss': 0.2918, 'grad_norm': 0.6800082317226397, 'learning_rate': 8.637160565073938e-07, 'epoch': 0.82} + 82%|████████▏ | 9944/12188 [2:00:24<4:41:59, 7.54s/it] 82%|████████▏ | 9945/12188 [2:00:31<4:36:25, 7.39s/it] {'loss': 0.3225, 'grad_norm': 0.6962407807194965, 'learning_rate': 8.629697036025025e-07, 'epoch': 0.82} + 82%|████████▏ | 9945/12188 [2:00:31<4:36:25, 7.39s/it] 82%|████████▏ | 9946/12188 [2:00:39<4:35:09, 7.36s/it] {'loss': 0.2897, 'grad_norm': 0.7272088598278299, 'learning_rate': 8.622236428480441e-07, 'epoch': 0.82} + 82%|████████▏ | 9946/12188 [2:00:39<4:35:09, 7.36s/it] 82%|████████▏ | 9947/12188 [2:00:46<4:38:37, 7.46s/it] {'loss': 0.2466, 'grad_norm': 0.7433580985666725, 'learning_rate': 8.614778742967056e-07, 'epoch': 0.82} + 82%|████████▏ | 9947/12188 [2:00:46<4:38:37, 7.46s/it] 82%|████████▏ | 9948/12188 [2:00:54<4:38:43, 7.47s/it] {'loss': 0.2847, 'grad_norm': 0.70744928593701, 'learning_rate': 8.607323980011495e-07, 'epoch': 0.82} + 82%|████████▏ | 9948/12188 [2:00:54<4:38:43, 7.47s/it] 82%|████████▏ | 9949/12188 [2:01:03<4:57:06, 7.96s/it] {'loss': 0.2442, 'grad_norm': 0.6811113633596454, 'learning_rate': 8.59987214014022e-07, 'epoch': 0.82} + 82%|████████▏ | 9949/12188 [2:01:03<4:57:06, 7.96s/it] 82%|████████▏ | 9950/12188 [2:01:13<5:15:30, 8.46s/it] {'loss': 0.2619, 'grad_norm': 0.6340678426392529, 'learning_rate': 8.592423223879448e-07, 'epoch': 0.82} + 82%|████████▏ | 9950/12188 [2:01:13<5:15:30, 8.46s/it] 82%|████████▏ | 9951/12188 [2:01:20<5:00:22, 8.06s/it] {'loss': 0.3127, 'grad_norm': 0.6765765729763842, 'learning_rate': 8.584977231755226e-07, 'epoch': 0.82} + 82%|████████▏ | 9951/12188 [2:01:20<5:00:22, 8.06s/it] 82%|████████▏ | 9952/12188 [2:01:27<4:52:15, 7.84s/it] {'loss': 0.3216, 'grad_norm': 0.6887212684088112, 'learning_rate': 8.577534164293383e-07, 'epoch': 0.82} + 82%|████████▏ | 9952/12188 [2:01:27<4:52:15, 7.84s/it] 82%|████████▏ | 9953/12188 [2:01:34<4:45:46, 7.67s/it] {'loss': 0.2925, 'grad_norm': 0.7667301631689426, 'learning_rate': 8.570094022019515e-07, 'epoch': 0.82} + 82%|████████▏ | 9953/12188 [2:01:34<4:45:46, 7.67s/it] 82%|████████▏ | 9954/12188 [2:01:43<5:00:43, 8.08s/it] {'loss': 0.285, 'grad_norm': 0.707468684324811, 'learning_rate': 8.562656805459057e-07, 'epoch': 0.82} + 82%|████████▏ | 9954/12188 [2:01:43<5:00:43, 8.08s/it] 82%|████████▏ | 9955/12188 [2:01:53<5:21:12, 8.63s/it] {'loss': 0.2869, 'grad_norm': 0.6605149187691395, 'learning_rate': 8.555222515137201e-07, 'epoch': 0.82} + 82%|████████▏ | 9955/12188 [2:01:53<5:21:12, 8.63s/it] 82%|████████▏ | 9956/12188 [2:02:00<5:04:44, 8.19s/it] {'loss': 0.3007, 'grad_norm': 0.7041623193061963, 'learning_rate': 8.547791151578933e-07, 'epoch': 0.82} + 82%|████████▏ | 9956/12188 [2:02:00<5:04:44, 8.19s/it] 82%|████████▏ | 9957/12188 [2:02:09<5:10:54, 8.36s/it] {'loss': 0.3048, 'grad_norm': 0.6740889398139586, 'learning_rate': 8.54036271530907e-07, 'epoch': 0.82} + 82%|████████▏ | 9957/12188 [2:02:09<5:10:54, 8.36s/it] 82%|████████▏ | 9958/12188 [2:02:17<4:58:42, 8.04s/it] {'loss': 0.2891, 'grad_norm': 0.8495202292083532, 'learning_rate': 8.532937206852165e-07, 'epoch': 0.82} + 82%|████████▏ | 9958/12188 [2:02:17<4:58:42, 8.04s/it] 82%|████████▏ | 9959/12188 [2:02:24<4:48:49, 7.77s/it] {'loss': 0.3479, 'grad_norm': 0.9066434042320891, 'learning_rate': 8.525514626732617e-07, 'epoch': 0.82} + 82%|████████▏ | 9959/12188 [2:02:24<4:48:49, 7.77s/it] 82%|████████▏ | 9960/12188 [2:02:31<4:48:52, 7.78s/it] {'loss': 0.259, 'grad_norm': 0.6955111977068543, 'learning_rate': 8.5180949754746e-07, 'epoch': 0.82} + 82%|████████▏ | 9960/12188 [2:02:31<4:48:52, 7.78s/it] 82%|████████▏ | 9961/12188 [2:02:41<5:09:04, 8.33s/it] {'loss': 0.2901, 'grad_norm': 0.6574939419062198, 'learning_rate': 8.510678253602061e-07, 'epoch': 0.82} + 82%|████████▏ | 9961/12188 [2:02:41<5:09:04, 8.33s/it] 82%|████████▏ | 9962/12188 [2:02:48<4:55:14, 7.96s/it] {'loss': 0.2818, 'grad_norm': 0.6245118111989612, 'learning_rate': 8.503264461638783e-07, 'epoch': 0.82} + 82%|████████▏ | 9962/12188 [2:02:48<4:55:14, 7.96s/it] 82%|████████▏ | 9963/12188 [2:02:55<4:47:50, 7.76s/it] {'loss': 0.2742, 'grad_norm': 0.6716286979834424, 'learning_rate': 8.495853600108278e-07, 'epoch': 0.82} + 82%|████████▏ | 9963/12188 [2:02:55<4:47:50, 7.76s/it] 82%|████████▏ | 9964/12188 [2:03:02<4:37:56, 7.50s/it] {'loss': 0.3205, 'grad_norm': 0.7684944941848302, 'learning_rate': 8.488445669533918e-07, 'epoch': 0.82} + 82%|████████▏ | 9964/12188 [2:03:02<4:37:56, 7.50s/it] 82%|████████▏ | 9965/12188 [2:03:09<4:33:38, 7.39s/it] {'loss': 0.3245, 'grad_norm': 0.7316136487369692, 'learning_rate': 8.481040670438839e-07, 'epoch': 0.82} + 82%|████████▏ | 9965/12188 [2:03:09<4:33:38, 7.39s/it] 82%|████████▏ | 9966/12188 [2:03:16<4:26:07, 7.19s/it] {'loss': 0.307, 'grad_norm': 0.6745086820083381, 'learning_rate': 8.473638603345952e-07, 'epoch': 0.82} + 82%|████████▏ | 9966/12188 [2:03:16<4:26:07, 7.19s/it] 82%|████████▏ | 9967/12188 [2:03:25<4:42:27, 7.63s/it] {'loss': 0.3028, 'grad_norm': 0.7065048150368797, 'learning_rate': 8.466239468777998e-07, 'epoch': 0.82} + 82%|████████▏ | 9967/12188 [2:03:25<4:42:27, 7.63s/it] 82%|████████▏ | 9968/12188 [2:03:33<4:45:00, 7.70s/it] {'loss': 0.32, 'grad_norm': 0.7277758451816031, 'learning_rate': 8.458843267257489e-07, 'epoch': 0.82} + 82%|████████▏ | 9968/12188 [2:03:33<4:45:00, 7.70s/it] 82%|████████▏ | 9969/12188 [2:03:40<4:38:15, 7.52s/it] {'loss': 0.2868, 'grad_norm': 0.6858935315355583, 'learning_rate': 8.45144999930671e-07, 'epoch': 0.82} + 82%|████████▏ | 9969/12188 [2:03:40<4:38:15, 7.52s/it] 82%|████████▏ | 9970/12188 [2:03:47<4:28:48, 7.27s/it] {'loss': 0.3003, 'grad_norm': 0.6927879787167, 'learning_rate': 8.444059665447796e-07, 'epoch': 0.82} + 82%|████████▏ | 9970/12188 [2:03:47<4:28:48, 7.27s/it] 82%|████████▏ | 9971/12188 [2:03:54<4:31:47, 7.36s/it] {'loss': 0.2542, 'grad_norm': 0.6417163029946833, 'learning_rate': 8.436672266202611e-07, 'epoch': 0.82} + 82%|████████▏ | 9971/12188 [2:03:54<4:31:47, 7.36s/it] 82%|████████▏ | 9972/12188 [2:04:01<4:26:08, 7.21s/it] {'loss': 0.3429, 'grad_norm': 0.7768328978909367, 'learning_rate': 8.42928780209285e-07, 'epoch': 0.82} + 82%|████████▏ | 9972/12188 [2:04:01<4:26:08, 7.21s/it] 82%|████████▏ | 9973/12188 [2:04:10<4:48:20, 7.81s/it] {'loss': 0.3201, 'grad_norm': 0.7237764144885434, 'learning_rate': 8.421906273640012e-07, 'epoch': 0.82} + 82%|████████▏ | 9973/12188 [2:04:10<4:48:20, 7.81s/it] 82%|████████▏ | 9974/12188 [2:04:17<4:38:53, 7.56s/it] {'loss': 0.308, 'grad_norm': 0.8205095427411543, 'learning_rate': 8.414527681365342e-07, 'epoch': 0.82} + 82%|████████▏ | 9974/12188 [2:04:17<4:38:53, 7.56s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f018eff1df0> +[Try #0] Failed to fetch sample 4529592 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f018eff1df0> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Future Focus Report 2024'"}, {'from': 'gpt', 'value': '\nclick(x=0.752, y=0.012)\n'}]} + 82%|████████▏ | 9975/12188 [2:04:25<4:44:13, 7.71s/it] {'loss': 0.3017, 'grad_norm': 0.7719295719181825, 'learning_rate': 8.407152025789922e-07, 'epoch': 0.82} + 82%|████████▏ | 9975/12188 [2:04:25<4:44:13, 7.71s/it] 82%|████████▏ | 9976/12188 [2:04:32<4:35:44, 7.48s/it] {'loss': 0.3314, 'grad_norm': 0.7414626548789739, 'learning_rate': 8.399779307434591e-07, 'epoch': 0.82} + 82%|████████▏ | 9976/12188 [2:04:32<4:35:44, 7.48s/it] 82%|████████▏ | 9977/12188 [2:04:40<4:38:12, 7.55s/it] {'loss': 0.2811, 'grad_norm': 0.7309156240620299, 'learning_rate': 8.392409526820011e-07, 'epoch': 0.82} + 82%|████████▏ | 9977/12188 [2:04:40<4:38:12, 7.55s/it] 82%|████████▏ | 9978/12188 [2:04:47<4:31:49, 7.38s/it] {'loss': 0.3244, 'grad_norm': 0.7815557466730577, 'learning_rate': 8.385042684466638e-07, 'epoch': 0.82} + 82%|████████▏ | 9978/12188 [2:04:47<4:31:49, 7.38s/it] 82%|████████▏ | 9979/12188 [2:04:55<4:43:45, 7.71s/it] {'loss': 0.2914, 'grad_norm': 0.7580607462806421, 'learning_rate': 8.377678780894671e-07, 'epoch': 0.82} + 82%|████████▏ | 9979/12188 [2:04:55<4:43:45, 7.71s/it] 82%|████████▏ | 9980/12188 [2:05:02<4:37:32, 7.54s/it] {'loss': 0.2902, 'grad_norm': 0.6933233868138898, 'learning_rate': 8.370317816624174e-07, 'epoch': 0.82} + 82%|████████▏ | 9980/12188 [2:05:02<4:37:32, 7.54s/it] 82%|████████▏ | 9981/12188 [2:05:10<4:36:15, 7.51s/it] {'loss': 0.2923, 'grad_norm': 0.7408589891522513, 'learning_rate': 8.362959792174941e-07, 'epoch': 0.82} + 82%|████████▏ | 9981/12188 [2:05:10<4:36:15, 7.51s/it] 82%|████████▏ | 9982/12188 [2:05:17<4:30:41, 7.36s/it] {'loss': 0.2863, 'grad_norm': 0.6956005918620577, 'learning_rate': 8.3556047080666e-07, 'epoch': 0.82} + 82%|████████▏ | 9982/12188 [2:05:17<4:30:41, 7.36s/it] 82%|████████▏ | 9983/12188 [2:05:26<4:55:03, 8.03s/it] {'loss': 0.3439, 'grad_norm': 0.6670572976685084, 'learning_rate': 8.348252564818549e-07, 'epoch': 0.82} + 82%|████████▏ | 9983/12188 [2:05:26<4:55:03, 8.03s/it] 82%|████████▏ | 9984/12188 [2:05:34<4:46:13, 7.79s/it] {'loss': 0.3135, 'grad_norm': 0.7449673275683398, 'learning_rate': 8.34090336294997e-07, 'epoch': 0.82} + 82%|████████▏ | 9984/12188 [2:05:34<4:46:13, 7.79s/it] 82%|████████▏ | 9985/12188 [2:05:42<4:54:04, 8.01s/it] {'loss': 0.3106, 'grad_norm': 0.7185149164108385, 'learning_rate': 8.333557102979878e-07, 'epoch': 0.82} + 82%|████████▏ | 9985/12188 [2:05:42<4:54:04, 8.01s/it] 82%|████████▏ | 9986/12188 [2:05:50<4:55:37, 8.06s/it] {'loss': 0.2813, 'grad_norm': 0.7295754044235199, 'learning_rate': 8.326213785427028e-07, 'epoch': 0.82} + 82%|████████▏ | 9986/12188 [2:05:50<4:55:37, 8.06s/it] 82%|████████▏ | 9987/12188 [2:06:00<5:09:49, 8.45s/it] {'loss': 0.3144, 'grad_norm': 0.6717306364870657, 'learning_rate': 8.318873410810008e-07, 'epoch': 0.82} + 82%|████████▏ | 9987/12188 [2:06:00<5:09:49, 8.45s/it] 82%|████████▏ | 9988/12188 [2:06:07<4:54:40, 8.04s/it] {'loss': 0.3005, 'grad_norm': 0.7455687987278119, 'learning_rate': 8.31153597964719e-07, 'epoch': 0.82} + 82%|████████▏ | 9988/12188 [2:06:07<4:54:40, 8.04s/it] 82%|████████▏ | 9989/12188 [2:06:17<5:15:48, 8.62s/it] {'loss': 0.3392, 'grad_norm': 0.7306154155515208, 'learning_rate': 8.304201492456715e-07, 'epoch': 0.82} + 82%|████████▏ | 9989/12188 [2:06:17<5:15:48, 8.62s/it] 82%|████████▏ | 9990/12188 [2:06:24<5:02:34, 8.26s/it] {'loss': 0.2926, 'grad_norm': 0.6679741066361008, 'learning_rate': 8.296869949756542e-07, 'epoch': 0.82} + 82%|████████▏ | 9990/12188 [2:06:24<5:02:34, 8.26s/it] 82%|████████▏ | 9991/12188 [2:06:31<4:45:07, 7.79s/it] {'loss': 0.3514, 'grad_norm': 0.6854579841453268, 'learning_rate': 8.289541352064423e-07, 'epoch': 0.82} + 82%|████████▏ | 9991/12188 [2:06:31<4:45:07, 7.79s/it] 82%|████████▏ | 9992/12188 [2:06:38<4:34:43, 7.51s/it] {'loss': 0.2877, 'grad_norm': 0.6929100028244656, 'learning_rate': 8.282215699897867e-07, 'epoch': 0.82} + 82%|████████▏ | 9992/12188 [2:06:38<4:34:43, 7.51s/it] 82%|████████▏ | 9993/12188 [2:06:45<4:30:04, 7.38s/it] {'loss': 0.2878, 'grad_norm': 0.7848311685635613, 'learning_rate': 8.27489299377423e-07, 'epoch': 0.82} + 82%|████████▏ | 9993/12188 [2:06:45<4:30:04, 7.38s/it] 82%|████████▏ | 9994/12188 [2:06:53<4:33:31, 7.48s/it] {'loss': 0.2812, 'grad_norm': 0.9385172372893661, 'learning_rate': 8.267573234210596e-07, 'epoch': 0.82} + 82%|████████▏ | 9994/12188 [2:06:53<4:33:31, 7.48s/it] 82%|████████▏ | 9995/12188 [2:07:00<4:34:23, 7.51s/it] {'loss': 0.3147, 'grad_norm': 0.9220113878802888, 'learning_rate': 8.260256421723905e-07, 'epoch': 0.82} + 82%|████████▏ | 9995/12188 [2:07:00<4:34:23, 7.51s/it] 82%|████████▏ | 9996/12188 [2:07:07<4:31:26, 7.43s/it] {'loss': 0.3002, 'grad_norm': 0.6513279481313993, 'learning_rate': 8.25294255683085e-07, 'epoch': 0.82} + 82%|████████▏ | 9996/12188 [2:07:07<4:31:26, 7.43s/it] 82%|████████▏ | 9997/12188 [2:07:14<4:26:21, 7.29s/it] {'loss': 0.2812, 'grad_norm': 0.6686831763993318, 'learning_rate': 8.245631640047902e-07, 'epoch': 0.82} + 82%|████████▏ | 9997/12188 [2:07:14<4:26:21, 7.29s/it] 82%|████████▏ | 9998/12188 [2:07:22<4:32:08, 7.46s/it] {'loss': 0.3124, 'grad_norm': 0.6545384111288217, 'learning_rate': 8.23832367189138e-07, 'epoch': 0.82} + 82%|████████▏ | 9998/12188 [2:07:22<4:32:08, 7.46s/it] 82%|████████▏ | 9999/12188 [2:07:31<4:46:38, 7.86s/it] {'loss': 0.3299, 'grad_norm': 0.6723783770365839, 'learning_rate': 8.231018652877332e-07, 'epoch': 0.82} + 82%|████████▏ | 9999/12188 [2:07:31<4:46:38, 7.86s/it] 82%|████████▏ | 10000/12188 [2:07:39<4:47:14, 7.88s/it] {'loss': 0.2985, 'grad_norm': 0.6863357748478488, 'learning_rate': 8.223716583521641e-07, 'epoch': 0.82} + 82%|████████▏ | 10000/12188 [2:07:39<4:47:14, 7.88s/it]/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/utils/checkpoint.py:87: UserWarning: None of the inputs have requires_grad=True. Gradients will be None + warnings.warn( + 82%|████████▏ | 10001/12188 [2:08:04<7:53:18, 12.99s/it] {'loss': 0.3116, 'grad_norm': 0.748964075301305, 'learning_rate': 8.216417464339977e-07, 'epoch': 0.82} + 82%|████████▏ | 10001/12188 [2:08:04<7:53:18, 12.99s/it] 82%|████████▏ | 10002/12188 [2:08:11<6:50:25, 11.26s/it] {'loss': 0.3155, 'grad_norm': 0.6664385432164222, 'learning_rate': 8.209121295847766e-07, 'epoch': 0.82} + 82%|████████▏ | 10002/12188 [2:08:11<6:50:25, 11.26s/it] 82%|████████▏ | 10003/12188 [2:08:20<6:25:46, 10.59s/it] {'loss': 0.2921, 'grad_norm': 0.7116886022858407, 'learning_rate': 8.201828078560281e-07, 'epoch': 0.82} + 82%|████████▏ | 10003/12188 [2:08:20<6:25:46, 10.59s/it] 82%|████████▏ | 10004/12188 [2:08:27<5:45:13, 9.48s/it] {'loss': 0.2936, 'grad_norm': 0.7169760869222224, 'learning_rate': 8.194537812992531e-07, 'epoch': 0.82} + 82%|████████▏ | 10004/12188 [2:08:27<5:45:13, 9.48s/it] 82%|████████▏ | 10005/12188 [2:08:34<5:16:59, 8.71s/it] {'loss': 0.2744, 'grad_norm': 0.7152694537772588, 'learning_rate': 8.187250499659361e-07, 'epoch': 0.82} + 82%|████████▏ | 10005/12188 [2:08:34<5:16:59, 8.71s/it] 82%|████████▏ | 10006/12188 [2:08:41<4:56:56, 8.17s/it] {'loss': 0.32, 'grad_norm': 0.7527248546990273, 'learning_rate': 8.17996613907539e-07, 'epoch': 0.82} + 82%|████████▏ | 10006/12188 [2:08:41<4:56:56, 8.17s/it] 82%|████████▏ | 10007/12188 [2:08:48<4:41:51, 7.75s/it] {'loss': 0.306, 'grad_norm': 1.0524730468911763, 'learning_rate': 8.17268473175501e-07, 'epoch': 0.82} + 82%|████████▏ | 10007/12188 [2:08:48<4:41:51, 7.75s/it] 82%|████████▏ | 10008/12188 [2:08:55<4:40:38, 7.72s/it] {'loss': 0.3095, 'grad_norm': 0.708906907564341, 'learning_rate': 8.165406278212445e-07, 'epoch': 0.82} + 82%|████████▏ | 10008/12188 [2:08:55<4:40:38, 7.72s/it] 82%|████████▏ | 10009/12188 [2:09:03<4:37:11, 7.63s/it] {'loss': 0.3006, 'grad_norm': 0.7284204115346431, 'learning_rate': 8.158130778961665e-07, 'epoch': 0.82} + 82%|████████▏ | 10009/12188 [2:09:03<4:37:11, 7.63s/it] 82%|████████▏ | 10010/12188 [2:09:10<4:32:16, 7.50s/it] {'loss': 0.2933, 'grad_norm': 0.7174162945732486, 'learning_rate': 8.150858234516474e-07, 'epoch': 0.82} + 82%|████████▏ | 10010/12188 [2:09:10<4:32:16, 7.50s/it] 82%|████████▏ | 10011/12188 [2:09:17<4:26:23, 7.34s/it] {'loss': 0.2877, 'grad_norm': 0.7191048862394138, 'learning_rate': 8.143588645390443e-07, 'epoch': 0.82} + 82%|████████▏ | 10011/12188 [2:09:17<4:26:23, 7.34s/it] 82%|████████▏ | 10012/12188 [2:09:24<4:29:18, 7.43s/it] {'loss': 0.2815, 'grad_norm': 0.7118445967534477, 'learning_rate': 8.13632201209692e-07, 'epoch': 0.82} + 82%|████████▏ | 10012/12188 [2:09:24<4:29:18, 7.43s/it] 82%|████████▏ | 10013/12188 [2:09:32<4:27:13, 7.37s/it] {'loss': 0.2742, 'grad_norm': 0.7387666511587165, 'learning_rate': 8.129058335149076e-07, 'epoch': 0.82} + 82%|████████▏ | 10013/12188 [2:09:32<4:27:13, 7.37s/it] 82%|████████▏ | 10014/12188 [2:09:39<4:30:20, 7.46s/it] {'loss': 0.3039, 'grad_norm': 0.723003646274061, 'learning_rate': 8.121797615059878e-07, 'epoch': 0.82} + 82%|████████▏ | 10014/12188 [2:09:39<4:30:20, 7.46s/it] 82%|████████▏ | 10015/12188 [2:09:46<4:22:07, 7.24s/it] {'loss': 0.3238, 'grad_norm': 0.6667074510080533, 'learning_rate': 8.114539852342035e-07, 'epoch': 0.82} + 82%|████████▏ | 10015/12188 [2:09:46<4:22:07, 7.24s/it] 82%|████████▏ | 10016/12188 [2:09:54<4:26:54, 7.37s/it] {'loss': 0.2765, 'grad_norm': 0.6665957825971004, 'learning_rate': 8.107285047508106e-07, 'epoch': 0.82} + 82%|████████▏ | 10016/12188 [2:09:54<4:26:54, 7.37s/it] 82%|████████▏ | 10017/12188 [2:10:01<4:20:26, 7.20s/it] {'loss': 0.3108, 'grad_norm': 0.655323139855212, 'learning_rate': 8.10003320107039e-07, 'epoch': 0.82} + 82%|████████▏ | 10017/12188 [2:10:01<4:20:26, 7.20s/it] 82%|████████▏ | 10018/12188 [2:10:08<4:19:47, 7.18s/it] {'loss': 0.3285, 'grad_norm': 0.7054385402087953, 'learning_rate': 8.092784313541013e-07, 'epoch': 0.82} + 82%|████████▏ | 10018/12188 [2:10:08<4:19:47, 7.18s/it] 82%|████████▏ | 10019/12188 [2:10:15<4:18:14, 7.14s/it] {'loss': 0.2842, 'grad_norm': 0.701055078956579, 'learning_rate': 8.085538385431885e-07, 'epoch': 0.82} + 82%|████████▏ | 10019/12188 [2:10:15<4:18:14, 7.14s/it] 82%|████████▏ | 10020/12188 [2:10:22<4:19:38, 7.19s/it] {'loss': 0.2659, 'grad_norm': 0.6870722677807632, 'learning_rate': 8.078295417254689e-07, 'epoch': 0.82} + 82%|████████▏ | 10020/12188 [2:10:22<4:19:38, 7.19s/it] 82%|████████▏ | 10021/12188 [2:10:30<4:24:50, 7.33s/it] {'loss': 0.2759, 'grad_norm': 0.7213808198709604, 'learning_rate': 8.071055409520928e-07, 'epoch': 0.82} + 82%|████████▏ | 10021/12188 [2:10:30<4:24:50, 7.33s/it] 82%|████████▏ | 10022/12188 [2:10:41<5:07:09, 8.51s/it] {'loss': 0.3003, 'grad_norm': 0.7523902862564465, 'learning_rate': 8.063818362741859e-07, 'epoch': 0.82} + 82%|████████▏ | 10022/12188 [2:10:41<5:07:09, 8.51s/it] 82%|████████▏ | 10023/12188 [2:10:48<4:51:06, 8.07s/it] {'loss': 0.3079, 'grad_norm': 0.8493944482556564, 'learning_rate': 8.056584277428576e-07, 'epoch': 0.82} + 82%|████████▏ | 10023/12188 [2:10:48<4:51:06, 8.07s/it] 82%|████████▏ | 10024/12188 [2:10:56<4:46:25, 7.94s/it] {'loss': 0.3205, 'grad_norm': 0.6987448249495152, 'learning_rate': 8.049353154091921e-07, 'epoch': 0.82} + 82%|████████▏ | 10024/12188 [2:10:56<4:46:25, 7.94s/it] 82%|████████▏ | 10025/12188 [2:11:03<4:43:58, 7.88s/it] {'loss': 0.3016, 'grad_norm': 0.6663253252226553, 'learning_rate': 8.042124993242534e-07, 'epoch': 0.82} + 82%|████████▏ | 10025/12188 [2:11:03<4:43:58, 7.88s/it] 82%|████████▏ | 10026/12188 [2:11:13<5:03:40, 8.43s/it] {'loss': 0.3021, 'grad_norm': 0.7083763981926977, 'learning_rate': 8.034899795390871e-07, 'epoch': 0.82} + 82%|████████▏ | 10026/12188 [2:11:13<5:03:40, 8.43s/it] 82%|████████▏ | 10027/12188 [2:11:21<4:58:03, 8.28s/it] {'loss': 0.2997, 'grad_norm': 0.7138172073652289, 'learning_rate': 8.027677561047176e-07, 'epoch': 0.82} + 82%|████████▏ | 10027/12188 [2:11:21<4:58:03, 8.28s/it] 82%|████████▏ | 10028/12188 [2:11:28<4:48:12, 8.01s/it] {'loss': 0.3002, 'grad_norm': 0.7656877174135529, 'learning_rate': 8.020458290721445e-07, 'epoch': 0.82} + 82%|████████▏ | 10028/12188 [2:11:28<4:48:12, 8.01s/it] 82%|████████▏ | 10029/12188 [2:11:35<4:34:00, 7.61s/it] {'loss': 0.2741, 'grad_norm': 0.6914056555897813, 'learning_rate': 8.013241984923514e-07, 'epoch': 0.82} + 82%|████████▏ | 10029/12188 [2:11:35<4:34:00, 7.61s/it] 82%|████████▏ | 10030/12188 [2:11:43<4:39:56, 7.78s/it] {'loss': 0.2755, 'grad_norm': 0.6471968899798602, 'learning_rate': 8.006028644162966e-07, 'epoch': 0.82} + 82%|████████▏ | 10030/12188 [2:11:43<4:39:56, 7.78s/it] 82%|████████▏ | 10031/12188 [2:11:50<4:29:20, 7.49s/it] {'loss': 0.2865, 'grad_norm': 0.7532283776440171, 'learning_rate': 7.99881826894921e-07, 'epoch': 0.82} + 82%|████████▏ | 10031/12188 [2:11:50<4:29:20, 7.49s/it] 82%|████████▏ | 10032/12188 [2:11:58<4:29:40, 7.51s/it] {'loss': 0.2739, 'grad_norm': 0.68344924470338, 'learning_rate': 7.991610859791438e-07, 'epoch': 0.82} + 82%|████████▏ | 10032/12188 [2:11:58<4:29:40, 7.51s/it] 82%|████████▏ | 10033/12188 [2:12:05<4:24:09, 7.35s/it] {'loss': 0.2989, 'grad_norm': 0.7657452462433578, 'learning_rate': 7.984406417198604e-07, 'epoch': 0.82} + 82%|████████▏ | 10033/12188 [2:12:05<4:24:09, 7.35s/it] 82%|████████▏ | 10034/12188 [2:12:12<4:28:46, 7.49s/it] {'loss': 0.3031, 'grad_norm': 0.8318229047047246, 'learning_rate': 7.977204941679495e-07, 'epoch': 0.82} + 82%|████████▏ | 10034/12188 [2:12:12<4:28:46, 7.49s/it] 82%|████████▏ | 10035/12188 [2:12:21<4:44:03, 7.92s/it] {'loss': 0.2868, 'grad_norm': 1.2015089069040774, 'learning_rate': 7.97000643374265e-07, 'epoch': 0.82} + 82%|████████▏ | 10035/12188 [2:12:21<4:44:03, 7.92s/it] 82%|████████▏ | 10036/12188 [2:12:29<4:43:04, 7.89s/it] {'loss': 0.305, 'grad_norm': 0.6680420737997413, 'learning_rate': 7.962810893896433e-07, 'epoch': 0.82} + 82%|████████▏ | 10036/12188 [2:12:29<4:43:04, 7.89s/it] 82%|████████▏ | 10037/12188 [2:12:36<4:30:56, 7.56s/it] {'loss': 0.3238, 'grad_norm': 0.6920528483525261, 'learning_rate': 7.955618322648961e-07, 'epoch': 0.82} + 82%|████████▏ | 10037/12188 [2:12:36<4:30:56, 7.56s/it] 82%|████████▏ | 10038/12188 [2:12:43<4:26:35, 7.44s/it] {'loss': 0.3335, 'grad_norm': 0.714557613067069, 'learning_rate': 7.948428720508189e-07, 'epoch': 0.82} + 82%|████████▏ | 10038/12188 [2:12:43<4:26:35, 7.44s/it] 82%|████████▏ | 10039/12188 [2:12:50<4:21:09, 7.29s/it] {'loss': 0.314, 'grad_norm': 0.7465169800278302, 'learning_rate': 7.941242087981816e-07, 'epoch': 0.82} + 82%|████████▏ | 10039/12188 [2:12:50<4:21:09, 7.29s/it] 82%|████████▏ | 10040/12188 [2:12:57<4:14:19, 7.10s/it] {'loss': 0.2879, 'grad_norm': 0.8650660289986263, 'learning_rate': 7.93405842557734e-07, 'epoch': 0.82} + 82%|████████▏ | 10040/12188 [2:12:57<4:14:19, 7.10s/it] 82%|████████▏ | 10041/12188 [2:13:04<4:12:47, 7.06s/it] {'loss': 0.322, 'grad_norm': 0.8004135119559395, 'learning_rate': 7.92687773380208e-07, 'epoch': 0.82} + 82%|████████▏ | 10041/12188 [2:13:04<4:12:47, 7.06s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7fb96194f6a0> +[Try #0] Failed to fetch sample 4409357 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7fb96194f6a0> +Problematic sample: {'image': '20240823_045930_before_screenshot_sub3.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Data Providers'"}, {'from': 'gpt', 'value': '\nclick(x=0.8015, y=0.921)\n'}]} + 82%|████████▏ | 10042/12188 [2:13:12<4:21:37, 7.31s/it] {'loss': 0.266, 'grad_norm': 0.6886875950608488, 'learning_rate': 7.919700013163128e-07, 'epoch': 0.82} + 82%|████████▏ | 10042/12188 [2:13:12<4:21:37, 7.31s/it] 82%|████████▏ | 10043/12188 [2:13:18<4:16:56, 7.19s/it] {'loss': 0.3084, 'grad_norm': 0.6542437095774356, 'learning_rate': 7.912525264167342e-07, 'epoch': 0.82} + 82%|████████▏ | 10043/12188 [2:13:19<4:16:56, 7.19s/it] 82%|████████▏ | 10044/12188 [2:13:26<4:21:33, 7.32s/it] {'loss': 0.3039, 'grad_norm': 0.7259115775263698, 'learning_rate': 7.905353487321404e-07, 'epoch': 0.82} + 82%|████████▏ | 10044/12188 [2:13:26<4:21:33, 7.32s/it] 82%|████████▏ | 10045/12188 [2:13:34<4:32:34, 7.63s/it] {'loss': 0.3283, 'grad_norm': 0.7339710162011958, 'learning_rate': 7.898184683131782e-07, 'epoch': 0.82} + 82%|████████▏ | 10045/12188 [2:13:34<4:32:34, 7.63s/it] 82%|████████▏ | 10046/12188 [2:13:42<4:31:58, 7.62s/it] {'loss': 0.3075, 'grad_norm': 0.7313727318826256, 'learning_rate': 7.891018852104709e-07, 'epoch': 0.82} + 82%|████████▏ | 10046/12188 [2:13:42<4:31:58, 7.62s/it]W0817 19:42:32.430000 129392 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 19:42:32.430000 129392 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 19:42:32.430000 129392 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 19:42:32.430000 129392 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 19:42:32.924000 114928 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 19:42:32.924000 114928 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 19:42:32.924000 114928 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 19:42:32.924000 114928 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 19:42:32.904000 42058 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 19:42:32.904000 42058 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 19:42:32.904000 42058 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 19:42:32.904000 42058 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 19:42:33.646000 40188 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 19:42:33.646000 40188 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 19:42:33.646000 40188 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 19:42:33.646000 40188 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 19:42:37.525000 42754 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 19:42:37.525000 42754 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 19:42:37.525000 42754 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 19:42:37.525000 42754 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 19:42:38.705000 117546 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 19:42:38.705000 117546 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 19:42:38.705000 117546 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 19:42:38.705000 117546 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 19:42:38.727000 4104 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 19:42:38.727000 4104 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 19:42:38.727000 4104 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 19:42:38.727000 4104 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +[2025-08-17 19:42:54,683] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:42:54,683] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:42:54,683] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:42:54,701] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:42:54,701] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:42:54,701] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:42:54,702] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:42:54,702] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:42:54,698] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:42:54,699] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:42:54,699] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:42:54,699] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:42:54,705] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:42:54,705] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:42:54,705] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:42:54,705] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:42:54,704] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:42:54,705] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:42:54,705] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:42:54,706] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:42:54,709] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:42:54,709] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:42:54,709] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:42:54,709] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:42:54,725] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:42:54,725] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:42:54,725] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:42:54,725] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:42:56,113] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:42:56,115] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:42:56,115] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:42:56,118] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:42:56,118] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:42:56,119] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:42:56,119] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:42:56,119] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:42:59,484] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 19:42:59,484] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 19:42:59,484] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 19:42:59,484] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 19:42:59,484] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 19:42:59,484] [INFO] [comm.py:683:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl +[2025-08-17 19:42:59,484] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 19:42:59,478] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 19:42:59,478] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 19:42:59,478] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 19:42:59,478] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 19:42:59,478] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 19:42:59,479] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 19:42:59,480] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 19:42:59,486] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 19:42:59,487] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 19:42:59,480] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 19:42:59,480] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 19:42:59,480] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 19:42:59,480] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 19:42:59,480] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 19:42:59,480] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 19:42:59,481] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 19:42:59,484] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 19:42:59,704] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 19:42:59,704] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 19:42:59,704] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 19:42:59,704] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 19:42:59,704] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 19:42:59,704] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 19:42:59,704] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 19:42:59,708] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 19:42:59,953] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 19:42:59,964] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 19:42:59,978] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 19:42:59,971] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 19:43:00,042] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 19:43:00,062] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 19:43:00,071] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 19:43:00,057] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 19:43:00,071] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 19:43:00,072] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 19:43:00,077] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 19:43:00,081] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 19:43:00,088] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 19:43:00,088] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 19:43:00,090] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 19:43:00,093] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 19:43:00,096] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 19:43:00,117] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 19:43:00,125] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 19:43:00,125] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 19:43:00,126] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 19:43:00,127] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 19:43:00,136] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 19:43:00,136] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 19:43:00,137] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 19:43:00,137] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 19:43:00,137] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 19:43:00,131] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 19:43:00,133] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 19:43:00,594] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 19:43:00,730] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:43:00,736] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:43:00,727] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 19:43:00,750] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator [2025-08-17 19:43:00,733] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 19:43:00,739] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 19:43:00,739] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 19:43:00,741] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 19:43:00,742] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 19:43:00,765] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:43:00,765] [INFO] [real_accelerator.pYou are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 19:43:00,788] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:43:00,788] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:43:00,788] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:43:07,056] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 19:43:07,058] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 19:43:07,058] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 19:43:07,064] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 19:43:07,067] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 19:43:07,072] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 19:43:07,074] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 19:43:07,078] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 19:43:07,211] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:43:07,212] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:43:07,212] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:43:07,221] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:43:07,221] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:43:07,221] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:43:07,221] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:43:07,221] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:43:07,206] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:43:07,208] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:43:07,208] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:43:07,215] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:43:07,217] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:43:07,217] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:43:07,218] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:43:07,217] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 19:43:07,779] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 19:43:07,995] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 19:43:07,996] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 19:43:07,998] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 19:43:08,001] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 19:43:08,003] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 19:43:08,006] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 19:43:08,013] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 19:43:13,792] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 19:43:13,813] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 19:43:13,792] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 19:43:13,792] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 19:43:13,792] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 19:43:13,792] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 19:43:13,792] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 19:43:13,792] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 19:43:13,813] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 19:43:13,813] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 19:43:13,813] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 19:43:13,813] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 19:43:13,813] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 19:43:13,815] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 19:43:13,817] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 19:43:14,266] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 19:43:14,400] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 19:43:14,403] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 19:43:14,405] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 19:43:14,407] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 19:43:14,407] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 19:43:14,408] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 19:43:14,409] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 19:43:15,120] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 19:43:15,282] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 19:43:15,285] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 19:43:15,286] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 19:43:15,287] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 19:43:15,289] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 19:43:15,289] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 19:43:15,289] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. + Loading checkpoint shards: 0%| | 0/2 [00:00 before Client(conf_path) +Rank 0: --> after Client(conf_path) +Rank 0: Loading datasets: /mnt/petrelfs/liuzhaoyang/workspace/GUIAgent/internvl_chat/data/internvl_meta/meta/meta_250816_1.json +Rank 0: Loading guienv +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl with all sampling strategy +Rank 0: Loaded 327972 samples from VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl +Rank 0: Loading omniact +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl with all sampling strategy +Rank 0: Loaded 6720 samples from VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl +Rank 0: Loading ricoig16k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16133 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl +Rank 0: Loading ricosca +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl with all sampling strategy +Rank 0: Loaded 173212 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl +Rank 0: Loading seeclick +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl with all sampling strategy +Rank 0: Loaded 271121 samples from VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl +Rank 0: Loading ui_refexp +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl with all sampling strategy +Rank 0: Loaded 15624 samples from VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl +Rank 0: Loading webui350k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 57389 samples from VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl +Rank 0: Loading widget_captioning +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl with all sampling strategy +Rank 0: Loaded 101426 samples from VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl +Rank 0: Loading aitw-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl +Rank 0: Loading aitw-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl +Rank 0: Loading aitw-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl +Rank 0: Loading amex-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl +Rank 0: Loading amex-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl +Rank 0: Loading amex-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl +Rank 0: Loading android_control +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 149428 samples from VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl +Rank 0: Loading coat +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl with all sampling strategy +Rank 0: Loaded 11833 samples from VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl +Rank 0: Loading guiact-web-multi-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl +Rank 0: Loading guiact-web-multi-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl +Rank 0: Loading guiact-web-multi-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl +Rank 0: Loading guiact-web-single +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl with all sampling strategy +Rank 0: Loaded 67396 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl +Rank 0: Loading guide +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl with all sampling strategy +Rank 0: Loaded 13544 samples from VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl +Rank 0: Loading gui-odyssey-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl +Rank 0: Loading gui-odyssey-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl +Rank 0: Loading gui-odyssey-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl +Rank 0: Loading mind2web-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l1_202507011.jsonl +Rank 0: Loading mind2web-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l2_202507011.jsonl +Rank 0: Loading mind2web-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l3_202507011.jsonl +Rank 0: Loading miniwob-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l1_202507011.jsonl +Rank 0: Loading miniwob-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l2_202507011.jsonl +Rank 0: Loading miniwob-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l3_202507011.jsonl +Rank 0: Loading aguvis_android_control-v2 +Rank 0: Skipping aguvis_android_control-v2 due to repeat_time=0 +Rank 0: Loading aguvis_coat-v2 +Rank 0: Skipping aguvis_coat-v2 due to repeat_time=0 +Rank 0: Loading aguvis_docvqa_grounding +Rank 0: Skipping aguvis_docvqa_grounding due to repeat_time=0 +Rank 0: Loading aguvis_guiact-web-multi +Rank 0: Skipping aguvis_guiact-web-multi due to repeat_time=0 +Rank 0: Loading aguvis_guiact-web-single-v2 +Rank 0: Skipping aguvis_guiact-web-single-v2 due to repeat_time=0 +Rank 0: Loading aguvis_guide_si_10k-v2 +Rank 0: Skipping aguvis_guide_si_10k-v2 due to repeat_time=0 +Rank 0: Loading aguvis_guienv +Rank 0: Skipping aguvis_guienv due to repeat_time=0 +Rank 0: Loading aguvis_mind2web_train_v1.0.1 +Rank 0: Skipping aguvis_mind2web_train_v1.0.1 due to repeat_time=0 +Rank 0: Loading aguvis_omniact +Rank 0: Skipping aguvis_omniact due to repeat_time=0 +Rank 0: Loading aguvis_osatlas_ui_tars_cleaned +Rank 0: Skipping aguvis_osatlas_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading aguvis_ricoig16k +Rank 0: Skipping aguvis_ricoig16k due to repeat_time=0 +Rank 0: Loading aguvis_ricosca +Rank 0: Skipping aguvis_ricosca due to repeat_time=0 +Rank 0: Loading aguvis_seeclick_mi_ui_tars_cleaned +Rank 0: Skipping aguvis_seeclick_mi_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading aguvis_seeclick_ui_tars_cleaned_fixed +Rank 0: Skipping aguvis_seeclick_ui_tars_cleaned_fixed due to repeat_time=0 +Rank 0: Loading aguvis_ui_refexp +Rank 0: Skipping aguvis_ui_refexp due to repeat_time=0 +Rank 0: Loading aguvis_webui350k +Rank 0: Skipping aguvis_webui350k due to repeat_time=0 +Rank 0: Loading aguvis_widget_captioning +Rank 0: Skipping aguvis_widget_captioning due to repeat_time=0 +Rank 0: Loading icon_caption_icon_v0222_description +Rank 0: Skipping icon_caption_icon_v0222_description due to repeat_time=0 +Rank 0: Loading icon_grounding_icon_v0222_grounding +Rank 0: Skipping icon_grounding_icon_v0222_grounding due to repeat_time=0 +Rank 0: Loading refusal_component_final_1.5m +Rank 0: Skipping refusal_component_final_1.5m due to repeat_time=0 +Rank 0: Loading refusal_component_library_snap_icon_data_grounding +Rank 0: Skipping refusal_component_library_snap_icon_data_grounding due to repeat_time=0 +Rank 0: Loading refusal_component_v1_130k +Rank 0: Skipping refusal_component_v1_130k due to repeat_time=0 +Rank 0: Loading refusal_guienv +Rank 0: Skipping refusal_guienv due to repeat_time=0 +Rank 0: Loading refusal_icon_v0222_grounding +Rank 0: Skipping refusal_icon_v0222_grounding due to repeat_time=0 +Rank 0: Loading refusal_osatlas_ui_tars_cleaned +Rank 0: Skipping refusal_osatlas_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading refusal_ricosca +Rank 0: Skipping refusal_ricosca due to repeat_time=0 +Rank 0: Loading refusal_seeclick_mi_ui_tars_cleaned +Rank 0: Skipping refusal_seeclick_mi_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading refusal_seeclick_ui_tars_cleaned_fixed +Rank 0: Skipping refusal_seeclick_ui_tars_cleaned_fixed due to repeat_time=0 +Rank 0: Loading refusal_training_data_icon_grounded_merged +Rank 0: Skipping refusal_training_data_icon_grounded_merged due to repeat_time=0 +Rank 0: Loading component_generated_component_final_1.5m_cleaned_split +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_final_1.5m_cleaned_split_20250713.jsonl with random:10% sampling strategy +Rank 0: Loaded 3987 samples from VC:s3://gui-agent/jedi/annotations_250713/component_final_1.5m_cleaned_split_20250713.jsonl +Rank 0: Loading component_generated_component_library_snap_icon_data_description +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_description_conversations_20250713.jsonl with random:50% sampling strategy +Rank 0: Loaded 11061 samples from VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_description_conversations_20250713.jsonl +Rank 0: Loading component_generated_component_library_snap_icon_data_grounding +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_grounding_conversations_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 4424 samples from VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_grounding_conversations_20250713.jsonl +Rank 0: Loading component_generated_component_v1_130k +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_v1_130k_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 26376 samples from VC:s3://gui-agent/jedi/annotations_250713/component_v1_130k_20250713.jsonl +Rank 0: Loading component_rule-based_doc_data_new +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/doc_data_new_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 3153 samples from VC:s3://gui-agent/jedi/annotations_250713/doc_data_new_20250713.jsonl +Rank 0: Loading component_rule-based_doc_scroll_data_new +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/doc_scroll_data_new_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 603 samples from VC:s3://gui-agent/jedi/annotations_250713/doc_scroll_data_new_20250713.jsonl +Rank 0: Loading component_rule-based_ethercalc_v1 +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/ethercalc_v1_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 2012 samples from VC:s3://gui-agent/jedi/annotations_250713/ethercalc_v1_20250713.jsonl +Rank 0: Loading component_rule-based_slide_v1_17k +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/slide_v1_17k_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 2363 samples from VC:s3://gui-agent/jedi/annotations_250713/slide_v1_17k_20250713.jsonl +Rank 0: Loading icon_caption_ios_app_data +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/ios_app_data_conversations-images_pure_color_background_20250713.jsonl with all sampling strategy +Rank 0: Loaded 49498 samples from VC:s3://gui-agent/jedi/annotations_250713/ios_app_data_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_caption_mac_app_data +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/mac_app_data_conversations-images_pure_color_background_20250713.jsonl with all sampling strategy +Rank 0: Loaded 18083 samples from VC:s3://gui-agent/jedi/annotations_250713/mac_app_data_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_caption_training_data_icon +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_pure_color_background_20250713.jsonl with random:50% sampling strategy +Rank 0: Loaded 75874 samples from VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_grounding_training_data_icon_grounded_merged +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_grounded_merged_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 5466 samples from VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_grounded_merged_20250713.jsonl +Rank 0: Loading layout_layout200k_training_data_qwen25 +Rank 0: Skipping layout_layout200k_training_data_qwen25 due to repeat_time=0 +Rank 0: Loading layout_layout200k_grounding_training_data_qwen25 +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/layout200k_grounding_training_data_qwen25_20250713.jsonl with random:10% sampling strategy +Rank 0: Loaded 158612 samples from VC:s3://gui-agent/jedi/annotations_250713/layout200k_grounding_training_data_qwen25_20250713.jsonl +Rank 0: Loading layout_layout400k_claude_training_data_qwen25_split +Rank 0: Skipping layout_layout400k_claude_training_data_qwen25_split due to repeat_time=0 +Rank 0: Loading layout_layout400k_claude_grounding_training_data_qwen25_split +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/layout400k_claude_grounding_training_data_qwen25_split_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 7540 samples from VC:s3://gui-agent/jedi/annotations_250713/layout400k_claude_grounding_training_data_qwen25_split_20250713.jsonl +Rank 0: Loading layout_os_layout_v1 +Rank 0: Skipping layout_os_layout_v1 due to repeat_time=0 +Rank 0: Loading layout_os_layout_v1_grounding +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/os_layout_v1_grounding_training_data_qwen25_split_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 7857 samples from VC:s3://gui-agent/jedi/annotations_250713/os_layout_v1_grounding_training_data_qwen25_split_20250713.jsonl +Rank 0: Loading mind2web_raw_image +Rank 0: Loading VC:s3://gui-agent/mind2web_train/navigation_20250705.jsonl with all sampling strategy +Rank 0: Loaded 5740 samples from VC:s3://gui-agent/mind2web_train/navigation_20250705.jsonl +Rank 0: Loading ws_android_navigation_20250328 +Rank 0: Skipping ws_android_navigation_20250328 due to repeat_time=0 +Rank 0: Loading ws_android_navigation_20250407 +Rank 0: Skipping ws_android_navigation_20250407 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_w_history_20250328 +Rank 0: Skipping ws_web_navigation_w_history_20250328 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_wo_history_20250328 +Rank 0: Skipping ws_web_navigation_wo_history_20250328 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_20250421 +Rank 0: Skipping ws_web_navigation_20250421 due to repeat_time=0 +Rank 0: Loading ws_ubuntu_navigation_20250328 +Rank 0: Skipping ws_ubuntu_navigation_20250328 due to repeat_time=0 +Rank 0: Loading ws_android_navigation_20250505 +Rank 0: Skipping ws_android_navigation_20250505 due to repeat_time=0 +Rank 0: Loading internal_android_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 48814 samples from VC:s3://gui-agent/data_20250612/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19042 samples from VC:s3://gui-agent/data_20250612/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8363 samples from VC:s3://gui-agent/data_20250612/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 26412 samples from VC:s3://gui-agent/data_20250612/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 57522 samples from VC:s3://gui-agent/data_20250612/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 1342 samples from VC:s3://gui-agent/data_20250624/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 15766 samples from VC:s3://gui-agent/data_20250624/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19280 samples from VC:s3://gui-agent/data_20250630/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 3560 samples from VC:s3://gui-agent/data_20250630/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 9258 samples from VC:s3://gui-agent/data_20250630/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 420 samples from VC:s3://gui-agent/data_20250707/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 8898 samples from VC:s3://gui-agent/data_20250707/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 21026 samples from VC:s3://gui-agent/data_20250707/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 4490 samples from VC:s3://gui-agent/data_20250707/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 22154 samples from VC:s3://gui-agent/data_20250714/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/web/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 11614 samples from VC:s3://gui-agent/data_20250714/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 16767 samples from VC:s3://gui-agent/data_20250714/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 48814 samples from VC:s3://gui-agent/data_20250612/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 11970 samples from VC:s3://gui-agent/data_20250612/android/planning_20250811.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19042 samples from VC:s3://gui-agent/data_20250612/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 4926 samples from VC:s3://gui-agent/data_20250612/mac/planning_20250811.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8363 samples from VC:s3://gui-agent/data_20250612/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3716 samples from VC:s3://gui-agent/data_20250612/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 26412 samples from VC:s3://gui-agent/data_20250612/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 4420 samples from VC:s3://gui-agent/data_20250612/windows/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 115044 samples from VC:s3://gui-agent/data_20250612/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2684 samples from VC:s3://gui-agent/data_20250624/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 15766 samples from VC:s3://gui-agent/data_20250624/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7402 samples from VC:s3://gui-agent/data_20250624/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19280 samples from VC:s3://gui-agent/data_20250630/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3746 samples from VC:s3://gui-agent/data_20250630/android/planning_20250811.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 3560 samples from VC:s3://gui-agent/data_20250630/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 2469 samples from VC:s3://gui-agent/data_20250630/mac/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 18516 samples from VC:s3://gui-agent/data_20250630/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 420 samples from VC:s3://gui-agent/data_20250707/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 243 samples from VC:s3://gui-agent/data_20250707/mac/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 8898 samples from VC:s3://gui-agent/data_20250707/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 1188 samples from VC:s3://gui-agent/data_20250707/windows/planning_20250811.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8979 samples from VC:s3://gui-agent/data_20250707/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 1599 samples from VC:s3://gui-agent/data_20250707/android/planning_20250811.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 21026 samples from VC:s3://gui-agent/data_20250707/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2657 samples from VC:s3://gui-agent/data_20250707/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 22154 samples from VC:s3://gui-agent/data_20250714/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2383 samples from VC:s3://gui-agent/data_20250714/windows/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/web/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 23229 samples from VC:s3://gui-agent/data_20250714/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 16767 samples from VC:s3://gui-agent/data_20250714/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2389 samples from VC:s3://gui-agent/data_20250714/ubuntu/planning_20250811.jsonl +Rank 0: Loading private_aig_share_0815_logo_oral_operation_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_oral_operation_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_oral_operation_d240924_v1.jsonl +Rank 0: Loading private_aig_share_0815_logo_region_caption_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_region_caption_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_region_caption_d240924_v1.jsonl +Rank 0: Loading private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2.jsonl with all sampling strategy +Rank 0: Loaded 20293 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2.jsonl +Rank 0: Loading private_ui_phone_comment_20240606_json_d20241023_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_comment_20240606_json_d20241023_v2.jsonl with all sampling strategy +Rank 0: Loaded 1055 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_comment_20240606_json_d20241023_v2.jsonl +Rank 0: Loading private_ui_internal_aig_json_d241126 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_json_d241126.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 6837 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_json_d241126.jsonl +Rank 0: Loading private_ui_internal_aig_xml_d241126 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_xml_d241126.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 6873 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_xml_d241126.jsonl +Rank 0: Loading OS_Altas_androidworld_grounding_d241120_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/OS_Altas_androidworld_grounding_d241120_v1.jsonl with all sampling strategy +Rank 0: Loaded 89860 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/OS_Altas_androidworld_grounding_d241120_v1.jsonl +Rank 0: Loading private_ui_aig_share_long_caption_20240604_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_long_caption_20240604_v1.jsonl with repeat:4 sampling strategy +Rank 0: Loaded 3156 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_long_caption_20240604_v1.jsonl +Rank 0: Loading aw_1218_grounding +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/grounding_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/grounding_new.jsonl +Rank 0: Loading aw_1218_regioncaption +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/regioncaption_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/regioncaption_new.jsonl +Rank 0: Loading aw_1218_oral_operation +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/oral_operation_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/oral_operation_new.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600.jsonl with all sampling strategy +Rank 0: Loaded 6600 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1.jsonl +Rank 0: Loading private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607.jsonl with all sampling strategy +Rank 0: Loaded 24620 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607.jsonl +Rank 0: Loading private_ui_phone_2403_long_caption_d20240604_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d20240604_v2.jsonl with all sampling strategy +Rank 0: Loaded 17196 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d20240604_v2.jsonl +Rank 0: Loading private_ui_phone_2403_long_caption_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 5998 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d240430_v1.jsonl +Rank 0: Loading private_ui_phone_2403_ocr_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ocr_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 31276 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ocr_d240430_v1.jsonl +Rank 0: Loading screen_qa_with_bbox_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_with_bbox_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 62401 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_with_bbox_d240430_v1.jsonl +Rank 0: Loading screenai_layout_20240604_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screenai_layout_20240604_v1.jsonl with all sampling strategy +Rank 0: Loaded 22076 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screenai_layout_20240604_v1.jsonl +Rank 0: Loading amex_grounding_d240813_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/amex_grounding_d240813_v1.jsonl with all sampling strategy +Rank 0: Loaded 102007 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/amex_grounding_d240813_v1.jsonl +Rank 0: Loading guicourse_guienv_text_grounding_1_d240815_v3 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_1_d240815_v3.jsonl with all sampling strategy +Rank 0: Loaded 63581 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_1_d240815_v3.jsonl +Rank 0: Loading guicourse_guienv_text_grounding_2_d240815_v3 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_2_d240815_v3.jsonl with all sampling strategy +Rank 0: Loaded 6852 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_2_d240815_v3.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1.jsonl +Rank 0: Loading screen_qa_short_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_short_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 27880 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_short_d240430_v1.jsonl +Rank 0: Loading private_aig_share_0815_logo_grounding_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_grounding_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_grounding_d240924_v1.jsonl +Rank 0: Loading private_schedual_extract_20240520_v2_r464_reprompt_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_schedual_extract_20240520_v2_r464_reprompt_d240607.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 928 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_schedual_extract_20240520_v2_r464_reprompt_d240607.jsonl +Rank 0: Loading private_ui2json_app_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_app_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2488 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_app_d20240822_v1.jsonl +Rank 0: Loading private_ui2json_os_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_os_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 1242 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_os_d20240822_v1.jsonl +Rank 0: Loading private_ui2json_web_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_web_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2360 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_web_d20240822_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607.jsonl with all sampling strategy +Rank 0: Loaded 3791 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607.jsonl +Rank 0: Loading private_ui_aig_share_2405_marker_recognition_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_marker_recognition_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5179 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_marker_recognition_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_ocr_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_ocr_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5090 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_ocr_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_operation_oral_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_operation_oral_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5070 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_operation_oral_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5248 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2408_region_caption_d240903_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2408_region_caption_d240903_v1.jsonl with all sampling strategy +Rank 0: Loaded 5854 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2408_region_caption_d240903_v1.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1.jsonl +Rank 0: Loading uground_web_direct_150k_description_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_direct_150k_description_filtered_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 74208 samples from VC:s3://gui/new_annotations/uground/web_direct_150k_description_filtered_202507011.jsonl +Rank 0: Loading uground_web_direct_258k_function_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_direct_258k_function_filtered_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 124132 samples from VC:s3://gui/new_annotations/uground/web_direct_258k_function_filtered_202507011.jsonl +Rank 0: Loading uground_web_hybrid_773k_max_25qa_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_hybrid_773k_max_25qa_filtered_new_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 621928 samples from VC:s3://gui/new_annotations/uground/web_hybrid_773k_max_25qa_filtered_new_202507011.jsonl +Rank 0: Loading altas_windows +Rank 0: Loading VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_windows_splited_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 537672 samples from VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_windows_splited_202507011.jsonl +Rank 0: Loading altas_linux +Rank 0: Loading VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_linux_splited_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 21578 samples from VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_linux_splited_202507011.jsonl +Rank 0: Loading atlas_macos +Rank 0: Loading VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_macos_splited_202507011.jsonl with random:20% sampling strategy +Rank 0: Loaded 3680 samples from VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_macos_splited_202507011.jsonl +Rank 0: Loading android_action_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/android/filter_action_grounding_20250405_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 5621 samples from VC:s3://gui/data_20250328/android/filter_action_grounding_20250405_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250328 +Rank 0: Loading VC:s3://gui-agent/data_20250328/windows/action_grounding_20250409_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 11980 samples from VC:s3://gui-agent/data_20250328/windows/action_grounding_20250409_202507011_20250722.jsonl +Rank 0: Loading web_action_grounding_20250328 +Rank 0: Loading VC:s3://gui-agent/data_20250328/web_25k/action_grounding_20250404_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 9459 samples from VC:s3://gui-agent/data_20250328/web_25k/action_grounding_20250404_202507011.jsonl +Rank 0: Loading ubuntu_action_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/ubuntu/action_grounding_20250407_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 328 samples from VC:s3://gui/data_20250310/ubuntu/action_grounding_20250407_202507011.jsonl +Rank 0: Loading ubuntu_action_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/ubuntu/action_grounding_20250407_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 54 samples from VC:s3://gui/data_20250317/ubuntu/action_grounding_20250407_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/action_grounding_20250421_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 480 samples from VC:s3://gui/data_20250317/windows/action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/crop_action_grounding_20250421_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 240 samples from VC:s3://gui/data_20250317/windows/crop_action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/action_grounding_20250421_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 944 samples from VC:s3://gui/data_20250310/windows/action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/crop_action_grounding_20250421_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 472 samples from VC:s3://gui/data_20250310/windows/crop_action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading mac_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/mac/action_grounding_20250410_202507011.jsonl with all sampling strategy +Rank 0: Loaded 1578 samples from VC:s3://gui-agent/data_20250407/mac/action_grounding_20250410_202507011.jsonl +Rank 0: Loading iphone_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/iphone/white/action_grounding_20250410_202507011.jsonl with all sampling strategy +Rank 0: Loaded 20394 samples from VC:s3://gui-agent/data_20250407/iphone/white/action_grounding_20250410_202507011.jsonl +Rank 0: Loading web_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/web/action_grounding_20250414_202507011.jsonl with random:20% sampling strategy +Rank 0: Loaded 14285 samples from VC:s3://gui-agent/data_20250407/web/action_grounding_20250414_202507011.jsonl +Rank 0: Loading android_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/android/action_grounding_20250410_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 3590 samples from VC:s3://gui-agent/data_20250407/android/action_grounding_20250410_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/action_grounding_20250416_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 21422 samples from VC:s3://gui-agent/data_20250407/windows/action_grounding_20250416_202507011_20250722.jsonl +Rank 0: Loading windows_human_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/human_action_grounding_20250416_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 100 samples from VC:s3://gui-agent/data_20250407/windows/human_action_grounding_20250416_202507011_20250722.jsonl +Rank 0: Loading windows_aug_cropping_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/sub_action_grounding_20250421_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 7675 samples from VC:s3://gui-agent/data_20250407/windows/sub_action_grounding_20250421_202507011.jsonl +Rank 0: Loading iphone_action_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/action_grounding_20250417_202507011.jsonl with all sampling strategy +Rank 0: Loaded 20116 samples from VC:s3://gui-agent/data_20250414/iphone/action_grounding_20250417_202507011.jsonl +Rank 0: Loading iphone_human_action_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/human_action_grounding_20250421_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2520 samples from VC:s3://gui-agent/data_20250414/iphone/human_action_grounding_20250421_202507011.jsonl +Rank 0: Loading mac_human_action_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/mac/human_action_grounding_20250418_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7814 samples from VC:s3://gui-agent/data_20250414/mac/human_action_grounding_20250418_202507011.jsonl +Rank 0: Loading android_action_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/Android/action_grounding_20250429_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 17838 samples from VC:s3://gui-agent/data_20250421/Android/action_grounding_20250429_202507011.jsonl +Rank 0: Loading android_action_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/Android/action_grounding_20250429_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 9008 samples from VC:s3://gui-agent/data_20250428/Android/action_grounding_20250429_202507011.jsonl +Rank 0: Loading web_canvas_action_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/web_canvas/action_grounding_20250429_202507011.jsonl with random:20% sampling strategy +Rank 0: Loaded 624 samples from VC:s3://gui-agent/data_20250428/web_canvas/action_grounding_20250429_202507011.jsonl +Rank 0: Loading web_action_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/web/action_grounding_20250505_202507011.jsonl with random:50% sampling strategy +W0817 20:03:02.228000 27985 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 20:03:02.228000 27985 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 20:03:02.228000 27985 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 20:03:02.228000 27985 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 20:03:02.290000 95687 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 20:03:02.290000 95687 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 20:03:02.290000 95687 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 20:03:02.290000 95687 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 20:03:02.593000 87284 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 20:03:02.593000 87284 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 20:03:02.593000 87284 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 20:03:02.593000 87284 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 20:03:05.513000 116086 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 20:03:05.513000 116086 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 20:03:05.513000 116086 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 20:03:05.513000 116086 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 20:03:06.686000 87509 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 20:03:06.686000 87509 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 20:03:06.686000 87509 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 20:03:06.686000 87509 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 20:03:07.461000 79133 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 20:03:07.461000 79133 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 20:03:07.461000 79133 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 20:03:07.461000 79133 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 20:03:07.452000 115035 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 20:03:07.452000 115035 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 20:03:07.452000 115035 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 20:03:07.452000 115035 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +[2025-08-17 20:03:20,980] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:20,981] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:20,981] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:20,982] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:20,982] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:20,982] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:20,982] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:20,982] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:20,989] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:20,990] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:20,990] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:20,990] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:21,000] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:21,000] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:21,001] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:21,001] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:21,611] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:21,611] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:21,612] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:21,613] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:21,613] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:21,613] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:21,613] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:21,613] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:24,391] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 20:03:24,397] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 20:03:24,391] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 20:03:24,391] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 20:03:24,391] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 20:03:24,391] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 20:03:24,391] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 20:03:24,391] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 20:03:24,397] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 20:03:24,397] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 20:03:24,397] [INFO] [comm.py:683:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl +[2025-08-17 20:03:24,397] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 20:03:24,397] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 20:03:24,398] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 20:03:24,399] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 20:03:24,400] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 20:03:24,827] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 20:03:24,880] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 20:03:24,957] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 20:03:24,960] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 20:03:24,962] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 20:03:24,966] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 20:03:24,967] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 20:03:24,967] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 20:03:24,967] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 20:03:25,029] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 20:03:25,032] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 20:03:25,033] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 20:03:25,035] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 20:03:25,036] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 20:03:25,036] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 20:03:25,037] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 20:03:25,440] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 20:03:25,440] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 20:03:25,440] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 20:03:25,440] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 20:03:25,440] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 20:03:25,440] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 20:03:25,441] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 20:03:25,444] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 20:03:25,908] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 20:03:26,041] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 20:03:26,046] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 20:03:26,046] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 20:03:26,046] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 20:03:26,047] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 20:03:26,050] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 20:03:26,052] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 20:03:29,282] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:29,319] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:29,325] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:29,328] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:29,352] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:29,355] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:29,355] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:29,356] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:32,929] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:32,931] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:32,996] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:32,996] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:32,996] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:32,997] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:32,997] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:32,996] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:34,971] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:35,018] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:35,031] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:35,035] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:35,035] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:35,038] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:35,039] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:35,042] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:35,042] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:35,046] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:35,047] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:35,047] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:35,049] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:35,049] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:35,048] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:35,051] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:35,053] [INFO] [comm.py:652:init_distributed] cdb=None +] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:35,022] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:35,023] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:35,023] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:35,023] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:35,023] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:35,023] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 20:03:35,055] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 20:03:35,060] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 20:03:35,060] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 20:03:35,061] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 20:03:35,062] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 20:03:35,064] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 20:03:35,069] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 20:03:35,679] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 20:03:35,904] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 20:03:35,909] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 20:03:35,912] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 20:03:35,913] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 20:03:35,916] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 20:03:35,918] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 20:03:35,924] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 20:03:38,889] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 20:03:38,890] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 20:03:38,890] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 20:03:38,890] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 20:03:38,891] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 20:03:38,891] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 20:03:38,891] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 20:03:38,893] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 20:03:39,353] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 20:03:39,491] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 20:03:39,497] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 20:03:39,497] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 20:03:39,498] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 20:03:39,498] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 20:03:39,499] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 20:03:39,502] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 20:03:41,581] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 20:03:41,624] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 20:03:41,611] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 20:03:41,624] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 20:03:41,624] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 20:03:41,624] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 20:03:41,624] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 20:03:41,624] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 20:03:41,626] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 20:03:41,582] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 20:03:42,063] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 20:03:42,091] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 20:03:42,176] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 20:03:42,183] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 20:03:42,184] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 20:03:42,184] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 20:03:42,185] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 20:03:42,188] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 20:03:42,190] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 20:03:42,205] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 20:03:42,171] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 20:03:42,172] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 20:03:42,218] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 20:03:42,218] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 20:03:42,220] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 20:03:42,220] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 20:03:42,222] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 20:03:42,177] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 20:03:42,225] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 20:03:42,187] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. + Loading checkpoint shards: 0%| | 0/2 [00:00 before Client(conf_path) +Rank 0: --> after Client(conf_path) +Rank 0: Loading datasets: /mnt/petrelfs/liuzhaoyang/workspace/GUIAgent/internvl_chat/data/internvl_meta/meta/meta_250816_1.json +Rank 0: Loading guienv +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl with all sampling strategy +Rank 0: Loaded 327972 samples from VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl +Rank 0: Loading omniact +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl with all sampling strategy +Rank 0: Loaded 6720 samples from VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl +Rank 0: Loading ricoig16k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16133 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl +Rank 0: Loading ricosca +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl with all sampling strategy +Rank 0: Loaded 173212 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl +Rank 0: Loading seeclick +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl with all sampling strategy +Rank 0: Loaded 271121 samples from VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl +Rank 0: Loading ui_refexp +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl with all sampling strategy +Rank 0: Loaded 15624 samples from VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl +Rank 0: Loading webui350k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 57389 samples from VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl +Rank 0: Loading widget_captioning +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl with all sampling strategy +Rank 0: Loaded 101426 samples from VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl +Rank 0: Loading aitw-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl +Rank 0: Loading aitw-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl +Rank 0: Loading aitw-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl +Rank 0: Loading amex-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl +Rank 0: Loading amex-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl +Rank 0: Loading amex-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl +Rank 0: Loading android_control +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 149428 samples from VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl +Rank 0: Loading coat +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl with all sampling strategy +Rank 0: Loaded 11833 samples from VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl +Rank 0: Loading guiact-web-multi-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl +Rank 0: Loading guiact-web-multi-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl +Rank 0: Loading guiact-web-multi-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl +Rank 0: Loading guiact-web-single +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl with all sampling strategy +Rank 0: Loaded 67396 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl +Rank 0: Loading guide +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl with all sampling strategy +Rank 0: Loaded 13544 samples from VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl +Rank 0: Loading gui-odyssey-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl +Rank 0: Loading gui-odyssey-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl +Rank 0: Loading gui-odyssey-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl +Rank 0: Loading mind2web-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l1_202507011.jsonl +Rank 0: Loading mind2web-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l2_202507011.jsonl +Rank 0: Loading mind2web-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l3_202507011.jsonl +Rank 0: Loading miniwob-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l1_202507011.jsonl +Rank 0: Loading miniwob-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l2_202507011.jsonl +Rank 0: Loading miniwob-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l3_202507011.jsonl +Rank 0: Loading aguvis_android_control-v2 +Rank 0: Skipping aguvis_android_control-v2 due to repeat_time=0 +Rank 0: Loading aguvis_coat-v2 +Rank 0: Skipping aguvis_coat-v2 due to repeat_time=0 +Rank 0: Loading aguvis_docvqa_grounding +Rank 0: Skipping aguvis_docvqa_grounding due to repeat_time=0 +Rank 0: Loading aguvis_guiact-web-multi +Rank 0: Skipping aguvis_guiact-web-multi due to repeat_time=0 +Rank 0: Loading aguvis_guiact-web-single-v2 +Rank 0: Skipping aguvis_guiact-web-single-v2 due to repeat_time=0 +Rank 0: Loading aguvis_guide_si_10k-v2 +Rank 0: Skipping aguvis_guide_si_10k-v2 due to repeat_time=0 +Rank 0: Loading aguvis_guienv +Rank 0: Skipping aguvis_guienv due to repeat_time=0 +Rank 0: Loading aguvis_mind2web_train_v1.0.1 +Rank 0: Skipping aguvis_mind2web_train_v1.0.1 due to repeat_time=0 +Rank 0: Loading aguvis_omniact +Rank 0: Skipping aguvis_omniact due to repeat_time=0 +Rank 0: Loading aguvis_osatlas_ui_tars_cleaned +Rank 0: Skipping aguvis_osatlas_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading aguvis_ricoig16k +Rank 0: Skipping aguvis_ricoig16k due to repeat_time=0 +Rank 0: Loading aguvis_ricosca +Rank 0: Skipping aguvis_ricosca due to repeat_time=0 +Rank 0: Loading aguvis_seeclick_mi_ui_tars_cleaned +Rank 0: Skipping aguvis_seeclick_mi_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading aguvis_seeclick_ui_tars_cleaned_fixed +Rank 0: Skipping aguvis_seeclick_ui_tars_cleaned_fixed due to repeat_time=0 +Rank 0: Loading aguvis_ui_refexp +Rank 0: Skipping aguvis_ui_refexp due to repeat_time=0 +Rank 0: Loading aguvis_webui350k +Rank 0: Skipping aguvis_webui350k due to repeat_time=0 +Rank 0: Loading aguvis_widget_captioning +Rank 0: Skipping aguvis_widget_captioning due to repeat_time=0 +Rank 0: Loading icon_caption_icon_v0222_description +Rank 0: Skipping icon_caption_icon_v0222_description due to repeat_time=0 +Rank 0: Loading icon_grounding_icon_v0222_grounding +Rank 0: Skipping icon_grounding_icon_v0222_grounding due to repeat_time=0 +Rank 0: Loading refusal_component_final_1.5m +Rank 0: Skipping refusal_component_final_1.5m due to repeat_time=0 +Rank 0: Loading refusal_component_library_snap_icon_data_grounding +Rank 0: Skipping refusal_component_library_snap_icon_data_grounding due to repeat_time=0 +Rank 0: Loading refusal_component_v1_130k +Rank 0: Skipping refusal_component_v1_130k due to repeat_time=0 +Rank 0: Loading refusal_guienv +Rank 0: Skipping refusal_guienv due to repeat_time=0 +Rank 0: Loading refusal_icon_v0222_grounding +Rank 0: Skipping refusal_icon_v0222_grounding due to repeat_time=0 +Rank 0: Loading refusal_osatlas_ui_tars_cleaned +Rank 0: Skipping refusal_osatlas_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading refusal_ricosca +Rank 0: Skipping refusal_ricosca due to repeat_time=0 +Rank 0: Loading refusal_seeclick_mi_ui_tars_cleaned +Rank 0: Skipping refusal_seeclick_mi_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading refusal_seeclick_ui_tars_cleaned_fixed +Rank 0: Skipping refusal_seeclick_ui_tars_cleaned_fixed due to repeat_time=0 +Rank 0: Loading refusal_training_data_icon_grounded_merged +Rank 0: Skipping refusal_training_data_icon_grounded_merged due to repeat_time=0 +Rank 0: Loading component_generated_component_final_1.5m_cleaned_split +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_final_1.5m_cleaned_split_20250713.jsonl with random:10% sampling strategy +Rank 0: Loaded 3987 samples from VC:s3://gui-agent/jedi/annotations_250713/component_final_1.5m_cleaned_split_20250713.jsonl +Rank 0: Loading component_generated_component_library_snap_icon_data_description +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_description_conversations_20250713.jsonl with random:50% sampling strategy +Rank 0: Loaded 11061 samples from VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_description_conversations_20250713.jsonl +Rank 0: Loading component_generated_component_library_snap_icon_data_grounding +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_grounding_conversations_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 4424 samples from VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_grounding_conversations_20250713.jsonl +Rank 0: Loading component_generated_component_v1_130k +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_v1_130k_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 26376 samples from VC:s3://gui-agent/jedi/annotations_250713/component_v1_130k_20250713.jsonl +Rank 0: Loading component_rule-based_doc_data_new +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/doc_data_new_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 3153 samples from VC:s3://gui-agent/jedi/annotations_250713/doc_data_new_20250713.jsonl +Rank 0: Loading component_rule-based_doc_scroll_data_new +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/doc_scroll_data_new_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 603 samples from VC:s3://gui-agent/jedi/annotations_250713/doc_scroll_data_new_20250713.jsonl +Rank 0: Loading component_rule-based_ethercalc_v1 +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/ethercalc_v1_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 2012 samples from VC:s3://gui-agent/jedi/annotations_250713/ethercalc_v1_20250713.jsonl +Rank 0: Loading component_rule-based_slide_v1_17k +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/slide_v1_17k_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 2363 samples from VC:s3://gui-agent/jedi/annotations_250713/slide_v1_17k_20250713.jsonl +Rank 0: Loading icon_caption_ios_app_data +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/ios_app_data_conversations-images_pure_color_background_20250713.jsonl with all sampling strategy +Rank 0: Loaded 49498 samples from VC:s3://gui-agent/jedi/annotations_250713/ios_app_data_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_caption_mac_app_data +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/mac_app_data_conversations-images_pure_color_background_20250713.jsonl with all sampling strategy +Rank 0: Loaded 18083 samples from VC:s3://gui-agent/jedi/annotations_250713/mac_app_data_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_caption_training_data_icon +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_pure_color_background_20250713.jsonl with random:50% sampling strategy +Rank 0: Loaded 75874 samples from VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_grounding_training_data_icon_grounded_merged +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_grounded_merged_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 5466 samples from VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_grounded_merged_20250713.jsonl +Rank 0: Loading layout_layout200k_training_data_qwen25 +Rank 0: Skipping layout_layout200k_training_data_qwen25 due to repeat_time=0 +Rank 0: Loading layout_layout200k_grounding_training_data_qwen25 +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/layout200k_grounding_training_data_qwen25_20250713.jsonl with random:10% sampling strategy +Rank 0: Loaded 158612 samples from VC:s3://gui-agent/jedi/annotations_250713/layout200k_grounding_training_data_qwen25_20250713.jsonl +Rank 0: Loading layout_layout400k_claude_training_data_qwen25_split +Rank 0: Skipping layout_layout400k_claude_training_data_qwen25_split due to repeat_time=0 +Rank 0: Loading layout_layout400k_claude_grounding_training_data_qwen25_split +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/layout400k_claude_grounding_training_data_qwen25_split_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 7540 samples from VC:s3://gui-agent/jedi/annotations_250713/layout400k_claude_grounding_training_data_qwen25_split_20250713.jsonl +Rank 0: Loading layout_os_layout_v1 +Rank 0: Skipping layout_os_layout_v1 due to repeat_time=0 +Rank 0: Loading layout_os_layout_v1_grounding +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/os_layout_v1_grounding_training_data_qwen25_split_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 7857 samples from VC:s3://gui-agent/jedi/annotations_250713/os_layout_v1_grounding_training_data_qwen25_split_20250713.jsonl +Rank 0: Loading mind2web_raw_image +Rank 0: Loading VC:s3://gui-agent/mind2web_train/navigation_20250705.jsonl with all sampling strategy +Rank 0: Loaded 5740 samples from VC:s3://gui-agent/mind2web_train/navigation_20250705.jsonl +Rank 0: Loading ws_android_navigation_20250328 +Rank 0: Skipping ws_android_navigation_20250328 due to repeat_time=0 +Rank 0: Loading ws_android_navigation_20250407 +Rank 0: Skipping ws_android_navigation_20250407 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_w_history_20250328 +Rank 0: Skipping ws_web_navigation_w_history_20250328 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_wo_history_20250328 +Rank 0: Skipping ws_web_navigation_wo_history_20250328 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_20250421 +Rank 0: Skipping ws_web_navigation_20250421 due to repeat_time=0 +Rank 0: Loading ws_ubuntu_navigation_20250328 +Rank 0: Skipping ws_ubuntu_navigation_20250328 due to repeat_time=0 +Rank 0: Loading ws_android_navigation_20250505 +Rank 0: Skipping ws_android_navigation_20250505 due to repeat_time=0 +Rank 0: Loading internal_android_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 48814 samples from VC:s3://gui-agent/data_20250612/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19042 samples from VC:s3://gui-agent/data_20250612/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8363 samples from VC:s3://gui-agent/data_20250612/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 26412 samples from VC:s3://gui-agent/data_20250612/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 57522 samples from VC:s3://gui-agent/data_20250612/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 1342 samples from VC:s3://gui-agent/data_20250624/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 15766 samples from VC:s3://gui-agent/data_20250624/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19280 samples from VC:s3://gui-agent/data_20250630/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 3560 samples from VC:s3://gui-agent/data_20250630/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 9258 samples from VC:s3://gui-agent/data_20250630/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 420 samples from VC:s3://gui-agent/data_20250707/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 8898 samples from VC:s3://gui-agent/data_20250707/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 21026 samples from VC:s3://gui-agent/data_20250707/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 4490 samples from VC:s3://gui-agent/data_20250707/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 22154 samples from VC:s3://gui-agent/data_20250714/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/web/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 11614 samples from VC:s3://gui-agent/data_20250714/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 16767 samples from VC:s3://gui-agent/data_20250714/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 48814 samples from VC:s3://gui-agent/data_20250612/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 11970 samples from VC:s3://gui-agent/data_20250612/android/planning_20250811.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19042 samples from VC:s3://gui-agent/data_20250612/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 4926 samples from VC:s3://gui-agent/data_20250612/mac/planning_20250811.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8363 samples from VC:s3://gui-agent/data_20250612/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3716 samples from VC:s3://gui-agent/data_20250612/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 26412 samples from VC:s3://gui-agent/data_20250612/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 4420 samples from VC:s3://gui-agent/data_20250612/windows/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 115044 samples from VC:s3://gui-agent/data_20250612/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2684 samples from VC:s3://gui-agent/data_20250624/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 15766 samples from VC:s3://gui-agent/data_20250624/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7402 samples from VC:s3://gui-agent/data_20250624/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19280 samples from VC:s3://gui-agent/data_20250630/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3746 samples from VC:s3://gui-agent/data_20250630/android/planning_20250811.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 3560 samples from VC:s3://gui-agent/data_20250630/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 2469 samples from VC:s3://gui-agent/data_20250630/mac/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 18516 samples from VC:s3://gui-agent/data_20250630/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 420 samples from VC:s3://gui-agent/data_20250707/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 243 samples from VC:s3://gui-agent/data_20250707/mac/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 8898 samples from VC:s3://gui-agent/data_20250707/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 1188 samples from VC:s3://gui-agent/data_20250707/windows/planning_20250811.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8979 samples from VC:s3://gui-agent/data_20250707/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 1599 samples from VC:s3://gui-agent/data_20250707/android/planning_20250811.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 21026 samples from VC:s3://gui-agent/data_20250707/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2657 samples from VC:s3://gui-agent/data_20250707/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 22154 samples from VC:s3://gui-agent/data_20250714/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2383 samples from VC:s3://gui-agent/data_20250714/windows/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/web/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 23229 samples from VC:s3://gui-agent/data_20250714/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 16767 samples from VC:s3://gui-agent/data_20250714/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2389 samples from VC:s3://gui-agent/data_20250714/ubuntu/planning_20250811.jsonl +Rank 0: Loading private_aig_share_0815_logo_oral_operation_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_oral_operation_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_oral_operation_d240924_v1.jsonl +Rank 0: Loading private_aig_share_0815_logo_region_caption_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_region_caption_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_region_caption_d240924_v1.jsonl +Rank 0: Loading private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2.jsonl with all sampling strategy +Rank 0: Loaded 20293 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2.jsonl +Rank 0: Loading private_ui_phone_comment_20240606_json_d20241023_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_comment_20240606_json_d20241023_v2.jsonl with all sampling strategy +Rank 0: Loaded 1055 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_comment_20240606_json_d20241023_v2.jsonl +Rank 0: Loading private_ui_internal_aig_json_d241126 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_json_d241126.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 6837 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_json_d241126.jsonl +Rank 0: Loading private_ui_internal_aig_xml_d241126 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_xml_d241126.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 6873 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_xml_d241126.jsonl +Rank 0: Loading OS_Altas_androidworld_grounding_d241120_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/OS_Altas_androidworld_grounding_d241120_v1.jsonl with all sampling strategy +Rank 0: Loaded 89860 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/OS_Altas_androidworld_grounding_d241120_v1.jsonl +Rank 0: Loading private_ui_aig_share_long_caption_20240604_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_long_caption_20240604_v1.jsonl with repeat:4 sampling strategy +Rank 0: Loaded 3156 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_long_caption_20240604_v1.jsonl +Rank 0: Loading aw_1218_grounding +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/grounding_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/grounding_new.jsonl +Rank 0: Loading aw_1218_regioncaption +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/regioncaption_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/regioncaption_new.jsonl +Rank 0: Loading aw_1218_oral_operation +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/oral_operation_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/oral_operation_new.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600.jsonl with all sampling strategy +Rank 0: Loaded 6600 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1.jsonl +Rank 0: Loading private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607.jsonl with all sampling strategy +Rank 0: Loaded 24620 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607.jsonl +Rank 0: Loading private_ui_phone_2403_long_caption_d20240604_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d20240604_v2.jsonl with all sampling strategy +Rank 0: Loaded 17196 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d20240604_v2.jsonl +Rank 0: Loading private_ui_phone_2403_long_caption_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 5998 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d240430_v1.jsonl +Rank 0: Loading private_ui_phone_2403_ocr_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ocr_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 31276 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ocr_d240430_v1.jsonl +Rank 0: Loading screen_qa_with_bbox_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_with_bbox_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 62401 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_with_bbox_d240430_v1.jsonl +Rank 0: Loading screenai_layout_20240604_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screenai_layout_20240604_v1.jsonl with all sampling strategy +Rank 0: Loaded 22076 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screenai_layout_20240604_v1.jsonl +Rank 0: Loading amex_grounding_d240813_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/amex_grounding_d240813_v1.jsonl with all sampling strategy +Rank 0: Loaded 102007 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/amex_grounding_d240813_v1.jsonl +Rank 0: Loading guicourse_guienv_text_grounding_1_d240815_v3 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_1_d240815_v3.jsonl with all sampling strategy +Rank 0: Loaded 63581 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_1_d240815_v3.jsonl +Rank 0: Loading guicourse_guienv_text_grounding_2_d240815_v3 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_2_d240815_v3.jsonl with all sampling strategy +Rank 0: Loaded 6852 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_2_d240815_v3.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1.jsonl +Rank 0: Loading screen_qa_short_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_short_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 27880 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_short_d240430_v1.jsonl +Rank 0: Loading private_aig_share_0815_logo_grounding_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_grounding_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_grounding_d240924_v1.jsonl +Rank 0: Loading private_schedual_extract_20240520_v2_r464_reprompt_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_schedual_extract_20240520_v2_r464_reprompt_d240607.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 928 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_schedual_extract_20240520_v2_r464_reprompt_d240607.jsonl +Rank 0: Loading private_ui2json_app_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_app_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2488 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_app_d20240822_v1.jsonl +Rank 0: Loading private_ui2json_os_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_os_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 1242 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_os_d20240822_v1.jsonl +Rank 0: Loading private_ui2json_web_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_web_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2360 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_web_d20240822_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607.jsonl with all sampling strategy +Rank 0: Loaded 3791 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607.jsonl +Rank 0: Loading private_ui_aig_share_2405_marker_recognition_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_marker_recognition_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5179 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_marker_recognition_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_ocr_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_ocr_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5090 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_ocr_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_operation_oral_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_operation_oral_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5070 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_operation_oral_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5248 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2408_region_caption_d240903_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2408_region_caption_d240903_v1.jsonl with all sampling strategy +Rank 0: Loaded 5854 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2408_region_caption_d240903_v1.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1.jsonl +Rank 0: Loading uground_web_direct_150k_description_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_direct_150k_description_filtered_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 74208 samples from VC:s3://gui/new_annotations/uground/web_direct_150k_description_filtered_202507011.jsonl +Rank 0: Loading uground_web_direct_258k_function_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_direct_258k_function_filtered_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 124132 samples from VC:s3://gui/new_annotations/uground/web_direct_258k_function_filtered_202507011.jsonl +Rank 0: Loading uground_web_hybrid_773k_max_25qa_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_hybrid_773k_max_25qa_filtered_new_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 621928 samples from VC:s3://gui/new_annotations/uground/web_hybrid_773k_max_25qa_filtered_new_202507011.jsonl +Rank 0: Loading altas_windows +Rank 0: Loading VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_windows_splited_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 537672 samples from VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_windows_splited_202507011.jsonl +Rank 0: Loading altas_linux +Rank 0: Loading VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_linux_splited_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 21578 samples from VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_linux_splited_202507011.jsonl +Rank 0: Loading atlas_macos +Rank 0: Loading VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_macos_splited_202507011.jsonl with random:20% sampling strategy +Rank 0: Loaded 3680 samples from VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_macos_splited_202507011.jsonl +Rank 0: Loading android_action_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/android/filter_action_grounding_20250405_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 5621 samples from VC:s3://gui/data_20250328/android/filter_action_grounding_20250405_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250328 +Rank 0: Loading VC:s3://gui-agent/data_20250328/windows/action_grounding_20250409_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 11980 samples from VC:s3://gui-agent/data_20250328/windows/action_grounding_20250409_202507011_20250722.jsonl +Rank 0: Loading web_action_grounding_20250328 +Rank 0: Loading VC:s3://gui-agent/data_20250328/web_25k/action_grounding_20250404_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 9459 samples from VC:s3://gui-agent/data_20250328/web_25k/action_grounding_20250404_202507011.jsonl +Rank 0: Loading ubuntu_action_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/ubuntu/action_grounding_20250407_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 328 samples from VC:s3://gui/data_20250310/ubuntu/action_grounding_20250407_202507011.jsonl +Rank 0: Loading ubuntu_action_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/ubuntu/action_grounding_20250407_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 54 samples from VC:s3://gui/data_20250317/ubuntu/action_grounding_20250407_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/action_grounding_20250421_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 480 samples from VC:s3://gui/data_20250317/windows/action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/crop_action_grounding_20250421_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 240 samples from VC:s3://gui/data_20250317/windows/crop_action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/action_grounding_20250421_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 944 samples from VC:s3://gui/data_20250310/windows/action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/crop_action_grounding_20250421_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 472 samples from VC:s3://gui/data_20250310/windows/crop_action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading mac_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/mac/action_grounding_20250410_202507011.jsonl with all sampling strategy +Rank 0: Loaded 1578 samples from VC:s3://gui-agent/data_20250407/mac/action_grounding_20250410_202507011.jsonl +Rank 0: Loading iphone_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/iphone/white/action_grounding_20250410_202507011.jsonl with all sampling strategy +Rank 0: Loaded 20394 samples from VC:s3://gui-agent/data_20250407/iphone/white/action_grounding_20250410_202507011.jsonl +Rank 0: Loading web_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/web/action_grounding_20250414_202507011.jsonl with random:20% sampling strategy +Rank 0: Loaded 14285 samples from VC:s3://gui-agent/data_20250407/web/action_grounding_20250414_202507011.jsonl +Rank 0: Loading android_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/android/action_grounding_20250410_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 3590 samples from VC:s3://gui-agent/data_20250407/android/action_grounding_20250410_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/action_grounding_20250416_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 21422 samples from VC:s3://gui-agent/data_20250407/windows/action_grounding_20250416_202507011_20250722.jsonl +Rank 0: Loading windows_human_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/human_action_grounding_20250416_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 100 samples from VC:s3://gui-agent/data_20250407/windows/human_action_grounding_20250416_202507011_20250722.jsonl +Rank 0: Loading windows_aug_cropping_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/sub_action_grounding_20250421_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 7675 samples from VC:s3://gui-agent/data_20250407/windows/sub_action_grounding_20250421_202507011.jsonl +Rank 0: Loading iphone_action_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/action_grounding_20250417_202507011.jsonl with all sampling strategy +Rank 0: Loaded 20116 samples from VC:s3://gui-agent/data_20250414/iphone/action_grounding_20250417_202507011.jsonl +Rank 0: Loading iphone_human_action_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/human_action_grounding_20250421_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2520 samples from VC:s3://gui-agent/data_20250414/iphone/human_action_grounding_20250421_202507011.jsonl +Rank 0: Loading mac_human_action_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/mac/human_action_grounding_20250418_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7814 samples from VC:s3://gui-agent/data_20250414/mac/human_action_grounding_20250418_202507011.jsonl +Rank 0: Loading android_action_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/Android/action_grounding_20250429_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 17838 samples from VC:s3://gui-agent/data_20250421/Android/action_grounding_20250429_202507011.jsonl +Rank 0: Loading android_action_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/Android/action_grounding_20250429_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 9008 samples from VC:s3://gui-agent/data_20250428/Android/action_grounding_20250429_202507011.jsonl +Rank 0: Loading web_canvas_action_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/web_canvas/action_grounding_20250429_202507011.jsonl with random:20% sampling strategy +Rank 0: Loaded 624 samples from VC:s3://gui-agent/data_20250428/web_canvas/action_grounding_20250429_202507011.jsonl +Rank 0: Loading web_action_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/web/action_grounding_20250505_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 100652 samples from VC:s3://gui-agent/data_20250421/web/action_grounding_20250505_202507011.jsonl +Rank 0: Loading ubuntu_action_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/ubuntu/action_grounding_20250505_202507011.jsonl with all sampling strategy +Rank 0: Loaded 28346 samples from VC:s3://gui-agent/data_20250428/ubuntu/action_grounding_20250505_202507011.jsonl +Rank 0: Loading android_action_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/android/action_grounding_20250506_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 4907 samples from VC:s3://gui-agent/data_20250505/android/action_grounding_20250506_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/action_grounding_20250508_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 2635 samples from VC:s3://gui-agent/data_20250505/windows/action_grounding_20250508_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/crop_action_grounding_20250508_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 5234 samples from VC:s3://gui-agent/data_20250505/windows/crop_action_grounding_20250508_202507011_20250722.jsonl +Rank 0: Loading ubuntu_action_grounding_20250508 +Rank 0: Loading VC:s3://gui-agent/data_20250508/ubuntu/action_grounding_20250509_202507011.jsonl with all sampling strategy +Rank 0: Loaded 3404 samples from VC:s3://gui-agent/data_20250508/ubuntu/action_grounding_20250509_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250510_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250510_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250510_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250510_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250526_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250526_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250526_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250526_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250526_3 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250527_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250527_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_3 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250527_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250527_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250529_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 17050 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250529_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250529_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 17050 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250529_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_1 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250510_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 2855 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250510_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_2 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250526_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 655 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250526_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_3 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250527_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 833 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250527_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_4 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250529_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 2643 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250529_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_1 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250510_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250510_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_2 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250526_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250526_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_3 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250527_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250527_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_4 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250529_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5286 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250529_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_5 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250510_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250510_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_6 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250526_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250526_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_7 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250527_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250527_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_8 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250529_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5286 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250529_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/action_grounding_20250619_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3130 samples from VC:s3://gui-agent/data_20250623/windows/action_grounding_20250619_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250619_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 3130 samples from VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250619_202507011_20250722.jsonl +Rank 0: Loading windows_hover_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/action_grounding_20250620_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 3333 samples from VC:s3://gui-agent/data_20250623/windows/action_grounding_20250620_202507011_20250722.jsonl +Rank 0: Loading windows_crop_hover_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250620_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 3333 samples from VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250620_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_1 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 833 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_2 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1666 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_3 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1666 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_pure_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_4 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 358 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_5 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 782 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_6 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 782 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_pure_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/action_grounding_20250627_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7946 samples from VC:s3://gui-agent/data_20250630/windows/action_grounding_20250627_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/crop_action_grounding_20250627_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 3973 samples from VC:s3://gui-agent/data_20250630/windows/crop_action_grounding_20250627_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_1 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 993 samples from VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1986 samples from VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1986 samples from VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_pure_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/action_grounding_20250630_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 1990 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/action_grounding_20250630_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_action_grounding_20250630_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1990 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_action_grounding_20250630_202507011_20250722.jsonl +Rank 0: Loading windows_human_action_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/action_grounding_20250703_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 5040 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/action_grounding_20250703_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_action_grounding_20250703_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 5040 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_action_grounding_20250703_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_4 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 630 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_5 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1260 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_6 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1260 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_pure_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_7 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 249 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_8 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 498 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_9 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 498 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_pure_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/action_grounding_20250708_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2538 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/action_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_action_grounding_20250708_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1269 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_action_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250707_1 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_action_grounding_20250708.jsonl with random:25% sampling strategy +Rank 0: Loaded 172 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_action_grounding_20250708.jsonl +Rank 0: Loading windows_aug_action_grounding_20250707_2 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_action_grounding_20250708.jsonl with random:25% sampling strategy +Rank 0: Loaded 634 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_action_grounding_20250708.jsonl +Rank 0: Loading windows_aug_action_grounding_20250707_3 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_action_grounding_20250708.jsonl with random:25% sampling strategy +Rank 0: Loaded 634 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_action_grounding_20250708.jsonl +Rank 0: Loading windows_human_action_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/action_grounding_20250717_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2832 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/action_grounding_20250717_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_action_grounding_20250717_20250722.jsonl with all sampling strategy +Rank 0: Loaded 2832 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_action_grounding_20250717_20250722.jsonl +Rank 0: Loading android_ocr_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/android/text_ocr_20250409.jsonl with all sampling strategy +Rank 0: Loaded 29878 samples from VC:s3://gui/data_20250328/android/text_ocr_20250409.jsonl +Rank 0: Loading mac_orc_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/mac/element_ocr_20250328.jsonl with all sampling strategy +Rank 0: Loaded 4393 samples from VC:s3://gui/data_20250328/mac/element_ocr_20250328.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/ubuntu/internvl_grounding_20250407.jsonl with random:80% sampling strategy +Rank 0: Loaded 254 samples from VC:s3://gui/data_20250310/ubuntu/internvl_grounding_20250407.jsonl +Rank 0: Loading windows_click_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/internvl_grounding_function_20250421.jsonl with random:80% sampling strategy +Rank 0: Loaded 1802 samples from VC:s3://gui/data_20250310/windows/internvl_grounding_function_20250421.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/ubuntu/internvl_grounding_20250407.jsonl with random:80% sampling strategy +Rank 0: Loaded 53 samples from VC:s3://gui/data_20250317/ubuntu/internvl_grounding_20250407.jsonl +Rank 0: Loading windows_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/internvl_grounding_20250421_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 6578 samples from VC:s3://gui/data_20250317/windows/internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/crop_internvl_grounding_20250421_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 3287 samples from VC:s3://gui/data_20250317/windows/crop_internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading windows_click_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/internvl_grounding_function_20250421_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 542 samples from VC:s3://gui/data_20250317/windows/internvl_grounding_function_20250421_20250722.jsonl +Rank 0: Loading windows_crop_click_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/crop_internvl_grounding_function_20250421_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 270 samples from VC:s3://gui/data_20250317/windows/crop_internvl_grounding_function_20250421_20250722.jsonl +Rank 0: Loading windows_internvl_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/internvl_grounding_20250421_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 10908 samples from VC:s3://gui/data_20250310/windows/internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/crop_internvl_grounding_20250421_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 5453 samples from VC:s3://gui/data_20250310/windows/crop_internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading android_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/android/internvl_grounding_20250409.jsonl with random:80% sampling strategy +Rank 0: Loaded 13950 samples from VC:s3://gui/data_20250328/android/internvl_grounding_20250409.jsonl +Rank 0: Loading windows_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui-agent/data_20250328/windows/internvl_grounding_20250425_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 12390 samples from VC:s3://gui-agent/data_20250328/windows/internvl_grounding_20250425_20250722.jsonl +Rank 0: Loading web_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/web_25k/internvl_grounding_20250409.jsonl with random:80% sampling strategy +Rank 0: Loaded 13402 samples from VC:s3://gui/data_20250328/web_25k/internvl_grounding_20250409.jsonl +Rank 0: Loading icon_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/icon_canva/icon_anno_20250328.jsonl with all sampling strategy +Rank 0: Loaded 81303 samples from VC:s3://gui/data_20250328/icon_canva/icon_anno_20250328.jsonl +Rank 0: Loading mac_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/mac/internvl_grounding_20250410.jsonl with all sampling strategy +Rank 0: Loaded 1251 samples from VC:s3://gui-agent/data_20250407/mac/internvl_grounding_20250410.jsonl +Rank 0: Loading iphone_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/iphone/white/internvl_grounding_20250410.jsonl with all sampling strategy +Rank 0: Loaded 27849 samples from VC:s3://gui-agent/data_20250407/iphone/white/internvl_grounding_20250410.jsonl +Rank 0: Loading web_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/web/internvl_grounding_20250414.jsonl with random:80% sampling strategy +Rank 0: Loaded 51607 samples from VC:s3://gui-agent/data_20250407/web/internvl_grounding_20250414.jsonl +Rank 0: Loading android_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/android/internvl_grounding_20250410.jsonl with random:80% sampling strategy +Rank 0: Loaded 6281 samples from VC:s3://gui-agent/data_20250407/android/internvl_grounding_20250410.jsonl +Rank 0: Loading windows_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/internvl_grounding_20250416_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 25961 samples from VC:s3://gui-agent/data_20250407/windows/internvl_grounding_20250416_20250722.jsonl +Rank 0: Loading windows_cropping_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/sub_internvl_grounding_20250421.jsonl with random:40% sampling strategy +Rank 0: Loaded 9763 samples from VC:s3://gui-agent/data_20250407/windows/sub_internvl_grounding_20250421.jsonl +Rank 0: Loading iphone_internvl_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/internvl_grounding_20250417.jsonl with all sampling strategy +Rank 0: Loaded 16896 samples from VC:s3://gui-agent/data_20250414/iphone/internvl_grounding_20250417.jsonl +Rank 0: Loading iphone_human_internvl_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/human_internvl_grounding_20250421.jsonl with all sampling strategy +Rank 0: Loaded 927 samples from VC:s3://gui-agent/data_20250414/iphone/human_internvl_grounding_20250421.jsonl +Rank 0: Loading mac_human_internvl_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/mac/human_internvl_grounding_20250418.jsonl with all sampling strategy +Rank 0: Loaded 3051 samples from VC:s3://gui-agent/data_20250414/mac/human_internvl_grounding_20250418.jsonl +Rank 0: Loading android_internvl_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/Android/internvl_grounding_20250429.jsonl with random:80% sampling strategy +Rank 0: Loaded 25217 samples from VC:s3://gui-agent/data_20250421/Android/internvl_grounding_20250429.jsonl +Rank 0: Loading android_internvl_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/Android/internvl_grounding_20250429.jsonl with random:80% sampling strategy +Rank 0: Loaded 12677 samples from VC:s3://gui-agent/data_20250428/Android/internvl_grounding_20250429.jsonl +Rank 0: Loading web_canvas_internvl_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/web_canvas/internvl_grounding_20250429.jsonl with random:40% sampling strategy +Rank 0: Loaded 1566 samples from VC:s3://gui-agent/data_20250428/web_canvas/internvl_grounding_20250429.jsonl +Rank 0: Loading web_internvl_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/web/internvl_grounding_20250505.jsonl with random:80% sampling strategy +Rank 0: Loaded 174170 samples from VC:s3://gui-agent/data_20250421/web/internvl_grounding_20250505.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/ubuntu/internvl_grounding_20250505.jsonl with random:80% sampling strategy +Rank 0: Loaded 24862 samples from VC:s3://gui-agent/data_20250428/ubuntu/internvl_grounding_20250505.jsonl +Rank 0: Loading android_internvl_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/android/internvl_grounding_20250506.jsonl with random:80% sampling strategy +Rank 0: Loaded 9142 samples from VC:s3://gui-agent/data_20250505/android/internvl_grounding_20250506.jsonl +Rank 0: Loading windows_internvl_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/internvl_grounding_20250508_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 4229 samples from VC:s3://gui-agent/data_20250505/windows/internvl_grounding_20250508_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/crop_internvl_grounding_20250508_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 4200 samples from VC:s3://gui-agent/data_20250505/windows/crop_internvl_grounding_20250508_20250722.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250508 +Rank 0: Loading VC:s3://gui-agent/data_20250508/ubuntu/internvl_grounding_20250509.jsonl with random:80% sampling strategy +Rank 0: Loaded 2868 samples from VC:s3://gui-agent/data_20250508/ubuntu/internvl_grounding_20250509.jsonl +Rank 0: Loading windows_internvl_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250510_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 9494 samples from VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250510_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250510_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 9494 samples from VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250510_20250722.jsonl +Rank 0: Loading windows_internvl_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250526_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 2270 samples from VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250526_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250526_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 2270 samples from VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250526_20250722.jsonl +Rank 0: Loading windows_internvl_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250529_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 28466 samples from VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250529_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250529_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 28466 samples from VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250529_20250722.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250619_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1620 samples from VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250619_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250619_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1620 samples from VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250619_20250722.jsonl +Rank 0: Loading windows_hover_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250620_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1714 samples from VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250620_20250722.jsonl +Rank 0: Loading windows_crop_hover_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250620_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 1714 samples from VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250620_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_1 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 1372 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_2 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2743 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_3 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2743 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_pure_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_4 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 590 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_5 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 1296 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_6 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 1296 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_pure_paste.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/internvl_grounding_20250627_20250722.jsonl with all sampling strategy +Rank 0: Loaded 4230 samples from VC:s3://gui-agent/data_20250630/windows/internvl_grounding_20250627_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/crop_internvl_grounding_20250627_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 4230 samples from VC:s3://gui-agent/data_20250630/windows/crop_internvl_grounding_20250627_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_1 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 1692 samples from VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 3384 samples from VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 3384 samples from VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_pure_paste.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/internvl_grounding_20250630_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1077 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/internvl_grounding_20250630_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_internvl_grounding_20250630_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 862 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_internvl_grounding_20250630_20250722.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/internvl_grounding_20250703_20250722.jsonl with all sampling strategy +Rank 0: Loaded 2676 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/internvl_grounding_20250703_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_internvl_grounding_20250703_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 2676 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_internvl_grounding_20250703_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_4 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 1070 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_5 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2141 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_6 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2141 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_pure_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_7 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 431 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_8 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 862 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_9 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 862 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_pure_paste.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/internvl_grounding_20250708_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1344 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/internvl_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_internvl_grounding_20250708_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1344 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_internvl_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250707_1 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_internvl_grounding_20250708.jsonl with random:40% sampling strategy +Rank 0: Loaded 292 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_internvl_grounding_20250708.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250707_2 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_internvl_grounding_20250708.jsonl with random:40% sampling strategy +Rank 0: Loaded 1075 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_internvl_grounding_20250708.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250707_3 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_internvl_grounding_20250708.jsonl with random:40% sampling strategy +Rank 0: Loaded 1075 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_internvl_grounding_20250708.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/internvl_grounding_20250717_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1509 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/internvl_grounding_20250717_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_internvl_grounding_20250717_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 1207 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_internvl_grounding_20250717_20250722.jsonl +Rank 0: Loading uibert_train_ground_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/uibert_train_ground_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 4646 samples from VC:s3://gui/new_annotations/gui_data_grounding/uibert_train_ground_d240430_v1.jsonl +Rank 0: Loading openapp_taperception_grounding_d240815_v2 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/openapp_taperception_grounding_d240815_v2.jsonl with all sampling strategy +Rank 0: Loaded 2500 samples from VC:s3://gui/new_annotations/gui_data_grounding/openapp_taperception_grounding_d240815_v2.jsonl +Rank 0: Loading openapp_widget_grounding_d240815_v2 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/openapp_widget_grounding_d240815_v2.jsonl with all sampling strategy +Rank 0: Loaded 14878 samples from VC:s3://gui/new_annotations/gui_data_grounding/openapp_widget_grounding_d240815_v2.jsonl +Rank 0: Loading openapp_mug_grounding_d240812 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/openapp_mug_grounding_d240812.jsonl with all sampling strategy +Rank 0: Loaded 26090 samples from VC:s3://gui/new_annotations/gui_data_grounding/openapp_mug_grounding_d240812.jsonl +Rank 0: Loading private_ui_phone_2403_ground_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/private_ui_phone_2403_ground_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 24798 samples from VC:s3://gui/new_annotations/gui_data_grounding/private_ui_phone_2403_ground_d240430_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_ground_d240521_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2405_ground_d240521_v1.jsonl with all sampling strategy +Rank 0: Loaded 5008 samples from VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2405_ground_d240521_v1.jsonl +Rank 0: Loading private_ui_aig_share_2406_ground_d240612_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2406_ground_d240612_v1.jsonl with all sampling strategy +Rank 0: Loaded 7903 samples from VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2406_ground_d240612_v1.jsonl +Rank 0: Loading windows_pc_agent_e_planning_cot +Rank 0: Loading VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 55564 samples from VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data.jsonl +Rank 0: Loading windows_pc_agent_e_navigation +Rank 0: Loading VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data_without_think.jsonl with all sampling strategy +Rank 0: Loaded 27782 samples from VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data_without_think.jsonl +Rank 0: Loading os_genesis_ac_training_data +Rank 0: Skipping os_genesis_ac_training_data due to repeat_time=0 +Rank 0: Loading os_genesis_aw_training_data +Rank 0: Skipping os_genesis_aw_training_data due to repeat_time=0 +Rank 0: Loading os_genesis_web_training +Rank 0: Skipping os_genesis_web_training due to repeat_time=0 +Rank 0: Loading gui_odyssey_plus_1 +Rank 0: Skipping gui_odyssey_plus_1 due to repeat_time=0 +Rank 0: Loading gui_odyssey_plus_2 +Rank 0: Skipping gui_odyssey_plus_2 due to repeat_time=0 +Rank 0: Loading gui_odyssey_plus_custom_3 +Rank 0: Skipping gui_odyssey_plus_custom_3 due to repeat_time=0 +Rank 0: Loading mm_gui_mid +Rank 0: Skipping mm_gui_mid due to repeat_time=0 +Rank 0: Loading text_gui_mid +Rank 0: Skipping text_gui_mid due to repeat_time=0 +Rank 0: Loading gui_mid_trajectory +Rank 0: Skipping gui_mid_trajectory due to repeat_time=0 +Rank 0: Loading ubuntu_rag +Rank 0: Loading VC:s3://gui-agent/cua_text_rag/ubuntu_rag.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7024 samples from VC:s3://gui-agent/cua_text_rag/ubuntu_rag.jsonl +Rank 0: Loading windows_rag +Rank 0: Loading VC:s3://gui-agent/cua_text_rag/windows_rag.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3144 samples from VC:s3://gui-agent/cua_text_rag/windows_rag.jsonl +Rank 0: Total training samples: 6240940 +Rank 0: Formatting inputs...Skip in lazy mode +Detected kernel version 3.10.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher. +W0817 21:19:16.523000 63300 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 21:19:16.523000 63300 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 21:19:16.523000 63300 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 21:19:16.523000 63300 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +*** +W0817 21:19:17.013000 115877 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 21:19:17.013000 115877 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 21:19:17.013000 115877 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 21:19:17.013000 115877 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 21:19:20.375000 12277 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 21:19:20.375000 12277 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 21:19:20.375000 12277 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 21:19:20.375000 12277 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 21:19:20.394000 9103 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 21:19:20.394000 9103 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 21:19:20.394000 9103 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 21:19:20.394000 9103 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 21:19:20.412000 86769 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 21:19:20.412000 86769 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 21:19:20.412000 86769 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 21:19:20.412000 86769 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 21:19:20.715000 75224 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 21:19:20.715000 75224 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 21:19:20.715000 75224 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 21:19:20.715000 75224 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 21:19:24.691000 127166 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 21:19:24.691000 127166 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 21:19:24.691000 127166 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 21:19:24.691000 127166 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +[2025-08-17 21:19:40,392] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:19:40,402] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:19:40,429] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:19:40,448] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:19:40,448] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:19:40,449] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:19:40,449] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:19:40,450] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:19:40,450] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:19:40,456] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:19:40,456] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:19:40,456] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:19:40,457] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:19:40,457] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:19:40,457] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:19:40,457] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:19:40,457] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:19:40,466] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:19:40,466] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:19:40,466] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:19:40,466] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:19:44,067] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:19:44,078] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:19:44,067] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:19:44,068] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:19:44,068] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:19:44,069] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:19:44,078] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:19:44,078] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:19:44,078] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:19:44,078] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:19:44,078] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:19:44,078] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:19:44,079] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:19:44,079] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:19:44,079] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:19:44,079] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:19:44,079] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:19:44,079] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:19:44,079] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:19:44,080] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:19:44,516] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:19:44,522] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:19:44,538] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:19:44,647] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:19:44,650] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:19:44,652] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:19:44,654] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:19:44,655] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:19:44,656] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:19:44,656] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:19:44,657] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:19:44,657] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:19:44,658] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:19:44,660] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:19:44,669] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:19:44,671] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:19:44,671] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:19:44,674] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:19:44,675] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:19:44,675] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:19:50,279] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:19:50,279] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:19:50,292] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:19:50,302] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:19:50,302] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:19:50,303] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:19:50,305] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:19:50,312] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:19:50,323] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:19:50,324] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:19:50,324] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:19:50,322] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:19:50,324] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:19:50,323] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:19:50,323] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:19:50,323] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:19:50,324] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:19:50,324] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:19:50,324] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:19:50,324] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:19:56,296] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:19:56,296] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:19:56,296] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:19:56,296] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:19:56,296] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:19:56,296] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:19:56,297] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:19:56,297] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:19:56,778] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:19:56,778] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:19:56,778] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:19:56,778] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:19:56,779] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:19:56,779] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:19:56,779] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:19:56,779] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:19:56,807] [INFO] [comm.py:652:init_distributed] cdb=None +ice None world_size = 64 +[2025-08-17 21:19:56,807] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:19:56,807] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:19:56,807] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:19:56,807] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:19:56,807] [INFO] [comm.py:683:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl +[2025-08-17 21:19:56,808] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:19:56,808] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:19:56,808] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:19:56,957] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:19:56,963] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:19:56,963] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:19:56,964] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:19:56,964] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:19:56,968] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:19:56,968] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:19:57,216] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:19:57,290] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:19:57,329] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:19:57,337] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:19:57,338] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:19:57,339] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:19:57,340] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:19:57,341] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:19:57,342] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:19:57,424] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:19:57,425] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:19:57,427] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:19:57,430] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:19:57,433] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:19:57,433] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:19:57,433] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:19:59,634] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:19:59,675] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:19:59,698] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:19:59,698] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:19:59,714] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:19:59,716] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:19:59,718] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:19:59,718] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:19:59,718] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:19:59,719] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:19:59,720] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:19:59,721] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:19:59,721] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:19:59,721] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:20:08,612] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:20:08,613] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:20:08,614] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:20:08,615] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:20:08,615] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:20:08,616] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:20:08,616] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:20:08,616] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:20:08,975] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:20:08,975] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:20:08,975] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:20:08,976] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:20:08,977] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:20:08,977] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:20:08,977] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:20:08,978] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:20:09,289] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:20:09,452] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:20:09,465] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:20:09,493] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:20:09,495] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:20:09,496] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:20:09,504] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:20:09,507] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:20:09,512] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:20:09,582] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:20:09,587] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:20:09,589] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:20:09,590] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:20:09,592] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:20:09,595] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:20:09,596] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. + Loading checkpoint shards: 0%| | 0/2 [00:00 before Client(conf_path) +Rank 0: --> after Client(conf_path) +Rank 0: Loading datasets: /mnt/petrelfs/liuzhaoyang/workspace/GUIAgent/internvl_chat/data/internvl_meta/meta/meta_250816_1.json +Rank 0: Loading guienv +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl with all sampling strategy +Rank 0: Loaded 327972 samples from VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl +Rank 0: Loading omniact +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl with all sampling strategy +Rank 0: Loaded 6720 samples from VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl +Rank 0: Loading ricoig16k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16133 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl +Rank 0: Loading ricosca +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl with all sampling strategy +Rank 0: Loaded 173212 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl +Rank 0: Loading seeclick +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl with all sampling strategy +Rank 0: Loaded 271121 samples from VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl +Rank 0: Loading ui_refexp +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl with all sampling strategy +Rank 0: Loaded 15624 samples from VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl +Rank 0: Loading webui350k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 57389 samples from VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl +Rank 0: Loading widget_captioning +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl with all sampling strategy +Rank 0: Loaded 101426 samples from VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl +Rank 0: Loading aitw-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl +Rank 0: Loading aitw-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl +Rank 0: Loading aitw-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl +Rank 0: Loading amex-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl +Rank 0: Loading amex-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl +Rank 0: Loading amex-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl +Rank 0: Loading android_control +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 149428 samples from VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl +Rank 0: Loading coat +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl with all sampling strategy +Rank 0: Loaded 11833 samples from VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl +Rank 0: Loading guiact-web-multi-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl +Rank 0: Loading guiact-web-multi-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl +Rank 0: Loading guiact-web-multi-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl +Rank 0: Loading guiact-web-single +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl with all sampling strategy +Rank 0: Loaded 67396 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl +Rank 0: Loading guide +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl with all sampling strategy +Rank 0: Loaded 13544 samples from VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl +Rank 0: Loading gui-odyssey-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl +Rank 0: Loading gui-odyssey-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl +Rank 0: Loading gui-odyssey-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl +Rank 0: Loading mind2web-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l1_202507011.jsonl +Rank 0: Loading mind2web-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l2_202507011.jsonl +Rank 0: Loading mind2web-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l3_202507011.jsonl +Rank 0: Loading miniwob-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l1_202507011.jsonl +Rank 0: Loading miniwob-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l2_202507011.jsonl +Rank 0: Loading miniwob-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l3_202507011.jsonl +Rank 0: Loading aguvis_android_control-v2 +Rank 0: Skipping aguvis_android_control-v2 due to repeat_time=0 +Rank 0: Loading aguvis_coat-v2 +Rank 0: Skipping aguvis_coat-v2 due to repeat_time=0 +Rank 0: Loading aguvis_docvqa_grounding +Rank 0: Skipping aguvis_docvqa_grounding due to repeat_time=0 +Rank 0: Loading aguvis_guiact-web-multi +Rank 0: Skipping aguvis_guiact-web-multi due to repeat_time=0 +Rank 0: Loading aguvis_guiact-web-single-v2 +Rank 0: Skipping aguvis_guiact-web-single-v2 due to repeat_time=0 +Rank 0: Loading aguvis_guide_si_10k-v2 +Rank 0: Skipping aguvis_guide_si_10k-v2 due to repeat_time=0 +Rank 0: Loading aguvis_guienv +Rank 0: Skipping aguvis_guienv due to repeat_time=0 +Rank 0: Loading aguvis_mind2web_train_v1.0.1 +Rank 0: Skipping aguvis_mind2web_train_v1.0.1 due to repeat_time=0 +Rank 0: Loading aguvis_omniact +Rank 0: Skipping aguvis_omniact due to repeat_time=0 +Rank 0: Loading aguvis_osatlas_ui_tars_cleaned +Rank 0: Skipping aguvis_osatlas_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading aguvis_ricoig16k +Rank 0: Skipping aguvis_ricoig16k due to repeat_time=0 +Rank 0: Loading aguvis_ricosca +Rank 0: Skipping aguvis_ricosca due to repeat_time=0 +Rank 0: Loading aguvis_seeclick_mi_ui_tars_cleaned +Rank 0: Skipping aguvis_seeclick_mi_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading aguvis_seeclick_ui_tars_cleaned_fixed +Rank 0: Skipping aguvis_seeclick_ui_tars_cleaned_fixed due to repeat_time=0 +Rank 0: Loading aguvis_ui_refexp +Rank 0: Skipping aguvis_ui_refexp due to repeat_time=0 +Rank 0: Loading aguvis_webui350k +Rank 0: Skipping aguvis_webui350k due to repeat_time=0 +Rank 0: Loading aguvis_widget_captioning +Rank 0: Skipping aguvis_widget_captioning due to repeat_time=0 +Rank 0: Loading icon_caption_icon_v0222_description +Rank 0: Skipping icon_caption_icon_v0222_description due to repeat_time=0 +Rank 0: Loading icon_grounding_icon_v0222_grounding +Rank 0: Skipping icon_grounding_icon_v0222_grounding due to repeat_time=0 +Rank 0: Loading refusal_component_final_1.5m +Rank 0: Skipping refusal_component_final_1.5m due to repeat_time=0 +Rank 0: Loading refusal_component_library_snap_icon_data_grounding +Rank 0: Skipping refusal_component_library_snap_icon_data_grounding due to repeat_time=0 +Rank 0: Loading refusal_component_v1_130k +Rank 0: Skipping refusal_component_v1_130k due to repeat_time=0 +Rank 0: Loading refusal_guienv +Rank 0: Skipping refusal_guienv due to repeat_time=0 +Rank 0: Loading refusal_icon_v0222_grounding +Rank 0: Skipping refusal_icon_v0222_grounding due to repeat_time=0 +Rank 0: Loading refusal_osatlas_ui_tars_cleaned +Rank 0: Skipping refusal_osatlas_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading refusal_ricosca +Rank 0: Skipping refusal_ricosca due to repeat_time=0 +Rank 0: Loading refusal_seeclick_mi_ui_tars_cleaned +Rank 0: Skipping refusal_seeclick_mi_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading refusal_seeclick_ui_tars_cleaned_fixed +Rank 0: Skipping refusal_seeclick_ui_tars_cleaned_fixed due to repeat_time=0 +Rank 0: Loading refusal_training_data_icon_grounded_merged +Rank 0: Skipping refusal_training_data_icon_grounded_merged due to repeat_time=0 +Rank 0: Loading component_generated_component_final_1.5m_cleaned_split +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_final_1.5m_cleaned_split_20250713.jsonl with random:10% sampling strategy +Rank 0: Loaded 3987 samples from VC:s3://gui-agent/jedi/annotations_250713/component_final_1.5m_cleaned_split_20250713.jsonl +Rank 0: Loading component_generated_component_library_snap_icon_data_description +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_description_conversations_20250713.jsonl with random:50% sampling strategy +Rank 0: Loaded 11061 samples from VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_description_conversations_20250713.jsonl +Rank 0: Loading component_generated_component_library_snap_icon_data_grounding +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_grounding_conversations_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 4424 samples from VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_grounding_conversations_20250713.jsonl +Rank 0: Loading component_generated_component_v1_130k +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_v1_130k_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 26376 samples from VC:s3://gui-agent/jedi/annotations_250713/component_v1_130k_20250713.jsonl +Rank 0: Loading component_rule-based_doc_data_new +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/doc_data_new_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 3153 samples from VC:s3://gui-agent/jedi/annotations_250713/doc_data_new_20250713.jsonl +Rank 0: Loading component_rule-based_doc_scroll_data_new +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/doc_scroll_data_new_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 603 samples from VC:s3://gui-agent/jedi/annotations_250713/doc_scroll_data_new_20250713.jsonl +Rank 0: Loading component_rule-based_ethercalc_v1 +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/ethercalc_v1_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 2012 samples from VC:s3://gui-agent/jedi/annotations_250713/ethercalc_v1_20250713.jsonl +Rank 0: Loading component_rule-based_slide_v1_17k +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/slide_v1_17k_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 2363 samples from VC:s3://gui-agent/jedi/annotations_250713/slide_v1_17k_20250713.jsonl +Rank 0: Loading icon_caption_ios_app_data +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/ios_app_data_conversations-images_pure_color_background_20250713.jsonl with all sampling strategy +Rank 0: Loaded 49498 samples from VC:s3://gui-agent/jedi/annotations_250713/ios_app_data_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_caption_mac_app_data +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/mac_app_data_conversations-images_pure_color_background_20250713.jsonl with all sampling strategy +Rank 0: Loaded 18083 samples from VC:s3://gui-agent/jedi/annotations_250713/mac_app_data_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_caption_training_data_icon +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_pure_color_background_20250713.jsonl with random:50% sampling strategy +Rank 0: Loaded 75874 samples from VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_grounding_training_data_icon_grounded_merged +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_grounded_merged_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 5466 samples from VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_grounded_merged_20250713.jsonl +Rank 0: Loading layout_layout200k_training_data_qwen25 +Rank 0: Skipping layout_layout200k_training_data_qwen25 due to repeat_time=0 +Rank 0: Loading layout_layout200k_grounding_training_data_qwen25 +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/layout200k_grounding_training_data_qwen25_20250713.jsonl with random:10% sampling strategy +Rank 0: Loaded 158612 samples from VC:s3://gui-agent/jedi/annotations_250713/layout200k_grounding_training_data_qwen25_20250713.jsonl +Rank 0: Loading layout_layout400k_claude_training_data_qwen25_split +Rank 0: Skipping layout_layout400k_claude_training_data_qwen25_split due to repeat_time=0 +Rank 0: Loading layout_layout400k_claude_grounding_training_data_qwen25_split +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/layout400k_claude_grounding_training_data_qwen25_split_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 7540 samples from VC:s3://gui-agent/jedi/annotations_250713/layout400k_claude_grounding_training_data_qwen25_split_20250713.jsonl +Rank 0: Loading layout_os_layout_v1 +Rank 0: Skipping layout_os_layout_v1 due to repeat_time=0 +Rank 0: Loading layout_os_layout_v1_grounding +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/os_layout_v1_grounding_training_data_qwen25_split_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 7857 samples from VC:s3://gui-agent/jedi/annotations_250713/os_layout_v1_grounding_training_data_qwen25_split_20250713.jsonl +Rank 0: Loading mind2web_raw_image +Rank 0: Loading VC:s3://gui-agent/mind2web_train/navigation_20250705.jsonl with all sampling strategy +Rank 0: Loaded 5740 samples from VC:s3://gui-agent/mind2web_train/navigation_20250705.jsonl +Rank 0: Loading ws_android_navigation_20250328 +Rank 0: Skipping ws_android_navigation_20250328 due to repeat_time=0 +Rank 0: Loading ws_android_navigation_20250407 +Rank 0: Skipping ws_android_navigation_20250407 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_w_history_20250328 +Rank 0: Skipping ws_web_navigation_w_history_20250328 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_wo_history_20250328 +Rank 0: Skipping ws_web_navigation_wo_history_20250328 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_20250421 +Rank 0: Skipping ws_web_navigation_20250421 due to repeat_time=0 +Rank 0: Loading ws_ubuntu_navigation_20250328 +Rank 0: Skipping ws_ubuntu_navigation_20250328 due to repeat_time=0 +Rank 0: Loading ws_android_navigation_20250505 +Rank 0: Skipping ws_android_navigation_20250505 due to repeat_time=0 +Rank 0: Loading internal_android_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 48814 samples from VC:s3://gui-agent/data_20250612/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19042 samples from VC:s3://gui-agent/data_20250612/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8363 samples from VC:s3://gui-agent/data_20250612/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 26412 samples from VC:s3://gui-agent/data_20250612/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 57522 samples from VC:s3://gui-agent/data_20250612/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 1342 samples from VC:s3://gui-agent/data_20250624/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 15766 samples from VC:s3://gui-agent/data_20250624/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19280 samples from VC:s3://gui-agent/data_20250630/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 3560 samples from VC:s3://gui-agent/data_20250630/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 9258 samples from VC:s3://gui-agent/data_20250630/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 420 samples from VC:s3://gui-agent/data_20250707/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 8898 samples from VC:s3://gui-agent/data_20250707/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 21026 samples from VC:s3://gui-agent/data_20250707/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 4490 samples from VC:s3://gui-agent/data_20250707/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 22154 samples from VC:s3://gui-agent/data_20250714/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/web/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 11614 samples from VC:s3://gui-agent/data_20250714/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 16767 samples from VC:s3://gui-agent/data_20250714/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 48814 samples from VC:s3://gui-agent/data_20250612/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 11970 samples from VC:s3://gui-agent/data_20250612/android/planning_20250811.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19042 samples from VC:s3://gui-agent/data_20250612/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 4926 samples from VC:s3://gui-agent/data_20250612/mac/planning_20250811.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8363 samples from VC:s3://gui-agent/data_20250612/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3716 samples from VC:s3://gui-agent/data_20250612/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 26412 samples from VC:s3://gui-agent/data_20250612/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 4420 samples from VC:s3://gui-agent/data_20250612/windows/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 115044 samples from VC:s3://gui-agent/data_20250612/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2684 samples from VC:s3://gui-agent/data_20250624/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 15766 samples from VC:s3://gui-agent/data_20250624/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7402 samples from VC:s3://gui-agent/data_20250624/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19280 samples from VC:s3://gui-agent/data_20250630/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3746 samples from VC:s3://gui-agent/data_20250630/android/planning_20250811.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 3560 samples from VC:s3://gui-agent/data_20250630/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 2469 samples from VC:s3://gui-agent/data_20250630/mac/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 18516 samples from VC:s3://gui-agent/data_20250630/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 420 samples from VC:s3://gui-agent/data_20250707/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 243 samples from VC:s3://gui-agent/data_20250707/mac/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 8898 samples from VC:s3://gui-agent/data_20250707/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 1188 samples from VC:s3://gui-agent/data_20250707/windows/planning_20250811.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8979 samples from VC:s3://gui-agent/data_20250707/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 1599 samples from VC:s3://gui-agent/data_20250707/android/planning_20250811.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 21026 samples from VC:s3://gui-agent/data_20250707/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2657 samples from VC:s3://gui-agent/data_20250707/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 22154 samples from VC:s3://gui-agent/data_20250714/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2383 samples from VC:s3://gui-agent/data_20250714/windows/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/web/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 23229 samples from VC:s3://gui-agent/data_20250714/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 16767 samples from VC:s3://gui-agent/data_20250714/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2389 samples from VC:s3://gui-agent/data_20250714/ubuntu/planning_20250811.jsonl +Rank 0: Loading private_aig_share_0815_logo_oral_operation_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_oral_operation_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_oral_operation_d240924_v1.jsonl +Rank 0: Loading private_aig_share_0815_logo_region_caption_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_region_caption_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_region_caption_d240924_v1.jsonl +Rank 0: Loading private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2.jsonl with all sampling strategy +Rank 0: Loaded 20293 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2.jsonl +Rank 0: Loading private_ui_phone_comment_20240606_json_d20241023_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_comment_20240606_json_d20241023_v2.jsonl with all sampling strategy +Rank 0: Loaded 1055 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_comment_20240606_json_d20241023_v2.jsonl +Rank 0: Loading private_ui_internal_aig_json_d241126 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_json_d241126.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 6837 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_json_d241126.jsonl +Rank 0: Loading private_ui_internal_aig_xml_d241126 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_xml_d241126.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 6873 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_xml_d241126.jsonl +Rank 0: Loading OS_Altas_androidworld_grounding_d241120_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/OS_Altas_androidworld_grounding_d241120_v1.jsonl with all sampling strategy +Rank 0: Loaded 89860 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/OS_Altas_androidworld_grounding_d241120_v1.jsonl +Rank 0: Loading private_ui_aig_share_long_caption_20240604_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_long_caption_20240604_v1.jsonl with repeat:4 sampling strategy +Rank 0: Loaded 3156 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_long_caption_20240604_v1.jsonl +Rank 0: Loading aw_1218_grounding +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/grounding_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/grounding_new.jsonl +Rank 0: Loading aw_1218_regioncaption +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/regioncaption_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/regioncaption_new.jsonl +Rank 0: Loading aw_1218_oral_operation +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/oral_operation_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/oral_operation_new.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600.jsonl with all sampling strategy +Rank 0: Loaded 6600 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1.jsonl +Rank 0: Loading private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607.jsonl with all sampling strategy +Rank 0: Loaded 24620 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607.jsonl +Rank 0: Loading private_ui_phone_2403_long_caption_d20240604_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d20240604_v2.jsonl with all sampling strategy +Rank 0: Loaded 17196 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d20240604_v2.jsonl +Rank 0: Loading private_ui_phone_2403_long_caption_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 5998 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d240430_v1.jsonl +Rank 0: Loading private_ui_phone_2403_ocr_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ocr_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 31276 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ocr_d240430_v1.jsonl +Rank 0: Loading screen_qa_with_bbox_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_with_bbox_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 62401 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_with_bbox_d240430_v1.jsonl +Rank 0: Loading screenai_layout_20240604_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screenai_layout_20240604_v1.jsonl with all sampling strategy +Rank 0: Loaded 22076 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screenai_layout_20240604_v1.jsonl +Rank 0: Loading amex_grounding_d240813_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/amex_grounding_d240813_v1.jsonl with all sampling strategy +Rank 0: Loaded 102007 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/amex_grounding_d240813_v1.jsonl +Rank 0: Loading guicourse_guienv_text_grounding_1_d240815_v3 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_1_d240815_v3.jsonl with all sampling strategy +Rank 0: Loaded 63581 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_1_d240815_v3.jsonl +Rank 0: Loading guicourse_guienv_text_grounding_2_d240815_v3 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_2_d240815_v3.jsonl with all sampling strategy +Rank 0: Loaded 6852 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_2_d240815_v3.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1.jsonl +Rank 0: Loading screen_qa_short_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_short_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 27880 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_short_d240430_v1.jsonl +Rank 0: Loading private_aig_share_0815_logo_grounding_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_grounding_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_grounding_d240924_v1.jsonl +Rank 0: Loading private_schedual_extract_20240520_v2_r464_reprompt_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_schedual_extract_20240520_v2_r464_reprompt_d240607.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 928 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_schedual_extract_20240520_v2_r464_reprompt_d240607.jsonl +Rank 0: Loading private_ui2json_app_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_app_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2488 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_app_d20240822_v1.jsonl +Rank 0: Loading private_ui2json_os_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_os_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 1242 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_os_d20240822_v1.jsonl +Rank 0: Loading private_ui2json_web_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_web_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2360 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_web_d20240822_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607.jsonl with all sampling strategy +Rank 0: Loaded 3791 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607.jsonl +Rank 0: Loading private_ui_aig_share_2405_marker_recognition_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_marker_recognition_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5179 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_marker_recognition_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_ocr_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_ocr_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5090 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_ocr_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_operation_oral_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_operation_oral_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5070 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_operation_oral_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5248 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2408_region_caption_d240903_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2408_region_caption_d240903_v1.jsonl with all sampling strategy +Rank 0: Loaded 5854 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2408_region_caption_d240903_v1.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1.jsonl +Rank 0: Loading uground_web_direct_150k_description_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_direct_150k_description_filtered_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 74208 samples from VC:s3://gui/new_annotations/uground/web_direct_150k_description_filtered_202507011.jsonl +Rank 0: Loading uground_web_direct_258k_function_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_direct_258k_function_filtered_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 124132 samples from VC:s3://gui/new_annotations/uground/web_direct_258k_function_filtered_202507011.jsonl +Rank 0: Loading uground_web_hybrid_773k_max_25qa_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_hybrid_773k_max_25qa_filtered_new_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 621928 samples from VC:s3://gui/new_annotations/uground/web_hybrid_773k_max_25qa_filtered_new_202507011.jsonl +Rank 0: Loading altas_windows +Rank 0: Loading VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_windows_splited_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 537672 samples from VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_windows_splited_202507011.jsonl +Rank 0: Loading altas_linux +Rank 0: Loading VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_linux_splited_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 21578 samples from VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_linux_splited_202507011.jsonl +Rank 0: Loading atlas_macos +Rank 0: Loading VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_macos_splited_202507011.jsonl with random:20% sampling strategy +Rank 0: Loaded 3680 samples from VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_macos_splited_202507011.jsonl +Rank 0: Loading android_action_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/android/filter_action_grounding_20250405_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 5621 samples from VC:s3://gui/data_20250328/android/filter_action_grounding_20250405_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250328 +Rank 0: Loading VC:s3://gui-agent/data_20250328/windows/action_grounding_20250409_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 11980 samples from VC:s3://gui-agent/data_20250328/windows/action_grounding_20250409_202507011_20250722.jsonl +Rank 0: Loading web_action_grounding_20250328 +Rank 0: Loading VC:s3://gui-agent/data_20250328/web_25k/action_grounding_20250404_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 9459 samples from VC:s3://gui-agent/data_20250328/web_25k/action_grounding_20250404_202507011.jsonl +Rank 0: Loading ubuntu_action_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/ubuntu/action_grounding_20250407_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 328 samples from VC:s3://gui/data_20250310/ubuntu/action_grounding_20250407_202507011.jsonl +Rank 0: Loading ubuntu_action_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/ubuntu/action_grounding_20250407_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 54 samples from VC:s3://gui/data_20250317/ubuntu/action_grounding_20250407_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/action_grounding_20250421_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 480 samples from VC:s3://gui/data_20250317/windows/action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/crop_action_grounding_20250421_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 240 samples from VC:s3://gui/data_20250317/windows/crop_action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/action_grounding_20250421_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 944 samples from VC:s3://gui/data_20250310/windows/action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/crop_action_grounding_20250421_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 472 samples from VC:s3://gui/data_20250310/windows/crop_action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading mac_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/mac/action_grounding_20250410_202507011.jsonl with all sampling strategy +Rank 0: Loaded 1578 samples from VC:s3://gui-agent/data_20250407/mac/action_grounding_20250410_202507011.jsonl +Rank 0: Loading iphone_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/iphone/white/action_grounding_20250410_202507011.jsonl with all sampling strategy +Rank 0: Loaded 20394 samples from VC:s3://gui-agent/data_20250407/iphone/white/action_grounding_20250410_202507011.jsonl +Rank 0: Loading web_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/web/action_grounding_20250414_202507011.jsonl with random:20% sampling strategy +Rank 0: Loaded 14285 samples from VC:s3://gui-agent/data_20250407/web/action_grounding_20250414_202507011.jsonl +Rank 0: Loading android_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/android/action_grounding_20250410_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 3590 samples from VC:s3://gui-agent/data_20250407/android/action_grounding_20250410_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/action_grounding_20250416_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 21422 samples from VC:s3://gui-agent/data_20250407/windows/action_grounding_20250416_202507011_20250722.jsonl +Rank 0: Loading windows_human_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/human_action_grounding_20250416_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 100 samples from VC:s3://gui-agent/data_20250407/windows/human_action_grounding_20250416_202507011_20250722.jsonl +Rank 0: Loading windows_aug_cropping_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/sub_action_grounding_20250421_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 7675 samples from VC:s3://gui-agent/data_20250407/windows/sub_action_grounding_20250421_202507011.jsonl +Rank 0: Loading iphone_action_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/action_grounding_20250417_202507011.jsonl with all sampling strategy +Rank 0: Loaded 20116 samples from VC:s3://gui-agent/data_20250414/iphone/action_grounding_20250417_202507011.jsonl +Rank 0: Loading iphone_human_action_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/human_action_grounding_20250421_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2520 samples from VC:s3://gui-agent/data_20250414/iphone/human_action_grounding_20250421_202507011.jsonl +Rank 0: Loading mac_human_action_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/mac/human_action_grounding_20250418_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7814 samples from VC:s3://gui-agent/data_20250414/mac/human_action_grounding_20250418_202507011.jsonl +Rank 0: Loading android_action_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/Android/action_grounding_20250429_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 17838 samples from VC:s3://gui-agent/data_20250421/Android/action_grounding_20250429_202507011.jsonl +Rank 0: Loading android_action_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/Android/action_grounding_20250429_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 9008 samples from VC:s3://gui-agent/data_20250428/Android/action_grounding_20250429_202507011.jsonl +Rank 0: Loading web_canvas_action_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/web_canvas/action_grounding_20250429_202507011.jsonl with random:20% sampling strategy +Rank 0: Loaded 624 samples from VC:s3://gui-agent/data_20250428/web_canvas/action_grounding_20250429_202507011.jsonl +Rank 0: Loading web_action_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/web/action_grounding_20250505_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 100652 samples from VC:s3://gui-agent/data_20250421/web/action_grounding_20250505_202507011.jsonl +Rank 0: Loading ubuntu_action_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/ubuntu/action_grounding_20250505_202507011.jsonl with all sampling strategy +Rank 0: Loaded 28346 samples from VC:s3://gui-agent/data_20250428/ubuntu/action_grounding_20250505_202507011.jsonl +Rank 0: Loading android_action_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/android/action_grounding_20250506_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 4907 samples from VC:s3://gui-agent/data_20250505/android/action_grounding_20250506_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/action_grounding_20250508_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 2635 samples from VC:s3://gui-agent/data_20250505/windows/action_grounding_20250508_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/crop_action_grounding_20250508_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 5234 samples from VC:s3://gui-agent/data_20250505/windows/crop_action_grounding_20250508_202507011_20250722.jsonl +Rank 0: Loading ubuntu_action_grounding_20250508 +Rank 0: Loading VC:s3://gui-agent/data_20250508/ubuntu/action_grounding_20250509_202507011.jsonl with all sampling strategy +Rank 0: Loaded 3404 samples from VC:s3://gui-agent/data_20250508/ubuntu/action_grounding_20250509_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250510_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250510_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250510_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250510_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250526_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250526_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250526_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250526_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250526_3 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250527_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250527_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_3 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250527_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250527_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250529_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 17050 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250529_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250529_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 17050 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250529_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_1 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250510_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 2855 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250510_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_2 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250526_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 655 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250526_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_3 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250527_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 833 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250527_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_4 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250529_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 2643 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250529_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_1 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250510_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250510_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_2 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250526_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250526_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_3 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250527_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250527_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_4 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250529_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5286 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250529_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_5 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250510_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250510_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_6 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250526_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250526_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_7 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250527_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250527_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_8 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250529_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5286 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250529_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/action_grounding_20250619_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3130 samples from VC:s3://gui-agent/data_20250623/windows/action_grounding_20250619_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250619_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 3130 samples from VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250619_202507011_20250722.jsonl +Rank 0: Loading windows_hover_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/action_grounding_20250620_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 3333 samples from VC:s3://gui-agent/data_20250623/windows/action_grounding_20250620_202507011_20250722.jsonl +Rank 0: Loading windows_crop_hover_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250620_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 3333 samples from VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250620_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_1 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 833 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_2 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1666 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_3 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1666 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_pure_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_4 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 358 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_5 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 782 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_6 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 782 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_pure_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/action_grounding_20250627_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7946 samples from VC:s3://gui-agent/data_20250630/windows/action_grounding_20250627_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/crop_action_grounding_20250627_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 3973 samples from VC:s3://gui-agent/data_20250630/windows/crop_action_grounding_20250627_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_1 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 993 samples from VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1986 samples from VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1986 samples from VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_pure_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/action_grounding_20250630_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 1990 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/action_grounding_20250630_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_action_grounding_20250630_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1990 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_action_grounding_20250630_202507011_20250722.jsonl +Rank 0: Loading windows_human_action_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/action_grounding_20250703_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 5040 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/action_grounding_20250703_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_action_grounding_20250703_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 5040 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_action_grounding_20250703_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_4 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 630 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_5 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1260 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_6 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1260 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_pure_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_7 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 249 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_8 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 498 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_9 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 498 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_pure_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/action_grounding_20250708_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2538 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/action_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_action_grounding_20250708_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1269 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_action_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250707_1 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_action_grounding_20250708.jsonl with random:25% sampling strategy +Rank 0: Loaded 172 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_action_grounding_20250708.jsonl +Rank 0: Loading windows_aug_action_grounding_20250707_2 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_action_grounding_20250708.jsonl with random:25% sampling strategy +Rank 0: Loaded 634 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_action_grounding_20250708.jsonl +Rank 0: Loading windows_aug_action_grounding_20250707_3 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_action_grounding_20250708.jsonl with random:25% sampling strategy +Rank 0: Loaded 634 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_action_grounding_20250708.jsonl +Rank 0: Loading windows_human_action_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/action_grounding_20250717_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2832 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/action_grounding_20250717_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_action_grounding_20250717_20250722.jsonl with all sampling strategy +Rank 0: Loaded 2832 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_action_grounding_20250717_20250722.jsonl +Rank 0: Loading android_ocr_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/android/text_ocr_20250409.jsonl with all sampling strategy +Rank 0: Loaded 29878 samples from VC:s3://gui/data_20250328/android/text_ocr_20250409.jsonl +Rank 0: Loading mac_orc_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/mac/element_ocr_20250328.jsonl with all sampling strategy +Rank 0: Loaded 4393 samples from VC:s3://gui/data_20250328/mac/element_ocr_20250328.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/ubuntu/internvl_grounding_20250407.jsonl with random:80% sampling strategy +Rank 0: Loaded 254 samples from VC:s3://gui/data_20250310/ubuntu/internvl_grounding_20250407.jsonl +Rank 0: Loading windows_click_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/internvl_grounding_function_20250421.jsonl with random:80% sampling strategy +Rank 0: Loaded 1802 samples from VC:s3://gui/data_20250310/windows/internvl_grounding_function_20250421.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/ubuntu/internvl_grounding_20250407.jsonl with random:80% sampling strategy +Rank 0: Loaded 53 samples from VC:s3://gui/data_20250317/ubuntu/internvl_grounding_20250407.jsonl +Rank 0: Loading windows_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/internvl_grounding_20250421_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 6578 samples from VC:s3://gui/data_20250317/windows/internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/crop_internvl_grounding_20250421_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 3287 samples from VC:s3://gui/data_20250317/windows/crop_internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading windows_click_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/internvl_grounding_function_20250421_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 542 samples from VC:s3://gui/data_20250317/windows/internvl_grounding_function_20250421_20250722.jsonl +Rank 0: Loading windows_crop_click_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/crop_internvl_grounding_function_20250421_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 270 samples from VC:s3://gui/data_20250317/windows/crop_internvl_grounding_function_20250421_20250722.jsonl +Rank 0: Loading windows_internvl_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/internvl_grounding_20250421_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 10908 samples from VC:s3://gui/data_20250310/windows/internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/crop_internvl_grounding_20250421_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 5453 samples from VC:s3://gui/data_20250310/windows/crop_internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading android_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/android/internvl_grounding_20250409.jsonl with random:80% sampling strategy +Rank 0: Loaded 13950 samples from VC:s3://gui/data_20250328/android/internvl_grounding_20250409.jsonl +Rank 0: Loading windows_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui-agent/data_20250328/windows/internvl_grounding_20250425_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 12390 samples from VC:s3://gui-agent/data_20250328/windows/internvl_grounding_20250425_20250722.jsonl +Rank 0: Loading web_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/web_25k/internvl_grounding_20250409.jsonl with random:80% sampling strategy +Rank 0: Loaded 13402 samples from VC:s3://gui/data_20250328/web_25k/internvl_grounding_20250409.jsonl +Rank 0: Loading icon_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/icon_canva/icon_anno_20250328.jsonl with all sampling strategy +Rank 0: Loaded 81303 samples from VC:s3://gui/data_20250328/icon_canva/icon_anno_20250328.jsonl +Rank 0: Loading mac_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/mac/internvl_grounding_20250410.jsonl with all sampling strategy +Rank 0: Loaded 1251 samples from VC:s3://gui-agent/data_20250407/mac/internvl_grounding_20250410.jsonl +Rank 0: Loading iphone_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/iphone/white/internvl_grounding_20250410.jsonl with all sampling strategy +Rank 0: Loaded 27849 samples from VC:s3://gui-agent/data_20250407/iphone/white/internvl_grounding_20250410.jsonl +Rank 0: Loading web_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/web/internvl_grounding_20250414.jsonl with random:80% sampling strategy +Rank 0: Loaded 51607 samples from VC:s3://gui-agent/data_20250407/web/internvl_grounding_20250414.jsonl +Rank 0: Loading android_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/android/internvl_grounding_20250410.jsonl with random:80% sampling strategy +Rank 0: Loaded 6281 samples from VC:s3://gui-agent/data_20250407/android/internvl_grounding_20250410.jsonl +Rank 0: Loading windows_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/internvl_grounding_20250416_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 25961 samples from VC:s3://gui-agent/data_20250407/windows/internvl_grounding_20250416_20250722.jsonl +Rank 0: Loading windows_cropping_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/sub_internvl_grounding_20250421.jsonl with random:40% sampling strategy +Rank 0: Loaded 9763 samples from VC:s3://gui-agent/data_20250407/windows/sub_internvl_grounding_20250421.jsonl +Rank 0: Loading iphone_internvl_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/internvl_grounding_20250417.jsonl with all sampling strategy +Rank 0: Loaded 16896 samples from VC:s3://gui-agent/data_20250414/iphone/internvl_grounding_20250417.jsonl +Rank 0: Loading iphone_human_internvl_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/human_internvl_grounding_20250421.jsonl with all sampling strategy +Rank 0: Loaded 927 samples from VC:s3://gui-agent/data_20250414/iphone/human_internvl_grounding_20250421.jsonl +Rank 0: Loading mac_human_internvl_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/mac/human_internvl_grounding_20250418.jsonl with all sampling strategy +Rank 0: Loaded 3051 samples from VC:s3://gui-agent/data_20250414/mac/human_internvl_grounding_20250418.jsonl +Rank 0: Loading android_internvl_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/Android/internvl_grounding_20250429.jsonl with random:80% sampling strategy +Rank 0: Loaded 25217 samples from VC:s3://gui-agent/data_20250421/Android/internvl_grounding_20250429.jsonl +Rank 0: Loading android_internvl_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/Android/internvl_grounding_20250429.jsonl with random:80% sampling strategy +Rank 0: Loaded 12677 samples from VC:s3://gui-agent/data_20250428/Android/internvl_grounding_20250429.jsonl +Rank 0: Loading web_canvas_internvl_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/web_canvas/internvl_grounding_20250429.jsonl with random:40% sampling strategy +Rank 0: Loaded 1566 samples from VC:s3://gui-agent/data_20250428/web_canvas/internvl_grounding_20250429.jsonl +Rank 0: Loading web_internvl_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/web/internvl_grounding_20250505.jsonl with random:80% sampling strategy +Rank 0: Loaded 174170 samples from VC:s3://gui-agent/data_20250421/web/internvl_grounding_20250505.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/ubuntu/internvl_grounding_20250505.jsonl with random:80% sampling strategy +Rank 0: Loaded 24862 samples from VC:s3://gui-agent/data_20250428/ubuntu/internvl_grounding_20250505.jsonl +Rank 0: Loading android_internvl_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/android/internvl_grounding_20250506.jsonl with random:80% sampling strategy +Rank 0: Loaded 9142 samples from VC:s3://gui-agent/data_20250505/android/internvl_grounding_20250506.jsonl +Rank 0: Loading windows_internvl_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/internvl_grounding_20250508_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 4229 samples from VC:s3://gui-agent/data_20250505/windows/internvl_grounding_20250508_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/crop_internvl_grounding_20250508_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 4200 samples from VC:s3://gui-agent/data_20250505/windows/crop_internvl_grounding_20250508_20250722.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250508 +Rank 0: Loading VC:s3://gui-agent/data_20250508/ubuntu/internvl_grounding_20250509.jsonl with random:80% sampling strategy +Rank 0: Loaded 2868 samples from VC:s3://gui-agent/data_20250508/ubuntu/internvl_grounding_20250509.jsonl +Rank 0: Loading windows_internvl_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250510_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 9494 samples from VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250510_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250510_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 9494 samples from VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250510_20250722.jsonl +Rank 0: Loading windows_internvl_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250526_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 2270 samples from VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250526_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250526_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 2270 samples from VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250526_20250722.jsonl +Rank 0: Loading windows_internvl_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250529_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 28466 samples from VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250529_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250529_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 28466 samples from VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250529_20250722.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250619_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1620 samples from VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250619_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250619_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1620 samples from VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250619_20250722.jsonl +Rank 0: Loading windows_hover_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250620_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1714 samples from VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250620_20250722.jsonl +Rank 0: Loading windows_crop_hover_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250620_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 1714 samples from VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250620_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_1 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 1372 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_2 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2743 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_3 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2743 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_pure_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_4 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 590 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_5 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 1296 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_6 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 1296 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_pure_paste.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/internvl_grounding_20250627_20250722.jsonl with all sampling strategy +Rank 0: Loaded 4230 samples from VC:s3://gui-agent/data_20250630/windows/internvl_grounding_20250627_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/crop_internvl_grounding_20250627_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 4230 samples from VC:s3://gui-agent/data_20250630/windows/crop_internvl_grounding_20250627_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_1 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 1692 samples from VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 3384 samples from VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 3384 samples from VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_pure_paste.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/internvl_grounding_20250630_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1077 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/internvl_grounding_20250630_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_internvl_grounding_20250630_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 862 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_internvl_grounding_20250630_20250722.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/internvl_grounding_20250703_20250722.jsonl with all sampling strategy +Rank 0: Loaded 2676 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/internvl_grounding_20250703_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_internvl_grounding_20250703_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 2676 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_internvl_grounding_20250703_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_4 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 1070 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_5 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2141 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_6 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2141 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_pure_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_7 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 431 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_8 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 862 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_9 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 862 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_pure_paste.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/internvl_grounding_20250708_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1344 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/internvl_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_internvl_grounding_20250708_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1344 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_internvl_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250707_1 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_internvl_grounding_20250708.jsonl with random:40% sampling strategy +Rank 0: Loaded 292 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_internvl_grounding_20250708.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250707_2 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_internvl_grounding_20250708.jsonl with random:40% sampling strategy +Rank 0: Loaded 1075 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_internvl_grounding_20250708.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250707_3 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_internvl_grounding_20250708.jsonl with random:40% sampling strategy +Rank 0: Loaded 1075 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_internvl_grounding_20250708.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/internvl_grounding_20250717_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1509 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/internvl_grounding_20250717_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_internvl_grounding_20250717_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 1207 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_internvl_grounding_20250717_20250722.jsonl +Rank 0: Loading uibert_train_ground_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/uibert_train_ground_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 4646 samples from VC:s3://gui/new_annotations/gui_data_grounding/uibert_train_ground_d240430_v1.jsonl +Rank 0: Loading openapp_taperception_grounding_d240815_v2 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/openapp_taperception_grounding_d240815_v2.jsonl with all sampling strategy +Rank 0: Loaded 2500 samples from VC:s3://gui/new_annotations/gui_data_grounding/openapp_taperception_grounding_d240815_v2.jsonl +Rank 0: Loading openapp_widget_grounding_d240815_v2 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/openapp_widget_grounding_d240815_v2.jsonl with all sampling strategy +Rank 0: Loaded 14878 samples from VC:s3://gui/new_annotations/gui_data_grounding/openapp_widget_grounding_d240815_v2.jsonl +Rank 0: Loading openapp_mug_grounding_d240812 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/openapp_mug_grounding_d240812.jsonl with all sampling strategy +Rank 0: Loaded 26090 samples from VC:s3://gui/new_annotations/gui_data_grounding/openapp_mug_grounding_d240812.jsonl +Rank 0: Loading private_ui_phone_2403_ground_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/private_ui_phone_2403_ground_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 24798 samples from VC:s3://gui/new_annotations/gui_data_grounding/private_ui_phone_2403_ground_d240430_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_ground_d240521_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2405_ground_d240521_v1.jsonl with all sampling strategy +Rank 0: Loaded 5008 samples from VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2405_ground_d240521_v1.jsonl +Rank 0: Loading private_ui_aig_share_2406_ground_d240612_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2406_ground_d240612_v1.jsonl with all sampling strategy +Rank 0: Loaded 7903 samples from VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2406_ground_d240612_v1.jsonl +Rank 0: Loading windows_pc_agent_e_planning_cot +Rank 0: Loading VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 55564 samples from VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data.jsonl +Rank 0: Loading windows_pc_agent_e_navigation +Rank 0: Loading VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data_without_think.jsonl with all sampling strategy +Rank 0: Loaded 27782 samples from VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data_without_think.jsonl +Rank 0: Loading os_genesis_ac_training_data +Rank 0: Skipping os_genesis_ac_training_data due to repeat_time=0 +Rank 0: Loading os_genesis_aw_training_data +Rank 0: Skipping os_genesis_aw_training_data due to repeat_time=0 +Rank 0: Loading os_genesis_web_training +Rank 0: Skipping os_genesis_web_training due to repeat_time=0 +Rank 0: Loading gui_odyssey_plus_1 +Rank 0: Skipping gui_odyssey_plus_1 due to repeat_time=0 +Rank 0: Loading gui_odyssey_plus_2 +Rank 0: Skipping gui_odyssey_plus_2 due to repeat_time=0 +Rank 0: Loading gui_odyssey_plus_custom_3 +Rank 0: Skipping gui_odyssey_plus_custom_3 due to repeat_time=0 +Rank 0: Loading mm_gui_mid +Rank 0: Skipping mm_gui_mid due to repeat_time=0 +Rank 0: Loading text_gui_mid +Rank 0: Skipping text_gui_mid due to repeat_time=0 +Rank 0: Loading gui_mid_trajectory +Rank 0: Skipping gui_mid_trajectory due to repeat_time=0 +Rank 0: Loading ubuntu_rag +Rank 0: Loading VC:s3://gui-agent/cua_text_rag/ubuntu_rag.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7024 samples from VC:s3://gui-agent/cua_text_rag/ubuntu_rag.jsonl +Rank 0: Loading windows_rag +Rank 0: Loading VC:s3://gui-agent/cua_text_rag/windows_rag.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3144 samples from VC:s3://gui-agent/cua_text_rag/windows_rag.jsonl +Rank 0: Total training samples: 6240940 +Rank 0: Formatting inputs...Skip in lazy mode +Detected kernel version 3.10.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher. +Rank 0: Length of multimodal samples: 6230528, pure textual samples: 9984 +Parameter Offload: Total persistent parameters: 755712 in 408 params +W0817 21:44:12.486000 63790 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 21:44:12.486000 63790 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 21:44:12.486000 63790 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 21:44:12.486000 63790 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 21:44:12.499000 36815 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 21:44:12.499000 36815 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 21:44:12.499000 36815 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 21:44:12.499000 36815 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 21:44:12.502000 95144 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 21:44:12.502000 95144 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 21:44:12.502000 95144 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 21:44:12.502000 95144 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 21:44:16.297000 33653 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 21:44:16.297000 33653 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 21:44:16.297000 33653 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 21:44:16.297000 33653 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 21:44:16.304000 126735 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 21:44:16.304000 126735 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 21:44:16.304000 126735 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 21:44:16.304000 126735 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 21:44:17.643000 78935 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 21:44:17.643000 78935 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 21:44:17.643000 78935 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 21:44:17.643000 78935 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +[2025-08-17 21:44:35,181] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:35,182] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:35,202] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:35,221] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:35,222] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:35,226] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:35,231] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:35,232] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:35,232] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:35,232] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:35,227] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:35,227] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:35,227] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:35,228] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:35,228] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:35,229] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:35,231] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:35,231] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:35,231] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:35,233] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:35,233] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:35,233] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:35,233] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:39,228] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:44:39,228] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:44:39,228] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:44:39,228] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:44:39,228] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:44:39,229] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:44:39,230] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:44:39,230] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:44:39,624] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:44:39,625] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:44:39,625] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:44:39,625] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:44:39,625] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:44:39,625] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:44:39,625] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:44:39,625] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:44:39,680] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:44:39,680] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:44:39,680] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:44:39,680] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:44:39,680] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:44:39,680] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:44:39,680] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:44:39,681] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:44:39,682] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:44:39,814] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:44:39,815] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:44:39,816] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:44:39,821] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:44:39,821] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:44:39,822] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:44:39,825] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:44:40,115] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:44:40,140] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:44:40,244] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:44:40,242] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:44:40,243] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:44:40,243] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:44:40,244] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:44:40,244] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:44:40,246] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:44:40,248] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:44:40,260] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +ce None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:44:40,265] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:44:40,266] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:44:40,268] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:44:44,914] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:44,914] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:44,919] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:44,920] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:44,931] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:44,939] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:44,939] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:44,947] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:44,948] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:44,948] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:44,950] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:44,951] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:44,953] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:44,953] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:44,953] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:44,962] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:44,962] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:44,963] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:44,963] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:44,963] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:44,967] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:44,968] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:44,969] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:44,981] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:44,978] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:44,978] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:44,979] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:44,979] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:44,981] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:44,982] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:44,982] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:44,983] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:44,983] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:44,983] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:44,983] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:44:51,236] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:44:51,242] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:44:51,237] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:44:51,238] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:44:51,243] [INFO] [comm.py:683:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl +[2025-08-17 21:44:51,244][2025-08-17 21:44:51,245] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:44:51,245] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:44:51,240] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:44:51,247] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:44:51,248] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:44:51,248] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:44:51,246] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:44:51,247] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:44:51,249] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:44:51,253] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:44:51,254] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:44:51,250] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:44:51,252] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:44:51,698] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:44:51,725] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:44:51,740] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:44:51,824] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:44:51,822] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:44:51,827] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:44:51,828] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:44:51,831] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:44:51,833] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:44:51,834] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:44:51,851] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:44:51,864] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:44:51,867] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:44:51,868] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:44:51,869] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:44:51,870] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:44:51,872] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:44:51,876] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +ce None world_size = 64 +[2025-08-17 21:44:51,878] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:44:51,879] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:44:51,881] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:44:51,883] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:44:51,886] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:44:51,888] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:44:51,897] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:44:51,966] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:44:51,967] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:44:51,970] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:44:51,976] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:44:51,977] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:44:51,980] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:44:51,983] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. + Loading checkpoint shards: 0%| | 0/2 [00:00 before Client(conf_path) +Rank 0: --> after Client(conf_path) +Rank 0: Loading datasets: /mnt/petrelfs/liuzhaoyang/workspace/GUIAgent/internvl_chat/data/internvl_meta/meta/meta_250816_1.json +Rank 0: Loading guienv +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl with all sampling strategy +Rank 0: Loaded 327972 samples from VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl +Rank 0: Loading omniact +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl with all sampling strategy +Rank 0: Loaded 6720 samples from VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl +Rank 0: Loading ricoig16k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16133 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl +Rank 0: Loading ricosca +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl with all sampling strategy +Rank 0: Loaded 173212 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl +Rank 0: Loading seeclick +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl with all sampling strategy +Rank 0: Loaded 271121 samples from VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl +Rank 0: Loading ui_refexp +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl with all sampling strategy +Rank 0: Loaded 15624 samples from VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl +Rank 0: Loading webui350k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 57389 samples from VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl +Rank 0: Loading widget_captioning +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl with all sampling strategy +Rank 0: Loaded 101426 samples from VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl +Rank 0: Loading aitw-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl +Rank 0: Loading aitw-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl +Rank 0: Loading aitw-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl +Rank 0: Loading amex-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl +Rank 0: Loading amex-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl +Rank 0: Loading amex-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl +Rank 0: Loading android_control +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 149428 samples from VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl +Rank 0: Loading coat +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl with all sampling strategy +Rank 0: Loaded 11833 samples from VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl +Rank 0: Loading guiact-web-multi-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl +Rank 0: Loading guiact-web-multi-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl +Rank 0: Loading guiact-web-multi-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl +Rank 0: Loading guiact-web-single +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl with all sampling strategy +Rank 0: Loaded 67396 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl +Rank 0: Loading guide +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl with all sampling strategy +Rank 0: Loaded 13544 samples from VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl +Rank 0: Loading gui-odyssey-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl +Rank 0: Loading gui-odyssey-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl +Rank 0: Loading gui-odyssey-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl +Rank 0: Loading mind2web-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l1_202507011.jsonl +Rank 0: Loading mind2web-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l2_202507011.jsonl +Rank 0: Loading mind2web-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l3_202507011.jsonl +Rank 0: Loading miniwob-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l1_202507011.jsonl +Rank 0: Loading miniwob-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l2_202507011.jsonl +Rank 0: Loading miniwob-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l3_202507011.jsonl +Rank 0: Loading aguvis_android_control-v2 +Rank 0: Skipping aguvis_android_control-v2 due to repeat_time=0 +Rank 0: Loading aguvis_coat-v2 +Rank 0: Skipping aguvis_coat-v2 due to repeat_time=0 +Rank 0: Loading aguvis_docvqa_grounding +Rank 0: Skipping aguvis_docvqa_grounding due to repeat_time=0 +Rank 0: Loading aguvis_guiact-web-multi +Rank 0: Skipping aguvis_guiact-web-multi due to repeat_time=0 +Rank 0: Loading aguvis_guiact-web-single-v2 +Rank 0: Skipping aguvis_guiact-web-single-v2 due to repeat_time=0 +Rank 0: Loading aguvis_guide_si_10k-v2 +Rank 0: Skipping aguvis_guide_si_10k-v2 due to repeat_time=0 +Rank 0: Loading aguvis_guienv +Rank 0: Skipping aguvis_guienv due to repeat_time=0 +Rank 0: Loading aguvis_mind2web_train_v1.0.1 +Rank 0: Skipping aguvis_mind2web_train_v1.0.1 due to repeat_time=0 +Rank 0: Loading aguvis_omniact +Rank 0: Skipping aguvis_omniact due to repeat_time=0 +Rank 0: Loading aguvis_osatlas_ui_tars_cleaned +Rank 0: Skipping aguvis_osatlas_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading aguvis_ricoig16k +Rank 0: Skipping aguvis_ricoig16k due to repeat_time=0 +Rank 0: Loading aguvis_ricosca +Rank 0: Skipping aguvis_ricosca due to repeat_time=0 +Rank 0: Loading aguvis_seeclick_mi_ui_tars_cleaned +Rank 0: Skipping aguvis_seeclick_mi_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading aguvis_seeclick_ui_tars_cleaned_fixed +Rank 0: Skipping aguvis_seeclick_ui_tars_cleaned_fixed due to repeat_time=0 +Rank 0: Loading aguvis_ui_refexp +Rank 0: Skipping aguvis_ui_refexp due to repeat_time=0 +Rank 0: Loading aguvis_webui350k +Rank 0: Skipping aguvis_webui350k due to repeat_time=0 +Rank 0: Loading aguvis_widget_captioning +Rank 0: Skipping aguvis_widget_captioning due to repeat_time=0 +Rank 0: Loading icon_caption_icon_v0222_description +Rank 0: Skipping icon_caption_icon_v0222_description due to repeat_time=0 +Rank 0: Loading icon_grounding_icon_v0222_grounding +Rank 0: Skipping icon_grounding_icon_v0222_grounding due to repeat_time=0 +Rank 0: Loading refusal_component_final_1.5m +Rank 0: Skipping refusal_component_final_1.5m due to repeat_time=0 +Rank 0: Loading refusal_component_library_snap_icon_data_grounding +Rank 0: Skipping refusal_component_library_snap_icon_data_grounding due to repeat_time=0 +Rank 0: Loading refusal_component_v1_130k +Rank 0: Skipping refusal_component_v1_130k due to repeat_time=0 +Rank 0: Loading refusal_guienv +Rank 0: Skipping refusal_guienv due to repeat_time=0 +Rank 0: Loading refusal_icon_v0222_grounding +Rank 0: Skipping refusal_icon_v0222_grounding due to repeat_time=0 +Rank 0: Loading refusal_osatlas_ui_tars_cleaned +Rank 0: Skipping refusal_osatlas_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading refusal_ricosca +Rank 0: Skipping refusal_ricosca due to repeat_time=0 +Rank 0: Loading refusal_seeclick_mi_ui_tars_cleaned +Rank 0: Skipping refusal_seeclick_mi_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading refusal_seeclick_ui_tars_cleaned_fixed +Rank 0: Skipping refusal_seeclick_ui_tars_cleaned_fixed due to repeat_time=0 +Rank 0: Loading refusal_training_data_icon_grounded_merged +Rank 0: Skipping refusal_training_data_icon_grounded_merged due to repeat_time=0 +Rank 0: Loading component_generated_component_final_1.5m_cleaned_split +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_final_1.5m_cleaned_split_20250713.jsonl with random:10% sampling strategy +Rank 0: Loaded 3987 samples from VC:s3://gui-agent/jedi/annotations_250713/component_final_1.5m_cleaned_split_20250713.jsonl +Rank 0: Loading component_generated_component_library_snap_icon_data_description +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_description_conversations_20250713.jsonl with random:50% sampling strategy +Rank 0: Loaded 11061 samples from VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_description_conversations_20250713.jsonl +Rank 0: Loading component_generated_component_library_snap_icon_data_grounding +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_grounding_conversations_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 4424 samples from VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_grounding_conversations_20250713.jsonl +Rank 0: Loading component_generated_component_v1_130k +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_v1_130k_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 26376 samples from VC:s3://gui-agent/jedi/annotations_250713/component_v1_130k_20250713.jsonl +Rank 0: Loading component_rule-based_doc_data_new +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/doc_data_new_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 3153 samples from VC:s3://gui-agent/jedi/annotations_250713/doc_data_new_20250713.jsonl +Rank 0: Loading component_rule-based_doc_scroll_data_new +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/doc_scroll_data_new_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 603 samples from VC:s3://gui-agent/jedi/annotations_250713/doc_scroll_data_new_20250713.jsonl +Rank 0: Loading component_rule-based_ethercalc_v1 +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/ethercalc_v1_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 2012 samples from VC:s3://gui-agent/jedi/annotations_250713/ethercalc_v1_20250713.jsonl +Rank 0: Loading component_rule-based_slide_v1_17k +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/slide_v1_17k_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 2363 samples from VC:s3://gui-agent/jedi/annotations_250713/slide_v1_17k_20250713.jsonl +Rank 0: Loading icon_caption_ios_app_data +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/ios_app_data_conversations-images_pure_color_background_20250713.jsonl with all sampling strategy +Rank 0: Loaded 49498 samples from VC:s3://gui-agent/jedi/annotations_250713/ios_app_data_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_caption_mac_app_data +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/mac_app_data_conversations-images_pure_color_background_20250713.jsonl with all sampling strategy +Rank 0: Loaded 18083 samples from VC:s3://gui-agent/jedi/annotations_250713/mac_app_data_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_caption_training_data_icon +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_pure_color_background_20250713.jsonl with random:50% sampling strategy +Rank 0: Loaded 75874 samples from VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_grounding_training_data_icon_grounded_merged +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_grounded_merged_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 5466 samples from VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_grounded_merged_20250713.jsonl +Rank 0: Loading layout_layout200k_training_data_qwen25 +Rank 0: Skipping layout_layout200k_training_data_qwen25 due to repeat_time=0 +Rank 0: Loading layout_layout200k_grounding_training_data_qwen25 +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/layout200k_grounding_training_data_qwen25_20250713.jsonl with random:10% sampling strategy +Rank 0: Loaded 158612 samples from VC:s3://gui-agent/jedi/annotations_250713/layout200k_grounding_training_data_qwen25_20250713.jsonl +Rank 0: Loading layout_layout400k_claude_training_data_qwen25_split +Rank 0: Skipping layout_layout400k_claude_training_data_qwen25_split due to repeat_time=0 +Rank 0: Loading layout_layout400k_claude_grounding_training_data_qwen25_split +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/layout400k_claude_grounding_training_data_qwen25_split_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 7540 samples from VC:s3://gui-agent/jedi/annotations_250713/layout400k_claude_grounding_training_data_qwen25_split_20250713.jsonl +Rank 0: Loading layout_os_layout_v1 +Rank 0: Skipping layout_os_layout_v1 due to repeat_time=0 +Rank 0: Loading layout_os_layout_v1_grounding +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/os_layout_v1_grounding_training_data_qwen25_split_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 7857 samples from VC:s3://gui-agent/jedi/annotations_250713/os_layout_v1_grounding_training_data_qwen25_split_20250713.jsonl +Rank 0: Loading mind2web_raw_image +Rank 0: Loading VC:s3://gui-agent/mind2web_train/navigation_20250705.jsonl with all sampling strategy +Rank 0: Loaded 5740 samples from VC:s3://gui-agent/mind2web_train/navigation_20250705.jsonl +Rank 0: Loading ws_android_navigation_20250328 +Rank 0: Skipping ws_android_navigation_20250328 due to repeat_time=0 +Rank 0: Loading ws_android_navigation_20250407 +Rank 0: Skipping ws_android_navigation_20250407 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_w_history_20250328 +Rank 0: Skipping ws_web_navigation_w_history_20250328 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_wo_history_20250328 +Rank 0: Skipping ws_web_navigation_wo_history_20250328 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_20250421 +Rank 0: Skipping ws_web_navigation_20250421 due to repeat_time=0 +Rank 0: Loading ws_ubuntu_navigation_20250328 +Rank 0: Skipping ws_ubuntu_navigation_20250328 due to repeat_time=0 +Rank 0: Loading ws_android_navigation_20250505 +Rank 0: Skipping ws_android_navigation_20250505 due to repeat_time=0 +Rank 0: Loading internal_android_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 48814 samples from VC:s3://gui-agent/data_20250612/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19042 samples from VC:s3://gui-agent/data_20250612/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8363 samples from VC:s3://gui-agent/data_20250612/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 26412 samples from VC:s3://gui-agent/data_20250612/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 57522 samples from VC:s3://gui-agent/data_20250612/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 1342 samples from VC:s3://gui-agent/data_20250624/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 15766 samples from VC:s3://gui-agent/data_20250624/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19280 samples from VC:s3://gui-agent/data_20250630/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 3560 samples from VC:s3://gui-agent/data_20250630/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 9258 samples from VC:s3://gui-agent/data_20250630/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 420 samples from VC:s3://gui-agent/data_20250707/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 8898 samples from VC:s3://gui-agent/data_20250707/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 21026 samples from VC:s3://gui-agent/data_20250707/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 4490 samples from VC:s3://gui-agent/data_20250707/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 22154 samples from VC:s3://gui-agent/data_20250714/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/web/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 11614 samples from VC:s3://gui-agent/data_20250714/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 16767 samples from VC:s3://gui-agent/data_20250714/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 48814 samples from VC:s3://gui-agent/data_20250612/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 11970 samples from VC:s3://gui-agent/data_20250612/android/planning_20250811.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19042 samples from VC:s3://gui-agent/data_20250612/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 4926 samples from VC:s3://gui-agent/data_20250612/mac/planning_20250811.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8363 samples from VC:s3://gui-agent/data_20250612/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3716 samples from VC:s3://gui-agent/data_20250612/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 26412 samples from VC:s3://gui-agent/data_20250612/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 4420 samples from VC:s3://gui-agent/data_20250612/windows/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 115044 samples from VC:s3://gui-agent/data_20250612/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2684 samples from VC:s3://gui-agent/data_20250624/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 15766 samples from VC:s3://gui-agent/data_20250624/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7402 samples from VC:s3://gui-agent/data_20250624/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19280 samples from VC:s3://gui-agent/data_20250630/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3746 samples from VC:s3://gui-agent/data_20250630/android/planning_20250811.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 3560 samples from VC:s3://gui-agent/data_20250630/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 2469 samples from VC:s3://gui-agent/data_20250630/mac/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 18516 samples from VC:s3://gui-agent/data_20250630/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 420 samples from VC:s3://gui-agent/data_20250707/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 243 samples from VC:s3://gui-agent/data_20250707/mac/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 8898 samples from VC:s3://gui-agent/data_20250707/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 1188 samples from VC:s3://gui-agent/data_20250707/windows/planning_20250811.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8979 samples from VC:s3://gui-agent/data_20250707/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 1599 samples from VC:s3://gui-agent/data_20250707/android/planning_20250811.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 21026 samples from VC:s3://gui-agent/data_20250707/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2657 samples from VC:s3://gui-agent/data_20250707/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 22154 samples from VC:s3://gui-agent/data_20250714/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2383 samples from VC:s3://gui-agent/data_20250714/windows/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/web/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 23229 samples from VC:s3://gui-agent/data_20250714/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 16767 samples from VC:s3://gui-agent/data_20250714/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2389 samples from VC:s3://gui-agent/data_20250714/ubuntu/planning_20250811.jsonl +Rank 0: Loading private_aig_share_0815_logo_oral_operation_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_oral_operation_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_oral_operation_d240924_v1.jsonl +Rank 0: Loading private_aig_share_0815_logo_region_caption_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_region_caption_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_region_caption_d240924_v1.jsonl +Rank 0: Loading private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2.jsonl with all sampling strategy +Rank 0: Loaded 20293 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2.jsonl +Rank 0: Loading private_ui_phone_comment_20240606_json_d20241023_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_comment_20240606_json_d20241023_v2.jsonl with all sampling strategy +Rank 0: Loaded 1055 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_comment_20240606_json_d20241023_v2.jsonl +Rank 0: Loading private_ui_internal_aig_json_d241126 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_json_d241126.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 6837 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_json_d241126.jsonl +Rank 0: Loading private_ui_internal_aig_xml_d241126 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_xml_d241126.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 6873 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_xml_d241126.jsonl +Rank 0: Loading OS_Altas_androidworld_grounding_d241120_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/OS_Altas_androidworld_grounding_d241120_v1.jsonl with all sampling strategy +Rank 0: Loaded 89860 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/OS_Altas_androidworld_grounding_d241120_v1.jsonl +Rank 0: Loading private_ui_aig_share_long_caption_20240604_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_long_caption_20240604_v1.jsonl with repeat:4 sampling strategy +Rank 0: Loaded 3156 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_long_caption_20240604_v1.jsonl +Rank 0: Loading aw_1218_grounding +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/grounding_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/grounding_new.jsonl +Rank 0: Loading aw_1218_regioncaption +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/regioncaption_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/regioncaption_new.jsonl +Rank 0: Loading aw_1218_oral_operation +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/oral_operation_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/oral_operation_new.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600.jsonl with all sampling strategy +Rank 0: Loaded 6600 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1.jsonl +Rank 0: Loading private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607.jsonl with all sampling strategy +Rank 0: Loaded 24620 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607.jsonl +Rank 0: Loading private_ui_phone_2403_long_caption_d20240604_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d20240604_v2.jsonl with all sampling strategy +Rank 0: Loaded 17196 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d20240604_v2.jsonl +Rank 0: Loading private_ui_phone_2403_long_caption_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 5998 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d240430_v1.jsonl +Rank 0: Loading private_ui_phone_2403_ocr_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ocr_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 31276 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ocr_d240430_v1.jsonl +Rank 0: Loading screen_qa_with_bbox_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_with_bbox_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 62401 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_with_bbox_d240430_v1.jsonl +Rank 0: Loading screenai_layout_20240604_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screenai_layout_20240604_v1.jsonl with all sampling strategy +Rank 0: Loaded 22076 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screenai_layout_20240604_v1.jsonl +Rank 0: Loading amex_grounding_d240813_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/amex_grounding_d240813_v1.jsonl with all sampling strategy +Rank 0: Loaded 102007 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/amex_grounding_d240813_v1.jsonl +Rank 0: Loading guicourse_guienv_text_grounding_1_d240815_v3 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_1_d240815_v3.jsonl with all sampling strategy +Rank 0: Loaded 63581 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_1_d240815_v3.jsonl +Rank 0: Loading guicourse_guienv_text_grounding_2_d240815_v3 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_2_d240815_v3.jsonl with all sampling strategy +Rank 0: Loaded 6852 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_2_d240815_v3.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1.jsonl +Rank 0: Loading screen_qa_short_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_short_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 27880 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_short_d240430_v1.jsonl +Rank 0: Loading private_aig_share_0815_logo_grounding_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_grounding_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_grounding_d240924_v1.jsonl +Rank 0: Loading private_schedual_extract_20240520_v2_r464_reprompt_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_schedual_extract_20240520_v2_r464_reprompt_d240607.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 928 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_schedual_extract_20240520_v2_r464_reprompt_d240607.jsonl +Rank 0: Loading private_ui2json_app_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_app_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2488 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_app_d20240822_v1.jsonl +Rank 0: Loading private_ui2json_os_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_os_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 1242 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_os_d20240822_v1.jsonl +Rank 0: Loading private_ui2json_web_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_web_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2360 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_web_d20240822_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607.jsonl with all sampling strategy +Rank 0: Loaded 3791 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607.jsonl +Rank 0: Loading private_ui_aig_share_2405_marker_recognition_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_marker_recognition_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5179 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_marker_recognition_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_ocr_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_ocr_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5090 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_ocr_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_operation_oral_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_operation_oral_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5070 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_operation_oral_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5248 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2408_region_caption_d240903_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2408_region_caption_d240903_v1.jsonl with all sampling strategy +Rank 0: Loaded 5854 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2408_region_caption_d240903_v1.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1.jsonl +Rank 0: Loading uground_web_direct_150k_description_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_direct_150k_description_filtered_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 74208 samples from VC:s3://gui/new_annotations/uground/web_direct_150k_description_filtered_202507011.jsonl +Rank 0: Loading uground_web_direct_258k_function_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_direct_258k_function_filtered_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 124132 samples from VC:s3://gui/new_annotations/uground/web_direct_258k_function_filtered_202507011.jsonl +Rank 0: Loading uground_web_hybrid_773k_max_25qa_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_hybrid_773k_max_25qa_filtered_new_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 621928 samples from VC:s3://gui/new_annotations/uground/web_hybrid_773k_max_25qa_filtered_new_202507011.jsonl +Rank 0: Loading altas_windows +Rank 0: Loading VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_windows_splited_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 537672 samples from VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_windows_splited_202507011.jsonl +Rank 0: Loading altas_linux +Rank 0: Loading VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_linux_splited_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 21578 samples from VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_linux_splited_202507011.jsonl +Rank 0: Loading atlas_macos +Rank 0: Loading VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_macos_splited_202507011.jsonl with random:20% sampling strategy +Rank 0: Loaded 3680 samples from VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_macos_splited_202507011.jsonl +Rank 0: Loading android_action_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/android/filter_action_grounding_20250405_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 5621 samples from VC:s3://gui/data_20250328/android/filter_action_grounding_20250405_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250328 +Rank 0: Loading VC:s3://gui-agent/data_20250328/windows/action_grounding_20250409_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 11980 samples from VC:s3://gui-agent/data_20250328/windows/action_grounding_20250409_202507011_20250722.jsonl +Rank 0: Loading web_action_grounding_20250328 +Rank 0: Loading VC:s3://gui-agent/data_20250328/web_25k/action_grounding_20250404_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 9459 samples from VC:s3://gui-agent/data_20250328/web_25k/action_grounding_20250404_202507011.jsonl +Rank 0: Loading ubuntu_action_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/ubuntu/action_grounding_20250407_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 328 samples from VC:s3://gui/data_20250310/ubuntu/action_grounding_20250407_202507011.jsonl +Rank 0: Loading ubuntu_action_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/ubuntu/action_grounding_20250407_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 54 samples from VC:s3://gui/data_20250317/ubuntu/action_grounding_20250407_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/action_grounding_20250421_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 480 samples from VC:s3://gui/data_20250317/windows/action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/crop_action_grounding_20250421_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 240 samples from VC:s3://gui/data_20250317/windows/crop_action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/action_grounding_20250421_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 944 samples from VC:s3://gui/data_20250310/windows/action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/crop_action_grounding_20250421_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 472 samples from VC:s3://gui/data_20250310/windows/crop_action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading mac_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/mac/action_grounding_20250410_202507011.jsonl with all sampling strategy +Rank 0: Loaded 1578 samples from VC:s3://gui-agent/data_20250407/mac/action_grounding_20250410_202507011.jsonl +Rank 0: Loading iphone_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/iphone/white/action_grounding_20250410_202507011.jsonl with all sampling strategy +Rank 0: Loaded 20394 samples from VC:s3://gui-agent/data_20250407/iphone/white/action_grounding_20250410_202507011.jsonl +Rank 0: Loading web_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/web/action_grounding_20250414_202507011.jsonl with random:20% sampling strategy +Rank 0: Loaded 14285 samples from VC:s3://gui-agent/data_20250407/web/action_grounding_20250414_202507011.jsonl +Rank 0: Loading android_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/android/action_grounding_20250410_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 3590 samples from VC:s3://gui-agent/data_20250407/android/action_grounding_20250410_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/action_grounding_20250416_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 21422 samples from VC:s3://gui-agent/data_20250407/windows/action_grounding_20250416_202507011_20250722.jsonl +Rank 0: Loading windows_human_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/human_action_grounding_20250416_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 100 samples from VC:s3://gui-agent/data_20250407/windows/human_action_grounding_20250416_202507011_20250722.jsonl +Rank 0: Loading windows_aug_cropping_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/sub_action_grounding_20250421_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 7675 samples from VC:s3://gui-agent/data_20250407/windows/sub_action_grounding_20250421_202507011.jsonl +Rank 0: Loading iphone_action_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/action_grounding_20250417_202507011.jsonl with all sampling strategy +Rank 0: Loaded 20116 samples from VC:s3://gui-agent/data_20250414/iphone/action_grounding_20250417_202507011.jsonl +Rank 0: Loading iphone_human_action_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/human_action_grounding_20250421_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2520 samples from VC:s3://gui-agent/data_20250414/iphone/human_action_grounding_20250421_202507011.jsonl +Rank 0: Loading mac_human_action_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/mac/human_action_grounding_20250418_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7814 samples from VC:s3://gui-agent/data_20250414/mac/human_action_grounding_20250418_202507011.jsonl +Rank 0: Loading android_action_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/Android/action_grounding_20250429_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 17838 samples from VC:s3://gui-agent/data_20250421/Android/action_grounding_20250429_202507011.jsonl +Rank 0: Loading android_action_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/Android/action_grounding_20250429_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 9008 samples from VC:s3://gui-agent/data_20250428/Android/action_grounding_20250429_202507011.jsonl +Rank 0: Loading web_canvas_action_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/web_canvas/action_grounding_20250429_202507011.jsonl with random:20% sampling strategy +Rank 0: Loaded 624 samples from VC:s3://gui-agent/data_20250428/web_canvas/action_grounding_20250429_202507011.jsonl +Rank 0: Loading web_action_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/web/action_grounding_20250505_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 100652 samples from VC:s3://gui-agent/data_20250421/web/action_grounding_20250505_202507011.jsonl +Rank 0: Loading ubuntu_action_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/ubuntu/action_grounding_20250505_202507011.jsonl with all sampling strategy +Rank 0: Loaded 28346 samples from VC:s3://gui-agent/data_20250428/ubuntu/action_grounding_20250505_202507011.jsonl +Rank 0: Loading android_action_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/android/action_grounding_20250506_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 4907 samples from VC:s3://gui-agent/data_20250505/android/action_grounding_20250506_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/action_grounding_20250508_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 2635 samples from VC:s3://gui-agent/data_20250505/windows/action_grounding_20250508_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/crop_action_grounding_20250508_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 5234 samples from VC:s3://gui-agent/data_20250505/windows/crop_action_grounding_20250508_202507011_20250722.jsonl +Rank 0: Loading ubuntu_action_grounding_20250508 +Rank 0: Loading VC:s3://gui-agent/data_20250508/ubuntu/action_grounding_20250509_202507011.jsonl with all sampling strategy +Rank 0: Loaded 3404 samples from VC:s3://gui-agent/data_20250508/ubuntu/action_grounding_20250509_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250510_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250510_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250510_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250510_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250526_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250526_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250526_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250526_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250526_3 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250527_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250527_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_3 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250527_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250527_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250529_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 17050 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250529_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250529_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 17050 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250529_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_1 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250510_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 2855 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250510_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_2 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250526_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 655 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250526_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_3 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250527_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 833 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250527_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_4 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250529_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 2643 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250529_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_1 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250510_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250510_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_2 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250526_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250526_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_3 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250527_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250527_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_4 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250529_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5286 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250529_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_5 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250510_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250510_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_6 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250526_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250526_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_7 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250527_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250527_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_8 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250529_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5286 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250529_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/action_grounding_20250619_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3130 samples from VC:s3://gui-agent/data_20250623/windows/action_grounding_20250619_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250619_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 3130 samples from VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250619_202507011_20250722.jsonl +Rank 0: Loading windows_hover_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/action_grounding_20250620_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 3333 samples from VC:s3://gui-agent/data_20250623/windows/action_grounding_20250620_202507011_20250722.jsonl +Rank 0: Loading windows_crop_hover_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250620_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 3333 samples from VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250620_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_1 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 833 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_2 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1666 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_3 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1666 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_pure_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_4 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 358 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_5 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 782 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_6 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 782 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_pure_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/action_grounding_20250627_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7946 samples from VC:s3://gui-agent/data_20250630/windows/action_grounding_20250627_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/crop_action_grounding_20250627_202507011_20250722.jsonl with random:50% sampling strategy +W0817 21:52:03.818000 79048 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 21:52:03.818000 79048 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 21:52:03.818000 79048 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 21:52:03.818000 79048 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 21:52:03.839000 115042 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 21:52:03.839000 115042 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 21:52:03.839000 115042 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 21:52:03.839000 115042 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 21:52:04.145000 50213 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 21:52:04.145000 50213 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 21:52:04.145000 50213 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 21:52:04.145000 50213 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 21:52:04.140000 16447 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 21:52:04.140000 16447 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 21:52:04.140000 16447 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 21:52:04.140000 16447 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 21:52:04.172000 98601 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 21:52:04.172000 98601 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 21:52:04.172000 98601 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 21:52:04.172000 98601 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 21:52:04.603000 61453 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 21:52:04.603000 61453 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 21:52:04.603000 61453 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 21:52:04.603000 61453 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 21:52:04.810000 109145 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 21:52:04.810000 109145 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 21:52:04.810000 109145 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 21:52:04.810000 109145 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +[2025-08-17 21:52:16,922] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:52:16,955] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:52:16,969] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:52:16,969] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:52:16,971] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:52:16,971] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:52:16,971] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:52:16,973] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:52:16,973] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:52:16,962] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:52:16,963] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:52:16,963] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:52:16,971] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:52:16,971] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:52:16,971] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:52:16,971] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:52:18,191] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:52:18,192] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:52:18,192] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:52:18,210] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:52:18,210] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:52:18,210] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:52:18,211] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:52:18,211] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:52:18,224] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:52:18,225] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:52:18,225] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:52:18,226] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:52:18,223] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:52:18,224] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:52:18,224] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:52:18,224] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:52:18,225] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:52:18,226] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:52:18,226] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:52:18,226] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:52:18,235] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:52:18,235] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:52:18,235] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:52:18,235] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:52:19,212] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:52:19,213] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:52:19,213] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:52:19,215] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:52:19,217] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:52:19,217] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:52:19,218] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:52:19,218] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:52:20,230] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:52:20,235] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:52:20,223] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:52:20,235] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:52:20,236] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:52:20,236] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:52:20,238] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:52:20,238] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:52:20,223] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:52:20,223] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:52:20,223] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:52:20,223] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:52:20,224] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:52:20,224] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:52:20,224] [INFO] [comm.py:683:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl +[2025-08-17 21:52:20,662] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:52:20,667] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:52:20,709] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:52:20,787] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:52:20,793] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:52:20,803] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:52:20,803] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:52:20,804] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:52:20,805] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:52:20,794] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +ter initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:52:20,808] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:52:20,805] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:52:20,805] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:52:20,808] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:52:20,809] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:52:20,810] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:52:20,814] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:52:20,815] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:52:20,815] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:52:20,817] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:52:20,817] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:52:20,817] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:52:20,817] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 21:52:20,848] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:52:20,851] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:52:20,854] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:52:20,858] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:52:20,863] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:52:20,864] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:52:20,864] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:52:21,322] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:52:21,328] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:52:21,331] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:52:21,334] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:52:21,334] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:52:21,334] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:52:21,335] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:52:21,337] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:52:21,341] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:52:21,353] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:52:21,341] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:52:21,341] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:52:21,341] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:52:21,341] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:52:21,341] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:52:21,341] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:52:21,353] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:52:21,353] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:52:21,353] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:52:21,353] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:52:21,353] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:52:21,354] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:52:21,354] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:52:21,796] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:52:21,811] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:52:21,836] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:52:21,923] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:52:21,923] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:52:21,933] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:52:21,935] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:52:21,938] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:52:21,939] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:52:21,941] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:52:21,953] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:52:21,962] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:52:21,963] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:52:21,964] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:52:21,967] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:52:21,967] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:52:21,968] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:52:21,993] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:52:21,994] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:52:21,994] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:52:22,000] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:52:22,001] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:52:22,001] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:52:22,001] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:52:23,194] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:52:23,194] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:52:23,194] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:52:23,195] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:52:23,195] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:52:23,195] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:52:23,198] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:52:23,198] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:52:23,681] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:52:23,803] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:52:23,810] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:52:23,815] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:52:23,819] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:52:23,824] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:52:23,826] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:52:23,830] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:52:24,100] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:52:24,100] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:52:24,100] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:52:24,100] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:52:24,100] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:52:24,100] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:52:24,101] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:52:24,101] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 21:52:24,569] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:52:24,721] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:52:24,721] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:52:24,722] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:52:24,723] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:52:24,724] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 21:52:24,725] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 21:52:24,726] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. + Loading checkpoint shards: 0%| | 0/2 [00:00 before Client(conf_path) +Rank 0: --> after Client(conf_path) +Rank 0: Loading datasets: /mnt/petrelfs/liuzhaoyang/workspace/GUIAgent/internvl_chat/data/internvl_meta/meta/meta_250816_1.json +Rank 0: Loading guienv +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl with all sampling strategy +Rank 0: Loaded 327972 samples from VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl +Rank 0: Loading omniact +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl with all sampling strategy +Rank 0: Loaded 6720 samples from VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl +Rank 0: Loading ricoig16k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16133 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl +Rank 0: Loading ricosca +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl with all sampling strategy +Rank 0: Loaded 173212 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl +Rank 0: Loading seeclick +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl with all sampling strategy +Rank 0: Loaded 271121 samples from VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl +Rank 0: Loading ui_refexp +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl with all sampling strategy +Rank 0: Loaded 15624 samples from VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl +Rank 0: Loading webui350k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 57389 samples from VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl +Rank 0: Loading widget_captioning +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl with all sampling strategy +Rank 0: Loaded 101426 samples from VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl +Rank 0: Loading aitw-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl +Rank 0: Loading aitw-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl +Rank 0: Loading aitw-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl +Rank 0: Loading amex-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl +Rank 0: Loading amex-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl +Rank 0: Loading amex-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl +Rank 0: Loading android_control +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 149428 samples from VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl +Rank 0: Loading coat +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl with all sampling strategy +Rank 0: Loaded 11833 samples from VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl +Rank 0: Loading guiact-web-multi-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl +Rank 0: Loading guiact-web-multi-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl +Rank 0: Loading guiact-web-multi-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl +Rank 0: Loading guiact-web-single +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl with all sampling strategy +Rank 0: Loaded 67396 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl +Rank 0: Loading guide +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl with all sampling strategy +Rank 0: Loaded 13544 samples from VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl +Rank 0: Loading gui-odyssey-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl +Rank 0: Loading gui-odyssey-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl +Rank 0: Loading gui-odyssey-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl +Rank 0: Loading mind2web-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l1_202507011.jsonl +Rank 0: Loading mind2web-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l2_202507011.jsonl +Rank 0: Loading mind2web-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l3_202507011.jsonl +Rank 0: Loading miniwob-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l1_202507011.jsonl +Rank 0: Loading miniwob-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l2_202507011.jsonl +Rank 0: Loading miniwob-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l3_202507011.jsonl +Rank 0: Loading aguvis_android_control-v2 +Rank 0: Skipping aguvis_android_control-v2 due to repeat_time=0 +Rank 0: Loading aguvis_coat-v2 +Rank 0: Skipping aguvis_coat-v2 due to repeat_time=0 +Rank 0: Loading aguvis_docvqa_grounding +Rank 0: Skipping aguvis_docvqa_grounding due to repeat_time=0 +Rank 0: Loading aguvis_guiact-web-multi +Rank 0: Skipping aguvis_guiact-web-multi due to repeat_time=0 +Rank 0: Loading aguvis_guiact-web-single-v2 +Rank 0: Skipping aguvis_guiact-web-single-v2 due to repeat_time=0 +Rank 0: Loading aguvis_guide_si_10k-v2 +Rank 0: Skipping aguvis_guide_si_10k-v2 due to repeat_time=0 +Rank 0: Loading aguvis_guienv +Rank 0: Skipping aguvis_guienv due to repeat_time=0 +Rank 0: Loading aguvis_mind2web_train_v1.0.1 +Rank 0: Skipping aguvis_mind2web_train_v1.0.1 due to repeat_time=0 +Rank 0: Loading aguvis_omniact +Rank 0: Skipping aguvis_omniact due to repeat_time=0 +Rank 0: Loading aguvis_osatlas_ui_tars_cleaned +Rank 0: Skipping aguvis_osatlas_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading aguvis_ricoig16k +Rank 0: Skipping aguvis_ricoig16k due to repeat_time=0 +Rank 0: Loading aguvis_ricosca +Rank 0: Skipping aguvis_ricosca due to repeat_time=0 +Rank 0: Loading aguvis_seeclick_mi_ui_tars_cleaned +Rank 0: Skipping aguvis_seeclick_mi_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading aguvis_seeclick_ui_tars_cleaned_fixed +Rank 0: Skipping aguvis_seeclick_ui_tars_cleaned_fixed due to repeat_time=0 +Rank 0: Loading aguvis_ui_refexp +Rank 0: Skipping aguvis_ui_refexp due to repeat_time=0 +Rank 0: Loading aguvis_webui350k +Rank 0: Skipping aguvis_webui350k due to repeat_time=0 +Rank 0: Loading aguvis_widget_captioning +Rank 0: Skipping aguvis_widget_captioning due to repeat_time=0 +Rank 0: Loading icon_caption_icon_v0222_description +Rank 0: Skipping icon_caption_icon_v0222_description due to repeat_time=0 +Rank 0: Loading icon_grounding_icon_v0222_grounding +Rank 0: Skipping icon_grounding_icon_v0222_grounding due to repeat_time=0 +Rank 0: Loading refusal_component_final_1.5m +Rank 0: Skipping refusal_component_final_1.5m due to repeat_time=0 +Rank 0: Loading refusal_component_library_snap_icon_data_grounding +Rank 0: Skipping refusal_component_library_snap_icon_data_grounding due to repeat_time=0 +Rank 0: Loading refusal_component_v1_130k +Rank 0: Skipping refusal_component_v1_130k due to repeat_time=0 +Rank 0: Loading refusal_guienv +Rank 0: Skipping refusal_guienv due to repeat_time=0 +Rank 0: Loading refusal_icon_v0222_grounding +Rank 0: Skipping refusal_icon_v0222_grounding due to repeat_time=0 +Rank 0: Loading refusal_osatlas_ui_tars_cleaned +Rank 0: Skipping refusal_osatlas_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading refusal_ricosca +Rank 0: Skipping refusal_ricosca due to repeat_time=0 +Rank 0: Loading refusal_seeclick_mi_ui_tars_cleaned +Rank 0: Skipping refusal_seeclick_mi_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading refusal_seeclick_ui_tars_cleaned_fixed +Rank 0: Skipping refusal_seeclick_ui_tars_cleaned_fixed due to repeat_time=0 +Rank 0: Loading refusal_training_data_icon_grounded_merged +Rank 0: Skipping refusal_training_data_icon_grounded_merged due to repeat_time=0 +Rank 0: Loading component_generated_component_final_1.5m_cleaned_split +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_final_1.5m_cleaned_split_20250713.jsonl with random:10% sampling strategy +Rank 0: Loaded 3987 samples from VC:s3://gui-agent/jedi/annotations_250713/component_final_1.5m_cleaned_split_20250713.jsonl +Rank 0: Loading component_generated_component_library_snap_icon_data_description +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_description_conversations_20250713.jsonl with random:50% sampling strategy +Rank 0: Loaded 11061 samples from VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_description_conversations_20250713.jsonl +Rank 0: Loading component_generated_component_library_snap_icon_data_grounding +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_grounding_conversations_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 4424 samples from VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_grounding_conversations_20250713.jsonl +Rank 0: Loading component_generated_component_v1_130k +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_v1_130k_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 26376 samples from VC:s3://gui-agent/jedi/annotations_250713/component_v1_130k_20250713.jsonl +Rank 0: Loading component_rule-based_doc_data_new +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/doc_data_new_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 3153 samples from VC:s3://gui-agent/jedi/annotations_250713/doc_data_new_20250713.jsonl +Rank 0: Loading component_rule-based_doc_scroll_data_new +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/doc_scroll_data_new_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 603 samples from VC:s3://gui-agent/jedi/annotations_250713/doc_scroll_data_new_20250713.jsonl +Rank 0: Loading component_rule-based_ethercalc_v1 +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/ethercalc_v1_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 2012 samples from VC:s3://gui-agent/jedi/annotations_250713/ethercalc_v1_20250713.jsonl +Rank 0: Loading component_rule-based_slide_v1_17k +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/slide_v1_17k_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 2363 samples from VC:s3://gui-agent/jedi/annotations_250713/slide_v1_17k_20250713.jsonl +Rank 0: Loading icon_caption_ios_app_data +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/ios_app_data_conversations-images_pure_color_background_20250713.jsonl with all sampling strategy +Rank 0: Loaded 49498 samples from VC:s3://gui-agent/jedi/annotations_250713/ios_app_data_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_caption_mac_app_data +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/mac_app_data_conversations-images_pure_color_background_20250713.jsonl with all sampling strategy +Rank 0: Loaded 18083 samples from VC:s3://gui-agent/jedi/annotations_250713/mac_app_data_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_caption_training_data_icon +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_pure_color_background_20250713.jsonl with random:50% sampling strategy +Rank 0: Loaded 75874 samples from VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_grounding_training_data_icon_grounded_merged +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_grounded_merged_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 5466 samples from VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_grounded_merged_20250713.jsonl +Rank 0: Loading layout_layout200k_training_data_qwen25 +Rank 0: Skipping layout_layout200k_training_data_qwen25 due to repeat_time=0 +Rank 0: Loading layout_layout200k_grounding_training_data_qwen25 +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/layout200k_grounding_training_data_qwen25_20250713.jsonl with random:10% sampling strategy +Rank 0: Loaded 158612 samples from VC:s3://gui-agent/jedi/annotations_250713/layout200k_grounding_training_data_qwen25_20250713.jsonl +Rank 0: Loading layout_layout400k_claude_training_data_qwen25_split +Rank 0: Skipping layout_layout400k_claude_training_data_qwen25_split due to repeat_time=0 +Rank 0: Loading layout_layout400k_claude_grounding_training_data_qwen25_split +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/layout400k_claude_grounding_training_data_qwen25_split_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 7540 samples from VC:s3://gui-agent/jedi/annotations_250713/layout400k_claude_grounding_training_data_qwen25_split_20250713.jsonl +Rank 0: Loading layout_os_layout_v1 +Rank 0: Skipping layout_os_layout_v1 due to repeat_time=0 +Rank 0: Loading layout_os_layout_v1_grounding +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/os_layout_v1_grounding_training_data_qwen25_split_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 7857 samples from VC:s3://gui-agent/jedi/annotations_250713/os_layout_v1_grounding_training_data_qwen25_split_20250713.jsonl +Rank 0: Loading mind2web_raw_image +Rank 0: Loading VC:s3://gui-agent/mind2web_train/navigation_20250705.jsonl with all sampling strategy +Rank 0: Loaded 5740 samples from VC:s3://gui-agent/mind2web_train/navigation_20250705.jsonl +Rank 0: Loading ws_android_navigation_20250328 +Rank 0: Skipping ws_android_navigation_20250328 due to repeat_time=0 +Rank 0: Loading ws_android_navigation_20250407 +Rank 0: Skipping ws_android_navigation_20250407 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_w_history_20250328 +Rank 0: Skipping ws_web_navigation_w_history_20250328 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_wo_history_20250328 +Rank 0: Skipping ws_web_navigation_wo_history_20250328 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_20250421 +Rank 0: Skipping ws_web_navigation_20250421 due to repeat_time=0 +Rank 0: Loading ws_ubuntu_navigation_20250328 +Rank 0: Skipping ws_ubuntu_navigation_20250328 due to repeat_time=0 +Rank 0: Loading ws_android_navigation_20250505 +Rank 0: Skipping ws_android_navigation_20250505 due to repeat_time=0 +Rank 0: Loading internal_android_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 48814 samples from VC:s3://gui-agent/data_20250612/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19042 samples from VC:s3://gui-agent/data_20250612/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8363 samples from VC:s3://gui-agent/data_20250612/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 26412 samples from VC:s3://gui-agent/data_20250612/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 57522 samples from VC:s3://gui-agent/data_20250612/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 1342 samples from VC:s3://gui-agent/data_20250624/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 15766 samples from VC:s3://gui-agent/data_20250624/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19280 samples from VC:s3://gui-agent/data_20250630/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 3560 samples from VC:s3://gui-agent/data_20250630/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 9258 samples from VC:s3://gui-agent/data_20250630/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 420 samples from VC:s3://gui-agent/data_20250707/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 8898 samples from VC:s3://gui-agent/data_20250707/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 21026 samples from VC:s3://gui-agent/data_20250707/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 4490 samples from VC:s3://gui-agent/data_20250707/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 22154 samples from VC:s3://gui-agent/data_20250714/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/web/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 11614 samples from VC:s3://gui-agent/data_20250714/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 16767 samples from VC:s3://gui-agent/data_20250714/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 48814 samples from VC:s3://gui-agent/data_20250612/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 11970 samples from VC:s3://gui-agent/data_20250612/android/planning_20250811.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19042 samples from VC:s3://gui-agent/data_20250612/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 4926 samples from VC:s3://gui-agent/data_20250612/mac/planning_20250811.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8363 samples from VC:s3://gui-agent/data_20250612/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3716 samples from VC:s3://gui-agent/data_20250612/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 26412 samples from VC:s3://gui-agent/data_20250612/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 4420 samples from VC:s3://gui-agent/data_20250612/windows/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 115044 samples from VC:s3://gui-agent/data_20250612/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2684 samples from VC:s3://gui-agent/data_20250624/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 15766 samples from VC:s3://gui-agent/data_20250624/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7402 samples from VC:s3://gui-agent/data_20250624/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19280 samples from VC:s3://gui-agent/data_20250630/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3746 samples from VC:s3://gui-agent/data_20250630/android/planning_20250811.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 3560 samples from VC:s3://gui-agent/data_20250630/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 2469 samples from VC:s3://gui-agent/data_20250630/mac/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 18516 samples from VC:s3://gui-agent/data_20250630/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 420 samples from VC:s3://gui-agent/data_20250707/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 243 samples from VC:s3://gui-agent/data_20250707/mac/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 8898 samples from VC:s3://gui-agent/data_20250707/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 1188 samples from VC:s3://gui-agent/data_20250707/windows/planning_20250811.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8979 samples from VC:s3://gui-agent/data_20250707/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 1599 samples from VC:s3://gui-agent/data_20250707/android/planning_20250811.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 21026 samples from VC:s3://gui-agent/data_20250707/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2657 samples from VC:s3://gui-agent/data_20250707/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 22154 samples from VC:s3://gui-agent/data_20250714/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2383 samples from VC:s3://gui-agent/data_20250714/windows/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/web/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 23229 samples from VC:s3://gui-agent/data_20250714/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 16767 samples from VC:s3://gui-agent/data_20250714/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2389 samples from VC:s3://gui-agent/data_20250714/ubuntu/planning_20250811.jsonl +Rank 0: Loading private_aig_share_0815_logo_oral_operation_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_oral_operation_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_oral_operation_d240924_v1.jsonl +Rank 0: Loading private_aig_share_0815_logo_region_caption_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_region_caption_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_region_caption_d240924_v1.jsonl +Rank 0: Loading private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2.jsonl with all sampling strategy +Rank 0: Loaded 20293 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2.jsonl +Rank 0: Loading private_ui_phone_comment_20240606_json_d20241023_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_comment_20240606_json_d20241023_v2.jsonl with all sampling strategy +Rank 0: Loaded 1055 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_comment_20240606_json_d20241023_v2.jsonl +Rank 0: Loading private_ui_internal_aig_json_d241126 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_json_d241126.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 6837 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_json_d241126.jsonl +Rank 0: Loading private_ui_internal_aig_xml_d241126 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_xml_d241126.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 6873 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_xml_d241126.jsonl +Rank 0: Loading OS_Altas_androidworld_grounding_d241120_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/OS_Altas_androidworld_grounding_d241120_v1.jsonl with all sampling strategy +Rank 0: Loaded 89860 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/OS_Altas_androidworld_grounding_d241120_v1.jsonl +Rank 0: Loading private_ui_aig_share_long_caption_20240604_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_long_caption_20240604_v1.jsonl with repeat:4 sampling strategy +Rank 0: Loaded 3156 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_long_caption_20240604_v1.jsonl +Rank 0: Loading aw_1218_grounding +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/grounding_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/grounding_new.jsonl +Rank 0: Loading aw_1218_regioncaption +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/regioncaption_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/regioncaption_new.jsonl +Rank 0: Loading aw_1218_oral_operation +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/oral_operation_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/oral_operation_new.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600.jsonl with all sampling strategy +Rank 0: Loaded 6600 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1.jsonl +Rank 0: Loading private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607.jsonl with all sampling strategy +Rank 0: Loaded 24620 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607.jsonl +Rank 0: Loading private_ui_phone_2403_long_caption_d20240604_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d20240604_v2.jsonl with all sampling strategy +Rank 0: Loaded 17196 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d20240604_v2.jsonl +Rank 0: Loading private_ui_phone_2403_long_caption_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 5998 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d240430_v1.jsonl +Rank 0: Loading private_ui_phone_2403_ocr_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ocr_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 31276 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ocr_d240430_v1.jsonl +Rank 0: Loading screen_qa_with_bbox_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_with_bbox_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 62401 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_with_bbox_d240430_v1.jsonl +Rank 0: Loading screenai_layout_20240604_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screenai_layout_20240604_v1.jsonl with all sampling strategy +Rank 0: Loaded 22076 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screenai_layout_20240604_v1.jsonl +Rank 0: Loading amex_grounding_d240813_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/amex_grounding_d240813_v1.jsonl with all sampling strategy +Rank 0: Loaded 102007 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/amex_grounding_d240813_v1.jsonl +Rank 0: Loading guicourse_guienv_text_grounding_1_d240815_v3 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_1_d240815_v3.jsonl with all sampling strategy +Rank 0: Loaded 63581 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_1_d240815_v3.jsonl +Rank 0: Loading guicourse_guienv_text_grounding_2_d240815_v3 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_2_d240815_v3.jsonl with all sampling strategy +Rank 0: Loaded 6852 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_2_d240815_v3.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1.jsonl +Rank 0: Loading screen_qa_short_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_short_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 27880 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_short_d240430_v1.jsonl +Rank 0: Loading private_aig_share_0815_logo_grounding_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_grounding_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_grounding_d240924_v1.jsonl +Rank 0: Loading private_schedual_extract_20240520_v2_r464_reprompt_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_schedual_extract_20240520_v2_r464_reprompt_d240607.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 928 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_schedual_extract_20240520_v2_r464_reprompt_d240607.jsonl +Rank 0: Loading private_ui2json_app_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_app_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2488 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_app_d20240822_v1.jsonl +Rank 0: Loading private_ui2json_os_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_os_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 1242 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_os_d20240822_v1.jsonl +Rank 0: Loading private_ui2json_web_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_web_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2360 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_web_d20240822_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607.jsonl with all sampling strategy +Rank 0: Loaded 3791 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607.jsonl +Rank 0: Loading private_ui_aig_share_2405_marker_recognition_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_marker_recognition_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5179 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_marker_recognition_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_ocr_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_ocr_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5090 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_ocr_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_operation_oral_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_operation_oral_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5070 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_operation_oral_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5248 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2408_region_caption_d240903_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2408_region_caption_d240903_v1.jsonl with all sampling strategy +Rank 0: Loaded 5854 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2408_region_caption_d240903_v1.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1.jsonl +Rank 0: Loading uground_web_direct_150k_description_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_direct_150k_description_filtered_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 74208 samples from VC:s3://gui/new_annotations/uground/web_direct_150k_description_filtered_202507011.jsonl +Rank 0: Loading uground_web_direct_258k_function_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_direct_258k_function_filtered_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 124132 samples from VC:s3://gui/new_annotations/uground/web_direct_258k_function_filtered_202507011.jsonl +Rank 0: Loading uground_web_hybrid_773k_max_25qa_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_hybrid_773k_max_25qa_filtered_new_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 621928 samples from VC:s3://gui/new_annotations/uground/web_hybrid_773k_max_25qa_filtered_new_202507011.jsonl +Rank 0: Loading altas_windows +Rank 0: Loading VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_windows_splited_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 537672 samples from VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_windows_splited_202507011.jsonl +Rank 0: Loading altas_linux +Rank 0: Loading VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_linux_splited_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 21578 samples from VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_linux_splited_202507011.jsonl +Rank 0: Loading atlas_macos +Rank 0: Loading VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_macos_splited_202507011.jsonl with random:20% sampling strategy +Rank 0: Loaded 3680 samples from VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_macos_splited_202507011.jsonl +Rank 0: Loading android_action_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/android/filter_action_grounding_20250405_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 5621 samples from VC:s3://gui/data_20250328/android/filter_action_grounding_20250405_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250328 +Rank 0: Loading VC:s3://gui-agent/data_20250328/windows/action_grounding_20250409_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 11980 samples from VC:s3://gui-agent/data_20250328/windows/action_grounding_20250409_202507011_20250722.jsonl +Rank 0: Loading web_action_grounding_20250328 +Rank 0: Loading VC:s3://gui-agent/data_20250328/web_25k/action_grounding_20250404_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 9459 samples from VC:s3://gui-agent/data_20250328/web_25k/action_grounding_20250404_202507011.jsonl +Rank 0: Loading ubuntu_action_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/ubuntu/action_grounding_20250407_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 328 samples from VC:s3://gui/data_20250310/ubuntu/action_grounding_20250407_202507011.jsonl +Rank 0: Loading ubuntu_action_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/ubuntu/action_grounding_20250407_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 54 samples from VC:s3://gui/data_20250317/ubuntu/action_grounding_20250407_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/action_grounding_20250421_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 480 samples from VC:s3://gui/data_20250317/windows/action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/crop_action_grounding_20250421_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 240 samples from VC:s3://gui/data_20250317/windows/crop_action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/action_grounding_20250421_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 944 samples from VC:s3://gui/data_20250310/windows/action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/crop_action_grounding_20250421_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 472 samples from VC:s3://gui/data_20250310/windows/crop_action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading mac_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/mac/action_grounding_20250410_202507011.jsonl with all sampling strategy +Rank 0: Loaded 1578 samples from VC:s3://gui-agent/data_20250407/mac/action_grounding_20250410_202507011.jsonl +Rank 0: Loading iphone_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/iphone/white/action_grounding_20250410_202507011.jsonl with all sampling strategy +Rank 0: Loaded 20394 samples from VC:s3://gui-agent/data_20250407/iphone/white/action_grounding_20250410_202507011.jsonl +Rank 0: Loading web_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/web/action_grounding_20250414_202507011.jsonl with random:20% sampling strategy +Rank 0: Loaded 14285 samples from VC:s3://gui-agent/data_20250407/web/action_grounding_20250414_202507011.jsonl +Rank 0: Loading android_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/android/action_grounding_20250410_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 3590 samples from VC:s3://gui-agent/data_20250407/android/action_grounding_20250410_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/action_grounding_20250416_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 21422 samples from VC:s3://gui-agent/data_20250407/windows/action_grounding_20250416_202507011_20250722.jsonl +Rank 0: Loading windows_human_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/human_action_grounding_20250416_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 100 samples from VC:s3://gui-agent/data_20250407/windows/human_action_grounding_20250416_202507011_20250722.jsonl +Rank 0: Loading windows_aug_cropping_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/sub_action_grounding_20250421_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 7675 samples from VC:s3://gui-agent/data_20250407/windows/sub_action_grounding_20250421_202507011.jsonl +Rank 0: Loading iphone_action_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/action_grounding_20250417_202507011.jsonl with all sampling strategy +Rank 0: Loaded 20116 samples from VC:s3://gui-agent/data_20250414/iphone/action_grounding_20250417_202507011.jsonl +Rank 0: Loading iphone_human_action_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/human_action_grounding_20250421_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2520 samples from VC:s3://gui-agent/data_20250414/iphone/human_action_grounding_20250421_202507011.jsonl +Rank 0: Loading mac_human_action_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/mac/human_action_grounding_20250418_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7814 samples from VC:s3://gui-agent/data_20250414/mac/human_action_grounding_20250418_202507011.jsonl +Rank 0: Loading android_action_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/Android/action_grounding_20250429_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 17838 samples from VC:s3://gui-agent/data_20250421/Android/action_grounding_20250429_202507011.jsonl +Rank 0: Loading android_action_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/Android/action_grounding_20250429_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 9008 samples from VC:s3://gui-agent/data_20250428/Android/action_grounding_20250429_202507011.jsonl +Rank 0: Loading web_canvas_action_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/web_canvas/action_grounding_20250429_202507011.jsonl with random:20% sampling strategy +Rank 0: Loaded 624 samples from VC:s3://gui-agent/data_20250428/web_canvas/action_grounding_20250429_202507011.jsonl +Rank 0: Loading web_action_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/web/action_grounding_20250505_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 100652 samples from VC:s3://gui-agent/data_20250421/web/action_grounding_20250505_202507011.jsonl +Rank 0: Loading ubuntu_action_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/ubuntu/action_grounding_20250505_202507011.jsonl with all sampling strategy +Rank 0: Loaded 28346 samples from VC:s3://gui-agent/data_20250428/ubuntu/action_grounding_20250505_202507011.jsonl +Rank 0: Loading android_action_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/android/action_grounding_20250506_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 4907 samples from VC:s3://gui-agent/data_20250505/android/action_grounding_20250506_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/action_grounding_20250508_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 2635 samples from VC:s3://gui-agent/data_20250505/windows/action_grounding_20250508_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/crop_action_grounding_20250508_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 5234 samples from VC:s3://gui-agent/data_20250505/windows/crop_action_grounding_20250508_202507011_20250722.jsonl +Rank 0: Loading ubuntu_action_grounding_20250508 +Rank 0: Loading VC:s3://gui-agent/data_20250508/ubuntu/action_grounding_20250509_202507011.jsonl with all sampling strategy +Rank 0: Loaded 3404 samples from VC:s3://gui-agent/data_20250508/ubuntu/action_grounding_20250509_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250510_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250510_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250510_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250510_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250526_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250526_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250526_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250526_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250526_3 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250527_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250527_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_3 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250527_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250527_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250529_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 17050 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250529_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250529_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 17050 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250529_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_1 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250510_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 2855 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250510_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_2 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250526_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 655 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250526_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_3 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250527_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 833 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250527_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_4 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250529_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 2643 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250529_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_1 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250510_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250510_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_2 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250526_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250526_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_3 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250527_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250527_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_4 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250529_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5286 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250529_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_5 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250510_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250510_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_6 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250526_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250526_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_7 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250527_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250527_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_8 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250529_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5286 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250529_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/action_grounding_20250619_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3130 samples from VC:s3://gui-agent/data_20250623/windows/action_grounding_20250619_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250619_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 3130 samples from VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250619_202507011_20250722.jsonl +Rank 0: Loading windows_hover_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/action_grounding_20250620_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 3333 samples from VC:s3://gui-agent/data_20250623/windows/action_grounding_20250620_202507011_20250722.jsonl +Rank 0: Loading windows_crop_hover_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250620_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 3333 samples from VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250620_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_1 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 833 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_2 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1666 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_3 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1666 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_pure_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_4 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 358 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_5 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 782 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_6 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 782 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_pure_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/action_grounding_20250627_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7946 samples from VC:s3://gui-agent/data_20250630/windows/action_grounding_20250627_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/crop_action_grounding_20250627_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 3973 samples from VC:s3://gui-agent/data_20250630/windows/crop_action_grounding_20250627_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_1 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 993 samples from VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1986 samples from VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1986 samples from VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_pure_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/action_grounding_20250630_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 1990 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/action_grounding_20250630_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_action_grounding_20250630_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1990 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_action_grounding_20250630_202507011_20250722.jsonl +Rank 0: Loading windows_human_action_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/action_grounding_20250703_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 5040 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/action_grounding_20250703_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_action_grounding_20250703_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 5040 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_action_grounding_20250703_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_4 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 630 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_5 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1260 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_6 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1260 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_pure_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_7 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 249 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_8 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 498 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_9 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 498 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_pure_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/action_grounding_20250708_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2538 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/action_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_action_grounding_20250708_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1269 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_action_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250707_1 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_action_grounding_20250708.jsonl with random:25% sampling strategy +Rank 0: Loaded 172 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_action_grounding_20250708.jsonl +Rank 0: Loading windows_aug_action_grounding_20250707_2 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_action_grounding_20250708.jsonl with random:25% sampling strategy +Rank 0: Loaded 634 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_action_grounding_20250708.jsonl +Rank 0: Loading windows_aug_action_grounding_20250707_3 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_action_grounding_20250708.jsonl with random:25% sampling strategy +Rank 0: Loaded 634 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_action_grounding_20250708.jsonl +Rank 0: Loading windows_human_action_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/action_grounding_20250717_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2832 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/action_grounding_20250717_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_action_grounding_20250717_20250722.jsonl with all sampling strategy +Rank 0: Loaded 2832 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_action_grounding_20250717_20250722.jsonl +Rank 0: Loading android_ocr_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/android/text_ocr_20250409.jsonl with all sampling strategy +Rank 0: Loaded 29878 samples from VC:s3://gui/data_20250328/android/text_ocr_20250409.jsonl +Rank 0: Loading mac_orc_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/mac/element_ocr_20250328.jsonl with all sampling strategy +Rank 0: Loaded 4393 samples from VC:s3://gui/data_20250328/mac/element_ocr_20250328.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/ubuntu/internvl_grounding_20250407.jsonl with random:80% sampling strategy +Rank 0: Loaded 254 samples from VC:s3://gui/data_20250310/ubuntu/internvl_grounding_20250407.jsonl +Rank 0: Loading windows_click_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/internvl_grounding_function_20250421.jsonl with random:80% sampling strategy +Rank 0: Loaded 1802 samples from VC:s3://gui/data_20250310/windows/internvl_grounding_function_20250421.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/ubuntu/internvl_grounding_20250407.jsonl with random:80% sampling strategy +Rank 0: Loaded 53 samples from VC:s3://gui/data_20250317/ubuntu/internvl_grounding_20250407.jsonl +Rank 0: Loading windows_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/internvl_grounding_20250421_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 6578 samples from VC:s3://gui/data_20250317/windows/internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/crop_internvl_grounding_20250421_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 3287 samples from VC:s3://gui/data_20250317/windows/crop_internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading windows_click_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/internvl_grounding_function_20250421_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 542 samples from VC:s3://gui/data_20250317/windows/internvl_grounding_function_20250421_20250722.jsonl +Rank 0: Loading windows_crop_click_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/crop_internvl_grounding_function_20250421_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 270 samples from VC:s3://gui/data_20250317/windows/crop_internvl_grounding_function_20250421_20250722.jsonl +Rank 0: Loading windows_internvl_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/internvl_grounding_20250421_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 10908 samples from VC:s3://gui/data_20250310/windows/internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/crop_internvl_grounding_20250421_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 5453 samples from VC:s3://gui/data_20250310/windows/crop_internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading android_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/android/internvl_grounding_20250409.jsonl with random:80% sampling strategy +Rank 0: Loaded 13950 samples from VC:s3://gui/data_20250328/android/internvl_grounding_20250409.jsonl +Rank 0: Loading windows_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui-agent/data_20250328/windows/internvl_grounding_20250425_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 12390 samples from VC:s3://gui-agent/data_20250328/windows/internvl_grounding_20250425_20250722.jsonl +Rank 0: Loading web_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/web_25k/internvl_grounding_20250409.jsonl with random:80% sampling strategy +Rank 0: Loaded 13402 samples from VC:s3://gui/data_20250328/web_25k/internvl_grounding_20250409.jsonl +Rank 0: Loading icon_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/icon_canva/icon_anno_20250328.jsonl with all sampling strategy +Rank 0: Loaded 81303 samples from VC:s3://gui/data_20250328/icon_canva/icon_anno_20250328.jsonl +Rank 0: Loading mac_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/mac/internvl_grounding_20250410.jsonl with all sampling strategy +Rank 0: Loaded 1251 samples from VC:s3://gui-agent/data_20250407/mac/internvl_grounding_20250410.jsonl +Rank 0: Loading iphone_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/iphone/white/internvl_grounding_20250410.jsonl with all sampling strategy +Rank 0: Loaded 27849 samples from VC:s3://gui-agent/data_20250407/iphone/white/internvl_grounding_20250410.jsonl +Rank 0: Loading web_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/web/internvl_grounding_20250414.jsonl with random:80% sampling strategy +Rank 0: Loaded 51607 samples from VC:s3://gui-agent/data_20250407/web/internvl_grounding_20250414.jsonl +Rank 0: Loading android_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/android/internvl_grounding_20250410.jsonl with random:80% sampling strategy +Rank 0: Loaded 6281 samples from VC:s3://gui-agent/data_20250407/android/internvl_grounding_20250410.jsonl +Rank 0: Loading windows_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/internvl_grounding_20250416_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 25961 samples from VC:s3://gui-agent/data_20250407/windows/internvl_grounding_20250416_20250722.jsonl +Rank 0: Loading windows_cropping_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/sub_internvl_grounding_20250421.jsonl with random:40% sampling strategy +Rank 0: Loaded 9763 samples from VC:s3://gui-agent/data_20250407/windows/sub_internvl_grounding_20250421.jsonl +Rank 0: Loading iphone_internvl_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/internvl_grounding_20250417.jsonl with all sampling strategy +Rank 0: Loaded 16896 samples from VC:s3://gui-agent/data_20250414/iphone/internvl_grounding_20250417.jsonl +Rank 0: Loading iphone_human_internvl_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/human_internvl_grounding_20250421.jsonl with all sampling strategy +Rank 0: Loaded 927 samples from VC:s3://gui-agent/data_20250414/iphone/human_internvl_grounding_20250421.jsonl +Rank 0: Loading mac_human_internvl_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/mac/human_internvl_grounding_20250418.jsonl with all sampling strategy +Rank 0: Loaded 3051 samples from VC:s3://gui-agent/data_20250414/mac/human_internvl_grounding_20250418.jsonl +Rank 0: Loading android_internvl_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/Android/internvl_grounding_20250429.jsonl with random:80% sampling strategy +Rank 0: Loaded 25217 samples from VC:s3://gui-agent/data_20250421/Android/internvl_grounding_20250429.jsonl +Rank 0: Loading android_internvl_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/Android/internvl_grounding_20250429.jsonl with random:80% sampling strategy +Rank 0: Loaded 12677 samples from VC:s3://gui-agent/data_20250428/Android/internvl_grounding_20250429.jsonl +Rank 0: Loading web_canvas_internvl_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/web_canvas/internvl_grounding_20250429.jsonl with random:40% sampling strategy +Rank 0: Loaded 1566 samples from VC:s3://gui-agent/data_20250428/web_canvas/internvl_grounding_20250429.jsonl +Rank 0: Loading web_internvl_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/web/internvl_grounding_20250505.jsonl with random:80% sampling strategy +Rank 0: Loaded 174170 samples from VC:s3://gui-agent/data_20250421/web/internvl_grounding_20250505.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/ubuntu/internvl_grounding_20250505.jsonl with random:80% sampling strategy +Rank 0: Loaded 24862 samples from VC:s3://gui-agent/data_20250428/ubuntu/internvl_grounding_20250505.jsonl +Rank 0: Loading android_internvl_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/android/internvl_grounding_20250506.jsonl with random:80% sampling strategy +Rank 0: Loaded 9142 samples from VC:s3://gui-agent/data_20250505/android/internvl_grounding_20250506.jsonl +Rank 0: Loading windows_internvl_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/internvl_grounding_20250508_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 4229 samples from VC:s3://gui-agent/data_20250505/windows/internvl_grounding_20250508_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/crop_internvl_grounding_20250508_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 4200 samples from VC:s3://gui-agent/data_20250505/windows/crop_internvl_grounding_20250508_20250722.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250508 +Rank 0: Loading VC:s3://gui-agent/data_20250508/ubuntu/internvl_grounding_20250509.jsonl with random:80% sampling strategy +Rank 0: Loaded 2868 samples from VC:s3://gui-agent/data_20250508/ubuntu/internvl_grounding_20250509.jsonl +Rank 0: Loading windows_internvl_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250510_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 9494 samples from VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250510_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250510_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 9494 samples from VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250510_20250722.jsonl +Rank 0: Loading windows_internvl_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250526_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 2270 samples from VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250526_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250526_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 2270 samples from VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250526_20250722.jsonl +Rank 0: Loading windows_internvl_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250529_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 28466 samples from VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250529_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250529_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 28466 samples from VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250529_20250722.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250619_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1620 samples from VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250619_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250619_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1620 samples from VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250619_20250722.jsonl +Rank 0: Loading windows_hover_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250620_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1714 samples from VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250620_20250722.jsonl +Rank 0: Loading windows_crop_hover_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250620_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 1714 samples from VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250620_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_1 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 1372 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_2 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2743 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_3 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2743 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_pure_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_4 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 590 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_5 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 1296 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_6 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 1296 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_pure_paste.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/internvl_grounding_20250627_20250722.jsonl with all sampling strategy +Rank 0: Loaded 4230 samples from VC:s3://gui-agent/data_20250630/windows/internvl_grounding_20250627_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/crop_internvl_grounding_20250627_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 4230 samples from VC:s3://gui-agent/data_20250630/windows/crop_internvl_grounding_20250627_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_1 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 1692 samples from VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 3384 samples from VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 3384 samples from VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_pure_paste.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/internvl_grounding_20250630_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1077 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/internvl_grounding_20250630_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_internvl_grounding_20250630_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 862 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_internvl_grounding_20250630_20250722.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/internvl_grounding_20250703_20250722.jsonl with all sampling strategy +Rank 0: Loaded 2676 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/internvl_grounding_20250703_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_internvl_grounding_20250703_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 2676 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_internvl_grounding_20250703_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_4 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 1070 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_5 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2141 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_6 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2141 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_pure_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_7 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 431 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_8 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 862 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_9 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 862 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_pure_paste.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/internvl_grounding_20250708_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1344 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/internvl_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_internvl_grounding_20250708_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1344 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_internvl_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250707_1 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_internvl_grounding_20250708.jsonl with random:40% sampling strategy +Rank 0: Loaded 292 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_internvl_grounding_20250708.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250707_2 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_internvl_grounding_20250708.jsonl with random:40% sampling strategy +Rank 0: Loaded 1075 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_internvl_grounding_20250708.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250707_3 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_internvl_grounding_20250708.jsonl with random:40% sampling strategy +Rank 0: Loaded 1075 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_internvl_grounding_20250708.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/internvl_grounding_20250717_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1509 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/internvl_grounding_20250717_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_internvl_grounding_20250717_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 1207 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_internvl_grounding_20250717_20250722.jsonl +Rank 0: Loading uibert_train_ground_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/uibert_train_ground_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 4646 samples from VC:s3://gui/new_annotations/gui_data_grounding/uibert_train_ground_d240430_v1.jsonl +Rank 0: Loading openapp_taperception_grounding_d240815_v2 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/openapp_taperception_grounding_d240815_v2.jsonl with all sampling strategy +Rank 0: Loaded 2500 samples from VC:s3://gui/new_annotations/gui_data_grounding/openapp_taperception_grounding_d240815_v2.jsonl +Rank 0: Loading openapp_widget_grounding_d240815_v2 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/openapp_widget_grounding_d240815_v2.jsonl with all sampling strategy +Rank 0: Loaded 14878 samples from VC:s3://gui/new_annotations/gui_data_grounding/openapp_widget_grounding_d240815_v2.jsonl +Rank 0: Loading openapp_mug_grounding_d240812 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/openapp_mug_grounding_d240812.jsonl with all sampling strategy +Rank 0: Loaded 26090 samples from VC:s3://gui/new_annotations/gui_data_grounding/openapp_mug_grounding_d240812.jsonl +Rank 0: Loading private_ui_phone_2403_ground_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/private_ui_phone_2403_ground_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 24798 samples from VC:s3://gui/new_annotations/gui_data_grounding/private_ui_phone_2403_ground_d240430_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_ground_d240521_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2405_ground_d240521_v1.jsonl with all sampling strategy +Rank 0: Loaded 5008 samples from VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2405_ground_d240521_v1.jsonl +Rank 0: Loading private_ui_aig_share_2406_ground_d240612_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2406_ground_d240612_v1.jsonl with all sampling strategy +Rank 0: Loaded 7903 samples from VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2406_ground_d240612_v1.jsonl +Rank 0: Loading windows_pc_agent_e_planning_cot +Rank 0: Loading VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 55564 samples from VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data.jsonl +Rank 0: Loading windows_pc_agent_e_navigation +Rank 0: Loading VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data_without_think.jsonl with all sampling strategy +Rank 0: Loaded 27782 samples from VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data_without_think.jsonl +Rank 0: Loading os_genesis_ac_training_data +Rank 0: Skipping os_genesis_ac_training_data due to repeat_time=0 +Rank 0: Loading os_genesis_aw_training_data +Rank 0: Skipping os_genesis_aw_training_data due to repeat_time=0 +Rank 0: Loading os_genesis_web_training +Rank 0: Skipping os_genesis_web_training due to repeat_time=0 +Rank 0: Loading gui_odyssey_plus_1 +Rank 0: Skipping gui_odyssey_plus_1 due to repeat_time=0 +Rank 0: Loading gui_odyssey_plus_2 +Rank 0: Skipping gui_odyssey_plus_2 due to repeat_time=0 +Rank 0: Loading gui_odyssey_plus_custom_3 +Rank 0: Skipping gui_odyssey_plus_custom_3 due to repeat_time=0 +Rank 0: Loading mm_gui_mid +Rank 0: Skipping mm_gui_mid due to repeat_time=0 +Rank 0: Loading text_gui_mid +Rank 0: Skipping text_gui_mid due to repeat_time=0 +Rank 0: Loading gui_mid_trajectory +Rank 0: Skipping gui_mid_trajectory due to repeat_time=0 +Rank 0: Loading ubuntu_rag +Rank 0: Loading VC:s3://gui-agent/cua_text_rag/ubuntu_rag.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7024 samples from VC:s3://gui-agent/cua_text_rag/ubuntu_rag.jsonl +Rank 0: Loading windows_rag +Rank 0: Loading VC:s3://gui-agent/cua_text_rag/windows_rag.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3144 samples from VC:s3://gui-agent/cua_text_rag/windows_rag.jsonl +Rank 0: Total training samples: 6240940 +Rank 0: Formatting inputs...Skip in lazy mode +Detected kernel version 3.10.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher. +Rank 0: Length of multimodal samples: 6230528, pure textual samples: 9984 +Parameter Offload: Total persistent parameters: 755712 in 408 params + 0%| | 0/12188 [00:00 + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f931c150220> +[Try #0] Failed to fetch sample 4409357 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f931c150220> +Problematic sample: {'image': '20240823_045930_before_screenshot_sub3.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Data Providers'"}, {'from': 'gpt', 'value': '\nclick(x=0.8015, y=0.921)\n'}]} + 82%|████████▏ | 10042/12188 [06:47<4:32:30, 7.62s/it] {'loss': 0.266, 'grad_norm': 0.6959317070788847, 'learning_rate': 7.919700013163128e-07, 'epoch': 0.82} + 82%|████████▏ | 10042/12188 [06:47<4:32:30, 7.62s/it] 82%|████████▏ | 10043/12188 [06:55<4:31:47, 7.60s/it] {'loss': 0.3084, 'grad_norm': 0.6561030557495207, 'learning_rate': 7.912525264167342e-07, 'epoch': 0.82} + 82%|████████▏ | 10043/12188 [06:55<4:31:47, 7.60s/it] 82%|████████▏ | 10044/12188 [07:04<4:50:29, 8.13s/it] {'loss': 0.3039, 'grad_norm': 0.6985738348086716, 'learning_rate': 7.905353487321404e-07, 'epoch': 0.82} + 82%|████████▏ | 10044/12188 [07:04<4:50:29, 8.13s/it] 82%|████████▏ | 10045/12188 [07:13<4:54:18, 8.24s/it] {'loss': 0.3281, 'grad_norm': 0.7515329199893266, 'learning_rate': 7.898184683131782e-07, 'epoch': 0.82} + 82%|████████▏ | 10045/12188 [07:13<4:54:18, 8.24s/it] 82%|████████▏ | 10046/12188 [07:20<4:45:48, 8.01s/it] {'loss': 0.3077, 'grad_norm': 0.7441253025005456, 'learning_rate': 7.891018852104709e-07, 'epoch': 0.82} + 82%|████████▏ | 10046/12188 [07:20<4:45:48, 8.01s/it] 82%|████████▏ | 10047/12188 [07:27<4:31:22, 7.61s/it] {'loss': 0.3055, 'grad_norm': 0.8049324881088321, 'learning_rate': 7.883855994746237e-07, 'epoch': 0.82} + 82%|████████▏ | 10047/12188 [07:27<4:31:22, 7.61s/it]W0817 22:15:25.843000 130210 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 22:15:25.843000 130210 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 22:15:25.843000 130210 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 22:15:25.843000 130210 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 22:15:25.859000 79385 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 22:15:25.859000 79385 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 22:15:25.859000 79385 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 22:15:25.859000 79385 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 22:15:25.942000 11617 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 22:15:25.942000 11617 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 22:15:25.942000 11617 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 22:15:25.942000 11617 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 22:15:32.734000 49179 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0817 22:15:32.734000 49179 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0817 22:15:32.734000 49179 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0817 22:15:32.734000 49179 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +[2025-08-17 22:16:08,477] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 22:16:08,480] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 22:16:08,494] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 22:16:08,494] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 22:16:08,495] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 22:16:08,496] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 22:16:08,496] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 22:16:08,496] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 22:16:08,512] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 22:16:08,511] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 22:16:08,511] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 22:16:08,511] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 22:16:08,511] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 22:16:08,512] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 22:16:08,512] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 22:16:08,512] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 22:16:08,527] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 22:16:08,527] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 22:16:08,527] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 22:16:08,527] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 22:16:08,528] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 22:16:08,528] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 22:16:08,524] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 22:16:08,544] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 22:16:08,545] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 22:16:08,545] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 22:16:08,547] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 22:16:08,547] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 22:16:08,545] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 22:16:08,545] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 22:16:08,545] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 22:16:08,548] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 22:16:08,549] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 22:16:08,549] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 22:16:08,538] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 22:16:08,540] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 22:16:08,540] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 22:16:08,540] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 22:16:08,540] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 22:16:09,318] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 22:16:09,328] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 22:16:09,332] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 22:16:09,333] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 22:16:09,334] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 22:16:09,335] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 22:16:09,336] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 22:16:09,336] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 22:16:17,699] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 22:16:17,702] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 22:16:17,688] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 22:16:17,688] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 22:16:17,689] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 22:16:17,689] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 22:16:17,704] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 22:16:17,705] [INFO] [comm.py:652:in[2025-08-17 22:16:17,708] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 22:16:17,709] [INFO] [comm.py:652:init_distributed] cdb=None +it_distributed] cdb=None +[2025-08-17 22:16:17,707] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 22:16:17,691] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 22:16:17,692] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 22:16:17,704] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 22:16:17,703] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 22:16:17,703] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 22:16:17,705] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 22:16:17,706] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 22:16:17,710] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 22:16:17,711] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 22:16:18,133] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 22:16:18,161] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 22:16:18,170] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 22:16:18,169] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 22:16:18,185] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 22:16:18,185] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 22:16:18,179] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 22:16:18,251] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 22:16:18,251] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 22:16:18,257] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 22:16:18,267] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +ce None world_size = 64 +[2025-08-17 22:16:18,269] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 22:16:18,280] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to mo[2025-08-17 22:16:18,280] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +e Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. + on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 22:16:18,285] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 22:16:18,285] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 22:16:18,292] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 22:16:18,296] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 22:16:18,296] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +ce None world_size = 64 +[2025-08-17 22:16:18,296] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 22:16:18,296] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 22:16:18,303] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 22:16:18,304] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 22:16:18,304] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 22:16:18,305] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 22:16:18,309] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 22:16:18,309] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 22:16:18,301] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 22:16:18,320] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 22:16:18,319] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 22:16:18,321] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 22:16:18,322] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 22:16:18,322] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 22:16:18,323] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 22:16:18,324] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 22:16:18,317] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 22:16:18,330] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 22:16:18,330] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 22:16:18,330] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 22:16:18,325] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 22:16:18,327] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 22:16:18,327] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 22:16:18,327] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 22:16:18,437] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 22:16:18,459] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 22:16:18,468] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 22:16:18,468] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 22:16:18,495] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 22:16:18,495] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 22:16:18,497] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 22:16:18,497] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-17 22:16:28,742] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 22:16:28,742] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 22:16:28,743] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 22:16:28,743] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 22:16:28,743] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 22:16:28,743] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 22:16:28,743] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 22:16:28,744] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-17 22:16:29,305] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 22:16:29,502] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 22:16:29,514] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 22:16:29,523] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 22:16:29,529] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 22:16:29,529] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-17 22:16:29,532] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-17 22:16:29,535] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. + Loading checkpoint shards: 0%| | 0/2 [00:00 before Client(conf_path) +Rank 0: --> after Client(conf_path) +Rank 0: Loading datasets: /mnt/petrelfs/liuzhaoyang/workspace/GUIAgent/internvl_chat/data/internvl_meta/meta/meta_250816_1.json +Rank 0: Loading guienv +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl with all sampling strategy +Rank 0: Loaded 327972 samples from VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl +Rank 0: Loading omniact +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl with all sampling strategy +Rank 0: Loaded 6720 samples from VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl +Rank 0: Loading ricoig16k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16133 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl +Rank 0: Loading ricosca +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl with all sampling strategy +Rank 0: Loaded 173212 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl +Rank 0: Loading seeclick +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl with all sampling strategy +Rank 0: Loaded 271121 samples from VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl +Rank 0: Loading ui_refexp +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl with all sampling strategy +Rank 0: Loaded 15624 samples from VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl +Rank 0: Loading webui350k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 57389 samples from VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl +Rank 0: Loading widget_captioning +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl with all sampling strategy +Rank 0: Loaded 101426 samples from VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl +Rank 0: Loading aitw-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl +Rank 0: Loading aitw-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl +Rank 0: Loading aitw-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl +Rank 0: Loading amex-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl +Rank 0: Loading amex-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl +Rank 0: Loading amex-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl +Rank 0: Loading android_control +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 149428 samples from VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl +Rank 0: Loading coat +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl with all sampling strategy +Rank 0: Loaded 11833 samples from VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl +Rank 0: Loading guiact-web-multi-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl +Rank 0: Loading guiact-web-multi-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl +Rank 0: Loading guiact-web-multi-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl +Rank 0: Loading guiact-web-single +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl with all sampling strategy +Rank 0: Loaded 67396 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl +Rank 0: Loading guide +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl with all sampling strategy +Rank 0: Loaded 13544 samples from VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl +Rank 0: Loading gui-odyssey-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl +Rank 0: Loading gui-odyssey-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl +Rank 0: Loading gui-odyssey-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl +Rank 0: Loading mind2web-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l1_202507011.jsonl +Rank 0: Loading mind2web-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l2_202507011.jsonl +Rank 0: Loading mind2web-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l3_202507011.jsonl +Rank 0: Loading miniwob-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l1_202507011.jsonl +Rank 0: Loading miniwob-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l2_202507011.jsonl +Rank 0: Loading miniwob-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l3_202507011.jsonl +Rank 0: Loading aguvis_android_control-v2 +Rank 0: Skipping aguvis_android_control-v2 due to repeat_time=0 +Rank 0: Loading aguvis_coat-v2 +Rank 0: Skipping aguvis_coat-v2 due to repeat_time=0 +Rank 0: Loading aguvis_docvqa_grounding +Rank 0: Skipping aguvis_docvqa_grounding due to repeat_time=0 +Rank 0: Loading aguvis_guiact-web-multi +Rank 0: Skipping aguvis_guiact-web-multi due to repeat_time=0 +Rank 0: Loading aguvis_guiact-web-single-v2 +Rank 0: Skipping aguvis_guiact-web-single-v2 due to repeat_time=0 +Rank 0: Loading aguvis_guide_si_10k-v2 +Rank 0: Skipping aguvis_guide_si_10k-v2 due to repeat_time=0 +Rank 0: Loading aguvis_guienv +Rank 0: Skipping aguvis_guienv due to repeat_time=0 +Rank 0: Loading aguvis_mind2web_train_v1.0.1 +Rank 0: Skipping aguvis_mind2web_train_v1.0.1 due to repeat_time=0 +Rank 0: Loading aguvis_omniact +Rank 0: Skipping aguvis_omniact due to repeat_time=0 +Rank 0: Loading aguvis_osatlas_ui_tars_cleaned +Rank 0: Skipping aguvis_osatlas_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading aguvis_ricoig16k +Rank 0: Skipping aguvis_ricoig16k due to repeat_time=0 +Rank 0: Loading aguvis_ricosca +Rank 0: Skipping aguvis_ricosca due to repeat_time=0 +Rank 0: Loading aguvis_seeclick_mi_ui_tars_cleaned +Rank 0: Skipping aguvis_seeclick_mi_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading aguvis_seeclick_ui_tars_cleaned_fixed +Rank 0: Skipping aguvis_seeclick_ui_tars_cleaned_fixed due to repeat_time=0 +Rank 0: Loading aguvis_ui_refexp +Rank 0: Skipping aguvis_ui_refexp due to repeat_time=0 +Rank 0: Loading aguvis_webui350k +Rank 0: Skipping aguvis_webui350k due to repeat_time=0 +Rank 0: Loading aguvis_widget_captioning +Rank 0: Skipping aguvis_widget_captioning due to repeat_time=0 +Rank 0: Loading icon_caption_icon_v0222_description +Rank 0: Skipping icon_caption_icon_v0222_description due to repeat_time=0 +Rank 0: Loading icon_grounding_icon_v0222_grounding +Rank 0: Skipping icon_grounding_icon_v0222_grounding due to repeat_time=0 +Rank 0: Loading refusal_component_final_1.5m +Rank 0: Skipping refusal_component_final_1.5m due to repeat_time=0 +Rank 0: Loading refusal_component_library_snap_icon_data_grounding +Rank 0: Skipping refusal_component_library_snap_icon_data_grounding due to repeat_time=0 +Rank 0: Loading refusal_component_v1_130k +Rank 0: Skipping refusal_component_v1_130k due to repeat_time=0 +Rank 0: Loading refusal_guienv +Rank 0: Skipping refusal_guienv due to repeat_time=0 +Rank 0: Loading refusal_icon_v0222_grounding +Rank 0: Skipping refusal_icon_v0222_grounding due to repeat_time=0 +Rank 0: Loading refusal_osatlas_ui_tars_cleaned +Rank 0: Skipping refusal_osatlas_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading refusal_ricosca +Rank 0: Skipping refusal_ricosca due to repeat_time=0 +Rank 0: Loading refusal_seeclick_mi_ui_tars_cleaned +Rank 0: Skipping refusal_seeclick_mi_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading refusal_seeclick_ui_tars_cleaned_fixed +Rank 0: Skipping refusal_seeclick_ui_tars_cleaned_fixed due to repeat_time=0 +Rank 0: Loading refusal_training_data_icon_grounded_merged +Rank 0: Skipping refusal_training_data_icon_grounded_merged due to repeat_time=0 +Rank 0: Loading component_generated_component_final_1.5m_cleaned_split +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_final_1.5m_cleaned_split_20250713.jsonl with random:10% sampling strategy +Rank 0: Loaded 3987 samples from VC:s3://gui-agent/jedi/annotations_250713/component_final_1.5m_cleaned_split_20250713.jsonl +Rank 0: Loading component_generated_component_library_snap_icon_data_description +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_description_conversations_20250713.jsonl with random:50% sampling strategy +Rank 0: Loaded 11061 samples from VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_description_conversations_20250713.jsonl +Rank 0: Loading component_generated_component_library_snap_icon_data_grounding +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_grounding_conversations_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 4424 samples from VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_grounding_conversations_20250713.jsonl +Rank 0: Loading component_generated_component_v1_130k +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_v1_130k_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 26376 samples from VC:s3://gui-agent/jedi/annotations_250713/component_v1_130k_20250713.jsonl +Rank 0: Loading component_rule-based_doc_data_new +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/doc_data_new_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 3153 samples from VC:s3://gui-agent/jedi/annotations_250713/doc_data_new_20250713.jsonl +Rank 0: Loading component_rule-based_doc_scroll_data_new +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/doc_scroll_data_new_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 603 samples from VC:s3://gui-agent/jedi/annotations_250713/doc_scroll_data_new_20250713.jsonl +Rank 0: Loading component_rule-based_ethercalc_v1 +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/ethercalc_v1_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 2012 samples from VC:s3://gui-agent/jedi/annotations_250713/ethercalc_v1_20250713.jsonl +Rank 0: Loading component_rule-based_slide_v1_17k +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/slide_v1_17k_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 2363 samples from VC:s3://gui-agent/jedi/annotations_250713/slide_v1_17k_20250713.jsonl +Rank 0: Loading icon_caption_ios_app_data +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/ios_app_data_conversations-images_pure_color_background_20250713.jsonl with all sampling strategy +Rank 0: Loaded 49498 samples from VC:s3://gui-agent/jedi/annotations_250713/ios_app_data_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_caption_mac_app_data +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/mac_app_data_conversations-images_pure_color_background_20250713.jsonl with all sampling strategy +Rank 0: Loaded 18083 samples from VC:s3://gui-agent/jedi/annotations_250713/mac_app_data_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_caption_training_data_icon +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_pure_color_background_20250713.jsonl with random:50% sampling strategy +Rank 0: Loaded 75874 samples from VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_grounding_training_data_icon_grounded_merged +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_grounded_merged_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 5466 samples from VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_grounded_merged_20250713.jsonl +Rank 0: Loading layout_layout200k_training_data_qwen25 +Rank 0: Skipping layout_layout200k_training_data_qwen25 due to repeat_time=0 +Rank 0: Loading layout_layout200k_grounding_training_data_qwen25 +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/layout200k_grounding_training_data_qwen25_20250713.jsonl with random:10% sampling strategy +Rank 0: Loaded 158612 samples from VC:s3://gui-agent/jedi/annotations_250713/layout200k_grounding_training_data_qwen25_20250713.jsonl +Rank 0: Loading layout_layout400k_claude_training_data_qwen25_split +Rank 0: Skipping layout_layout400k_claude_training_data_qwen25_split due to repeat_time=0 +Rank 0: Loading layout_layout400k_claude_grounding_training_data_qwen25_split +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/layout400k_claude_grounding_training_data_qwen25_split_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 7540 samples from VC:s3://gui-agent/jedi/annotations_250713/layout400k_claude_grounding_training_data_qwen25_split_20250713.jsonl +Rank 0: Loading layout_os_layout_v1 +Rank 0: Skipping layout_os_layout_v1 due to repeat_time=0 +Rank 0: Loading layout_os_layout_v1_grounding +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/os_layout_v1_grounding_training_data_qwen25_split_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 7857 samples from VC:s3://gui-agent/jedi/annotations_250713/os_layout_v1_grounding_training_data_qwen25_split_20250713.jsonl +Rank 0: Loading mind2web_raw_image +Rank 0: Loading VC:s3://gui-agent/mind2web_train/navigation_20250705.jsonl with all sampling strategy +Rank 0: Loaded 5740 samples from VC:s3://gui-agent/mind2web_train/navigation_20250705.jsonl +Rank 0: Loading ws_android_navigation_20250328 +Rank 0: Skipping ws_android_navigation_20250328 due to repeat_time=0 +Rank 0: Loading ws_android_navigation_20250407 +Rank 0: Skipping ws_android_navigation_20250407 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_w_history_20250328 +Rank 0: Skipping ws_web_navigation_w_history_20250328 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_wo_history_20250328 +Rank 0: Skipping ws_web_navigation_wo_history_20250328 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_20250421 +Rank 0: Skipping ws_web_navigation_20250421 due to repeat_time=0 +Rank 0: Loading ws_ubuntu_navigation_20250328 +Rank 0: Skipping ws_ubuntu_navigation_20250328 due to repeat_time=0 +Rank 0: Loading ws_android_navigation_20250505 +Rank 0: Skipping ws_android_navigation_20250505 due to repeat_time=0 +Rank 0: Loading internal_android_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 48814 samples from VC:s3://gui-agent/data_20250612/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19042 samples from VC:s3://gui-agent/data_20250612/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8363 samples from VC:s3://gui-agent/data_20250612/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 26412 samples from VC:s3://gui-agent/data_20250612/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 57522 samples from VC:s3://gui-agent/data_20250612/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 1342 samples from VC:s3://gui-agent/data_20250624/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 15766 samples from VC:s3://gui-agent/data_20250624/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19280 samples from VC:s3://gui-agent/data_20250630/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 3560 samples from VC:s3://gui-agent/data_20250630/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 9258 samples from VC:s3://gui-agent/data_20250630/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 420 samples from VC:s3://gui-agent/data_20250707/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 8898 samples from VC:s3://gui-agent/data_20250707/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 21026 samples from VC:s3://gui-agent/data_20250707/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 4490 samples from VC:s3://gui-agent/data_20250707/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 22154 samples from VC:s3://gui-agent/data_20250714/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/web/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 11614 samples from VC:s3://gui-agent/data_20250714/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 16767 samples from VC:s3://gui-agent/data_20250714/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 48814 samples from VC:s3://gui-agent/data_20250612/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 11970 samples from VC:s3://gui-agent/data_20250612/android/planning_20250811.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19042 samples from VC:s3://gui-agent/data_20250612/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 4926 samples from VC:s3://gui-agent/data_20250612/mac/planning_20250811.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8363 samples from VC:s3://gui-agent/data_20250612/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3716 samples from VC:s3://gui-agent/data_20250612/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 26412 samples from VC:s3://gui-agent/data_20250612/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 4420 samples from VC:s3://gui-agent/data_20250612/windows/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 115044 samples from VC:s3://gui-agent/data_20250612/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2684 samples from VC:s3://gui-agent/data_20250624/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 15766 samples from VC:s3://gui-agent/data_20250624/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7402 samples from VC:s3://gui-agent/data_20250624/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19280 samples from VC:s3://gui-agent/data_20250630/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3746 samples from VC:s3://gui-agent/data_20250630/android/planning_20250811.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 3560 samples from VC:s3://gui-agent/data_20250630/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 2469 samples from VC:s3://gui-agent/data_20250630/mac/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 18516 samples from VC:s3://gui-agent/data_20250630/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 420 samples from VC:s3://gui-agent/data_20250707/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 243 samples from VC:s3://gui-agent/data_20250707/mac/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 8898 samples from VC:s3://gui-agent/data_20250707/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 1188 samples from VC:s3://gui-agent/data_20250707/windows/planning_20250811.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8979 samples from VC:s3://gui-agent/data_20250707/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 1599 samples from VC:s3://gui-agent/data_20250707/android/planning_20250811.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 21026 samples from VC:s3://gui-agent/data_20250707/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2657 samples from VC:s3://gui-agent/data_20250707/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 22154 samples from VC:s3://gui-agent/data_20250714/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2383 samples from VC:s3://gui-agent/data_20250714/windows/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/web/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 23229 samples from VC:s3://gui-agent/data_20250714/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 16767 samples from VC:s3://gui-agent/data_20250714/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2389 samples from VC:s3://gui-agent/data_20250714/ubuntu/planning_20250811.jsonl +Rank 0: Loading private_aig_share_0815_logo_oral_operation_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_oral_operation_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_oral_operation_d240924_v1.jsonl +Rank 0: Loading private_aig_share_0815_logo_region_caption_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_region_caption_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_region_caption_d240924_v1.jsonl +Rank 0: Loading private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2.jsonl with all sampling strategy +Rank 0: Loaded 20293 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2.jsonl +Rank 0: Loading private_ui_phone_comment_20240606_json_d20241023_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_comment_20240606_json_d20241023_v2.jsonl with all sampling strategy +Rank 0: Loaded 1055 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_comment_20240606_json_d20241023_v2.jsonl +Rank 0: Loading private_ui_internal_aig_json_d241126 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_json_d241126.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 6837 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_json_d241126.jsonl +Rank 0: Loading private_ui_internal_aig_xml_d241126 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_xml_d241126.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 6873 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_xml_d241126.jsonl +Rank 0: Loading OS_Altas_androidworld_grounding_d241120_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/OS_Altas_androidworld_grounding_d241120_v1.jsonl with all sampling strategy +Rank 0: Loaded 89860 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/OS_Altas_androidworld_grounding_d241120_v1.jsonl +Rank 0: Loading private_ui_aig_share_long_caption_20240604_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_long_caption_20240604_v1.jsonl with repeat:4 sampling strategy +Rank 0: Loaded 3156 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_long_caption_20240604_v1.jsonl +Rank 0: Loading aw_1218_grounding +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/grounding_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/grounding_new.jsonl +Rank 0: Loading aw_1218_regioncaption +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/regioncaption_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/regioncaption_new.jsonl +Rank 0: Loading aw_1218_oral_operation +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/oral_operation_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/oral_operation_new.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600.jsonl with all sampling strategy +Rank 0: Loaded 6600 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1.jsonl +Rank 0: Loading private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607.jsonl with all sampling strategy +Rank 0: Loaded 24620 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607.jsonl +Rank 0: Loading private_ui_phone_2403_long_caption_d20240604_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d20240604_v2.jsonl with all sampling strategy +Rank 0: Loaded 17196 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d20240604_v2.jsonl +Rank 0: Loading private_ui_phone_2403_long_caption_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 5998 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d240430_v1.jsonl +Rank 0: Loading private_ui_phone_2403_ocr_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ocr_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 31276 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ocr_d240430_v1.jsonl +Rank 0: Loading screen_qa_with_bbox_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_with_bbox_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 62401 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_with_bbox_d240430_v1.jsonl +Rank 0: Loading screenai_layout_20240604_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screenai_layout_20240604_v1.jsonl with all sampling strategy +Rank 0: Loaded 22076 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screenai_layout_20240604_v1.jsonl +Rank 0: Loading amex_grounding_d240813_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/amex_grounding_d240813_v1.jsonl with all sampling strategy +Rank 0: Loaded 102007 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/amex_grounding_d240813_v1.jsonl +Rank 0: Loading guicourse_guienv_text_grounding_1_d240815_v3 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_1_d240815_v3.jsonl with all sampling strategy +Rank 0: Loaded 63581 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_1_d240815_v3.jsonl +Rank 0: Loading guicourse_guienv_text_grounding_2_d240815_v3 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_2_d240815_v3.jsonl with all sampling strategy +Rank 0: Loaded 6852 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_2_d240815_v3.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1.jsonl +Rank 0: Loading screen_qa_short_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_short_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 27880 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_short_d240430_v1.jsonl +Rank 0: Loading private_aig_share_0815_logo_grounding_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_grounding_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_grounding_d240924_v1.jsonl +Rank 0: Loading private_schedual_extract_20240520_v2_r464_reprompt_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_schedual_extract_20240520_v2_r464_reprompt_d240607.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 928 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_schedual_extract_20240520_v2_r464_reprompt_d240607.jsonl +Rank 0: Loading private_ui2json_app_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_app_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2488 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_app_d20240822_v1.jsonl +Rank 0: Loading private_ui2json_os_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_os_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 1242 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_os_d20240822_v1.jsonl +Rank 0: Loading private_ui2json_web_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_web_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2360 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_web_d20240822_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607.jsonl with all sampling strategy +Rank 0: Loaded 3791 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607.jsonl +Rank 0: Loading private_ui_aig_share_2405_marker_recognition_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_marker_recognition_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5179 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_marker_recognition_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_ocr_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_ocr_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5090 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_ocr_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_operation_oral_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_operation_oral_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5070 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_operation_oral_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5248 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2408_region_caption_d240903_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2408_region_caption_d240903_v1.jsonl with all sampling strategy +Rank 0: Loaded 5854 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2408_region_caption_d240903_v1.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1.jsonl +Rank 0: Loading uground_web_direct_150k_description_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_direct_150k_description_filtered_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 74208 samples from VC:s3://gui/new_annotations/uground/web_direct_150k_description_filtered_202507011.jsonl +Rank 0: Loading uground_web_direct_258k_function_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_direct_258k_function_filtered_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 124132 samples from VC:s3://gui/new_annotations/uground/web_direct_258k_function_filtered_202507011.jsonl +Rank 0: Loading uground_web_hybrid_773k_max_25qa_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_hybrid_773k_max_25qa_filtered_new_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 621928 samples from VC:s3://gui/new_annotations/uground/web_hybrid_773k_max_25qa_filtered_new_202507011.jsonl +Rank 0: Loading altas_windows +Rank 0: Loading VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_windows_splited_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 537672 samples from VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_windows_splited_202507011.jsonl +Rank 0: Loading altas_linux +Rank 0: Loading VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_linux_splited_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 21578 samples from VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_linux_splited_202507011.jsonl +Rank 0: Loading atlas_macos +Rank 0: Loading VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_macos_splited_202507011.jsonl with random:20% sampling strategy +Rank 0: Loaded 3680 samples from VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_macos_splited_202507011.jsonl +Rank 0: Loading android_action_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/android/filter_action_grounding_20250405_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 5621 samples from VC:s3://gui/data_20250328/android/filter_action_grounding_20250405_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250328 +Rank 0: Loading VC:s3://gui-agent/data_20250328/windows/action_grounding_20250409_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 11980 samples from VC:s3://gui-agent/data_20250328/windows/action_grounding_20250409_202507011_20250722.jsonl +Rank 0: Loading web_action_grounding_20250328 +Rank 0: Loading VC:s3://gui-agent/data_20250328/web_25k/action_grounding_20250404_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 9459 samples from VC:s3://gui-agent/data_20250328/web_25k/action_grounding_20250404_202507011.jsonl +Rank 0: Loading ubuntu_action_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/ubuntu/action_grounding_20250407_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 328 samples from VC:s3://gui/data_20250310/ubuntu/action_grounding_20250407_202507011.jsonl +Rank 0: Loading ubuntu_action_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/ubuntu/action_grounding_20250407_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 54 samples from VC:s3://gui/data_20250317/ubuntu/action_grounding_20250407_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/action_grounding_20250421_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 480 samples from VC:s3://gui/data_20250317/windows/action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/crop_action_grounding_20250421_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 240 samples from VC:s3://gui/data_20250317/windows/crop_action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/action_grounding_20250421_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 944 samples from VC:s3://gui/data_20250310/windows/action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/crop_action_grounding_20250421_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 472 samples from VC:s3://gui/data_20250310/windows/crop_action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading mac_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/mac/action_grounding_20250410_202507011.jsonl with all sampling strategy +Rank 0: Loaded 1578 samples from VC:s3://gui-agent/data_20250407/mac/action_grounding_20250410_202507011.jsonl +Rank 0: Loading iphone_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/iphone/white/action_grounding_20250410_202507011.jsonl with all sampling strategy +Rank 0: Loaded 20394 samples from VC:s3://gui-agent/data_20250407/iphone/white/action_grounding_20250410_202507011.jsonl +Rank 0: Loading web_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/web/action_grounding_20250414_202507011.jsonl with random:20% sampling strategy +Rank 0: Loaded 14285 samples from VC:s3://gui-agent/data_20250407/web/action_grounding_20250414_202507011.jsonl +Rank 0: Loading android_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/android/action_grounding_20250410_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 3590 samples from VC:s3://gui-agent/data_20250407/android/action_grounding_20250410_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/action_grounding_20250416_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 21422 samples from VC:s3://gui-agent/data_20250407/windows/action_grounding_20250416_202507011_20250722.jsonl +Rank 0: Loading windows_human_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/human_action_grounding_20250416_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 100 samples from VC:s3://gui-agent/data_20250407/windows/human_action_grounding_20250416_202507011_20250722.jsonl +Rank 0: Loading windows_aug_cropping_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/sub_action_grounding_20250421_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 7675 samples from VC:s3://gui-agent/data_20250407/windows/sub_action_grounding_20250421_202507011.jsonl +Rank 0: Loading iphone_action_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/action_grounding_20250417_202507011.jsonl with all sampling strategy +Rank 0: Loaded 20116 samples from VC:s3://gui-agent/data_20250414/iphone/action_grounding_20250417_202507011.jsonl +Rank 0: Loading iphone_human_action_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/human_action_grounding_20250421_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2520 samples from VC:s3://gui-agent/data_20250414/iphone/human_action_grounding_20250421_202507011.jsonl +Rank 0: Loading mac_human_action_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/mac/human_action_grounding_20250418_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7814 samples from VC:s3://gui-agent/data_20250414/mac/human_action_grounding_20250418_202507011.jsonl +Rank 0: Loading android_action_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/Android/action_grounding_20250429_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 17838 samples from VC:s3://gui-agent/data_20250421/Android/action_grounding_20250429_202507011.jsonl +Rank 0: Loading android_action_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/Android/action_grounding_20250429_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 9008 samples from VC:s3://gui-agent/data_20250428/Android/action_grounding_20250429_202507011.jsonl +Rank 0: Loading web_canvas_action_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/web_canvas/action_grounding_20250429_202507011.jsonl with random:20% sampling strategy +Rank 0: Loaded 624 samples from VC:s3://gui-agent/data_20250428/web_canvas/action_grounding_20250429_202507011.jsonl +Rank 0: Loading web_action_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/web/action_grounding_20250505_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 100652 samples from VC:s3://gui-agent/data_20250421/web/action_grounding_20250505_202507011.jsonl +Rank 0: Loading ubuntu_action_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/ubuntu/action_grounding_20250505_202507011.jsonl with all sampling strategy +Rank 0: Loaded 28346 samples from VC:s3://gui-agent/data_20250428/ubuntu/action_grounding_20250505_202507011.jsonl +Rank 0: Loading android_action_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/android/action_grounding_20250506_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 4907 samples from VC:s3://gui-agent/data_20250505/android/action_grounding_20250506_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/action_grounding_20250508_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 2635 samples from VC:s3://gui-agent/data_20250505/windows/action_grounding_20250508_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/crop_action_grounding_20250508_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 5234 samples from VC:s3://gui-agent/data_20250505/windows/crop_action_grounding_20250508_202507011_20250722.jsonl +Rank 0: Loading ubuntu_action_grounding_20250508 +Rank 0: Loading VC:s3://gui-agent/data_20250508/ubuntu/action_grounding_20250509_202507011.jsonl with all sampling strategy +Rank 0: Loaded 3404 samples from VC:s3://gui-agent/data_20250508/ubuntu/action_grounding_20250509_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250510_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250510_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250510_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250510_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250526_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250526_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250526_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250526_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250526_3 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250527_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250527_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_3 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250527_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250527_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250529_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 17050 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250529_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250529_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 17050 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250529_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_1 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250510_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 2855 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250510_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_2 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250526_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 655 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250526_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_3 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250527_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 833 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250527_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_4 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250529_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 2643 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250529_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_1 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250510_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250510_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_2 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250526_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250526_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_3 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250527_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250527_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_4 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250529_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5286 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250529_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_5 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250510_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250510_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_6 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250526_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250526_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_7 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250527_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250527_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_8 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250529_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5286 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250529_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/action_grounding_20250619_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3130 samples from VC:s3://gui-agent/data_20250623/windows/action_grounding_20250619_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250619_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 3130 samples from VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250619_202507011_20250722.jsonl +Rank 0: Loading windows_hover_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/action_grounding_20250620_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 3333 samples from VC:s3://gui-agent/data_20250623/windows/action_grounding_20250620_202507011_20250722.jsonl +Rank 0: Loading windows_crop_hover_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250620_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 3333 samples from VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250620_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_1 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 833 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_2 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1666 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_3 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1666 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_pure_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_4 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 358 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_5 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 782 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_6 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 782 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_pure_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/action_grounding_20250627_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7946 samples from VC:s3://gui-agent/data_20250630/windows/action_grounding_20250627_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/crop_action_grounding_20250627_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 3973 samples from VC:s3://gui-agent/data_20250630/windows/crop_action_grounding_20250627_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_1 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 993 samples from VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1986 samples from VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1986 samples from VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_pure_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/action_grounding_20250630_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 1990 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/action_grounding_20250630_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_action_grounding_20250630_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1990 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_action_grounding_20250630_202507011_20250722.jsonl +Rank 0: Loading windows_human_action_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/action_grounding_20250703_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 5040 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/action_grounding_20250703_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_action_grounding_20250703_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 5040 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_action_grounding_20250703_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_4 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 630 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_5 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1260 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_6 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1260 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_pure_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_7 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 249 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_8 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 498 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_9 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 498 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_pure_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/action_grounding_20250708_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2538 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/action_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_action_grounding_20250708_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1269 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_action_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250707_1 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_action_grounding_20250708.jsonl with random:25% sampling strategy +Rank 0: Loaded 172 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_action_grounding_20250708.jsonl +Rank 0: Loading windows_aug_action_grounding_20250707_2 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_action_grounding_20250708.jsonl with random:25% sampling strategy +Rank 0: Loaded 634 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_action_grounding_20250708.jsonl +Rank 0: Loading windows_aug_action_grounding_20250707_3 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_action_grounding_20250708.jsonl with random:25% sampling strategy +Rank 0: Loaded 634 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_action_grounding_20250708.jsonl +Rank 0: Loading windows_human_action_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/action_grounding_20250717_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2832 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/action_grounding_20250717_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_action_grounding_20250717_20250722.jsonl with all sampling strategy +Rank 0: Loaded 2832 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_action_grounding_20250717_20250722.jsonl +Rank 0: Loading android_ocr_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/android/text_ocr_20250409.jsonl with all sampling strategy +Rank 0: Loaded 29878 samples from VC:s3://gui/data_20250328/android/text_ocr_20250409.jsonl +Rank 0: Loading mac_orc_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/mac/element_ocr_20250328.jsonl with all sampling strategy +Rank 0: Loaded 4393 samples from VC:s3://gui/data_20250328/mac/element_ocr_20250328.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/ubuntu/internvl_grounding_20250407.jsonl with random:80% sampling strategy +Rank 0: Loaded 254 samples from VC:s3://gui/data_20250310/ubuntu/internvl_grounding_20250407.jsonl +Rank 0: Loading windows_click_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/internvl_grounding_function_20250421.jsonl with random:80% sampling strategy +Rank 0: Loaded 1802 samples from VC:s3://gui/data_20250310/windows/internvl_grounding_function_20250421.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/ubuntu/internvl_grounding_20250407.jsonl with random:80% sampling strategy +Rank 0: Loaded 53 samples from VC:s3://gui/data_20250317/ubuntu/internvl_grounding_20250407.jsonl +Rank 0: Loading windows_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/internvl_grounding_20250421_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 6578 samples from VC:s3://gui/data_20250317/windows/internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/crop_internvl_grounding_20250421_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 3287 samples from VC:s3://gui/data_20250317/windows/crop_internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading windows_click_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/internvl_grounding_function_20250421_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 542 samples from VC:s3://gui/data_20250317/windows/internvl_grounding_function_20250421_20250722.jsonl +Rank 0: Loading windows_crop_click_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/crop_internvl_grounding_function_20250421_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 270 samples from VC:s3://gui/data_20250317/windows/crop_internvl_grounding_function_20250421_20250722.jsonl +Rank 0: Loading windows_internvl_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/internvl_grounding_20250421_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 10908 samples from VC:s3://gui/data_20250310/windows/internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/crop_internvl_grounding_20250421_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 5453 samples from VC:s3://gui/data_20250310/windows/crop_internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading android_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/android/internvl_grounding_20250409.jsonl with random:80% sampling strategy +Rank 0: Loaded 13950 samples from VC:s3://gui/data_20250328/android/internvl_grounding_20250409.jsonl +Rank 0: Loading windows_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui-agent/data_20250328/windows/internvl_grounding_20250425_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 12390 samples from VC:s3://gui-agent/data_20250328/windows/internvl_grounding_20250425_20250722.jsonl +Rank 0: Loading web_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/web_25k/internvl_grounding_20250409.jsonl with random:80% sampling strategy +Rank 0: Loaded 13402 samples from VC:s3://gui/data_20250328/web_25k/internvl_grounding_20250409.jsonl +Rank 0: Loading icon_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/icon_canva/icon_anno_20250328.jsonl with all sampling strategy +Rank 0: Loaded 81303 samples from VC:s3://gui/data_20250328/icon_canva/icon_anno_20250328.jsonl +Rank 0: Loading mac_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/mac/internvl_grounding_20250410.jsonl with all sampling strategy +Rank 0: Loaded 1251 samples from VC:s3://gui-agent/data_20250407/mac/internvl_grounding_20250410.jsonl +Rank 0: Loading iphone_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/iphone/white/internvl_grounding_20250410.jsonl with all sampling strategy +Rank 0: Loaded 27849 samples from VC:s3://gui-agent/data_20250407/iphone/white/internvl_grounding_20250410.jsonl +Rank 0: Loading web_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/web/internvl_grounding_20250414.jsonl with random:80% sampling strategy +Rank 0: Loaded 51607 samples from VC:s3://gui-agent/data_20250407/web/internvl_grounding_20250414.jsonl +Rank 0: Loading android_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/android/internvl_grounding_20250410.jsonl with random:80% sampling strategy +Rank 0: Loaded 6281 samples from VC:s3://gui-agent/data_20250407/android/internvl_grounding_20250410.jsonl +Rank 0: Loading windows_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/internvl_grounding_20250416_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 25961 samples from VC:s3://gui-agent/data_20250407/windows/internvl_grounding_20250416_20250722.jsonl +Rank 0: Loading windows_cropping_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/sub_internvl_grounding_20250421.jsonl with random:40% sampling strategy +Rank 0: Loaded 9763 samples from VC:s3://gui-agent/data_20250407/windows/sub_internvl_grounding_20250421.jsonl +Rank 0: Loading iphone_internvl_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/internvl_grounding_20250417.jsonl with all sampling strategy +Rank 0: Loaded 16896 samples from VC:s3://gui-agent/data_20250414/iphone/internvl_grounding_20250417.jsonl +Rank 0: Loading iphone_human_internvl_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/human_internvl_grounding_20250421.jsonl with all sampling strategy +Rank 0: Loaded 927 samples from VC:s3://gui-agent/data_20250414/iphone/human_internvl_grounding_20250421.jsonl +Rank 0: Loading mac_human_internvl_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/mac/human_internvl_grounding_20250418.jsonl with all sampling strategy +Rank 0: Loaded 3051 samples from VC:s3://gui-agent/data_20250414/mac/human_internvl_grounding_20250418.jsonl +Rank 0: Loading android_internvl_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/Android/internvl_grounding_20250429.jsonl with random:80% sampling strategy +Rank 0: Loaded 25217 samples from VC:s3://gui-agent/data_20250421/Android/internvl_grounding_20250429.jsonl +Rank 0: Loading android_internvl_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/Android/internvl_grounding_20250429.jsonl with random:80% sampling strategy +Rank 0: Loaded 12677 samples from VC:s3://gui-agent/data_20250428/Android/internvl_grounding_20250429.jsonl +Rank 0: Loading web_canvas_internvl_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/web_canvas/internvl_grounding_20250429.jsonl with random:40% sampling strategy +Rank 0: Loaded 1566 samples from VC:s3://gui-agent/data_20250428/web_canvas/internvl_grounding_20250429.jsonl +Rank 0: Loading web_internvl_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/web/internvl_grounding_20250505.jsonl with random:80% sampling strategy +Rank 0: Loaded 174170 samples from VC:s3://gui-agent/data_20250421/web/internvl_grounding_20250505.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/ubuntu/internvl_grounding_20250505.jsonl with random:80% sampling strategy +Rank 0: Loaded 24862 samples from VC:s3://gui-agent/data_20250428/ubuntu/internvl_grounding_20250505.jsonl +Rank 0: Loading android_internvl_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/android/internvl_grounding_20250506.jsonl with random:80% sampling strategy +Rank 0: Loaded 9142 samples from VC:s3://gui-agent/data_20250505/android/internvl_grounding_20250506.jsonl +Rank 0: Loading windows_internvl_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/internvl_grounding_20250508_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 4229 samples from VC:s3://gui-agent/data_20250505/windows/internvl_grounding_20250508_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/crop_internvl_grounding_20250508_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 4200 samples from VC:s3://gui-agent/data_20250505/windows/crop_internvl_grounding_20250508_20250722.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250508 +Rank 0: Loading VC:s3://gui-agent/data_20250508/ubuntu/internvl_grounding_20250509.jsonl with random:80% sampling strategy +Rank 0: Loaded 2868 samples from VC:s3://gui-agent/data_20250508/ubuntu/internvl_grounding_20250509.jsonl +Rank 0: Loading windows_internvl_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250510_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 9494 samples from VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250510_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250510_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 9494 samples from VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250510_20250722.jsonl +Rank 0: Loading windows_internvl_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250526_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 2270 samples from VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250526_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250526_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 2270 samples from VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250526_20250722.jsonl +Rank 0: Loading windows_internvl_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250529_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 28466 samples from VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250529_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250529_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 28466 samples from VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250529_20250722.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250619_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1620 samples from VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250619_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250619_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1620 samples from VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250619_20250722.jsonl +Rank 0: Loading windows_hover_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250620_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1714 samples from VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250620_20250722.jsonl +Rank 0: Loading windows_crop_hover_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250620_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 1714 samples from VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250620_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_1 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 1372 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_2 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2743 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_3 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2743 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_pure_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_4 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 590 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_5 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 1296 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_6 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 1296 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_pure_paste.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/internvl_grounding_20250627_20250722.jsonl with all sampling strategy +Rank 0: Loaded 4230 samples from VC:s3://gui-agent/data_20250630/windows/internvl_grounding_20250627_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/crop_internvl_grounding_20250627_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 4230 samples from VC:s3://gui-agent/data_20250630/windows/crop_internvl_grounding_20250627_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_1 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 1692 samples from VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 3384 samples from VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 3384 samples from VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_pure_paste.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/internvl_grounding_20250630_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1077 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/internvl_grounding_20250630_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_internvl_grounding_20250630_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 862 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_internvl_grounding_20250630_20250722.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/internvl_grounding_20250703_20250722.jsonl with all sampling strategy +Rank 0: Loaded 2676 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/internvl_grounding_20250703_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_internvl_grounding_20250703_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 2676 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_internvl_grounding_20250703_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_4 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 1070 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_5 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2141 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_6 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2141 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_pure_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_7 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 431 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_8 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 862 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_9 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 862 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_pure_paste.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/internvl_grounding_20250708_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1344 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/internvl_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_internvl_grounding_20250708_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1344 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_internvl_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250707_1 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_internvl_grounding_20250708.jsonl with random:40% sampling strategy +Rank 0: Loaded 292 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_internvl_grounding_20250708.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250707_2 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_internvl_grounding_20250708.jsonl with random:40% sampling strategy +Rank 0: Loaded 1075 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_internvl_grounding_20250708.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250707_3 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_internvl_grounding_20250708.jsonl with random:40% sampling strategy +Rank 0: Loaded 1075 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_internvl_grounding_20250708.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/internvl_grounding_20250717_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1509 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/internvl_grounding_20250717_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_internvl_grounding_20250717_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 1207 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_internvl_grounding_20250717_20250722.jsonl +Rank 0: Loading uibert_train_ground_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/uibert_train_ground_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 4646 samples from VC:s3://gui/new_annotations/gui_data_grounding/uibert_train_ground_d240430_v1.jsonl +Rank 0: Loading openapp_taperception_grounding_d240815_v2 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/openapp_taperception_grounding_d240815_v2.jsonl with all sampling strategy +Rank 0: Loaded 2500 samples from VC:s3://gui/new_annotations/gui_data_grounding/openapp_taperception_grounding_d240815_v2.jsonl +Rank 0: Loading openapp_widget_grounding_d240815_v2 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/openapp_widget_grounding_d240815_v2.jsonl with all sampling strategy +Rank 0: Loaded 14878 samples from VC:s3://gui/new_annotations/gui_data_grounding/openapp_widget_grounding_d240815_v2.jsonl +Rank 0: Loading openapp_mug_grounding_d240812 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/openapp_mug_grounding_d240812.jsonl with all sampling strategy +Rank 0: Loaded 26090 samples from VC:s3://gui/new_annotations/gui_data_grounding/openapp_mug_grounding_d240812.jsonl +Rank 0: Loading private_ui_phone_2403_ground_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/private_ui_phone_2403_ground_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 24798 samples from VC:s3://gui/new_annotations/gui_data_grounding/private_ui_phone_2403_ground_d240430_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_ground_d240521_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2405_ground_d240521_v1.jsonl with all sampling strategy +Rank 0: Loaded 5008 samples from VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2405_ground_d240521_v1.jsonl +Rank 0: Loading private_ui_aig_share_2406_ground_d240612_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2406_ground_d240612_v1.jsonl with all sampling strategy +Rank 0: Loaded 7903 samples from VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2406_ground_d240612_v1.jsonl +Rank 0: Loading windows_pc_agent_e_planning_cot +Rank 0: Loading VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 55564 samples from VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data.jsonl +Rank 0: Loading windows_pc_agent_e_navigation +Rank 0: Loading VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data_without_think.jsonl with all sampling strategy +Rank 0: Loaded 27782 samples from VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data_without_think.jsonl +Rank 0: Loading os_genesis_ac_training_data +Rank 0: Skipping os_genesis_ac_training_data due to repeat_time=0 +Rank 0: Loading os_genesis_aw_training_data +Rank 0: Skipping os_genesis_aw_training_data due to repeat_time=0 +Rank 0: Loading os_genesis_web_training +Rank 0: Skipping os_genesis_web_training due to repeat_time=0 +Rank 0: Loading gui_odyssey_plus_1 +Rank 0: Skipping gui_odyssey_plus_1 due to repeat_time=0 +Rank 0: Loading gui_odyssey_plus_2 +Rank 0: Skipping gui_odyssey_plus_2 due to repeat_time=0 +Rank 0: Loading gui_odyssey_plus_custom_3 +Rank 0: Skipping gui_odyssey_plus_custom_3 due to repeat_time=0 +Rank 0: Loading mm_gui_mid +Rank 0: Skipping mm_gui_mid due to repeat_time=0 +Rank 0: Loading text_gui_mid +Rank 0: Skipping text_gui_mid due to repeat_time=0 +Rank 0: Loading gui_mid_trajectory +Rank 0: Skipping gui_mid_trajectory due to repeat_time=0 +Rank 0: Loading ubuntu_rag +Rank 0: Loading VC:s3://gui-agent/cua_text_rag/ubuntu_rag.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7024 samples from VC:s3://gui-agent/cua_text_rag/ubuntu_rag.jsonl +Rank 0: Loading windows_rag +Rank 0: Loading VC:s3://gui-agent/cua_text_rag/windows_rag.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3144 samples from VC:s3://gui-agent/cua_text_rag/windows_rag.jsonl +Rank 0: Total training samples: 6240940 +Rank 0: Formatting inputs...Skip in lazy mode +Detected kernel version 3.10.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher. +Rank 0: Length of multimodal samples: 6230528, pure textual samples: 9984 +Parameter Offload: Total persistent parameters: 755712 in 408 params + 0%| | 0/12188 [00:00 + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7fb8f0d1a250> +[Try #0] Failed to fetch sample 4409357 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7fb8f0d1a250> +Problematic sample: {'image': '20240823_045930_before_screenshot_sub3.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Data Providers'"}, {'from': 'gpt', 'value': '\nclick(x=0.8015, y=0.921)\n'}]} + 82%|████████▏ | 10042/12188 [07:09<4:31:54, 7.60s/it] {'loss': 0.266, 'grad_norm': 0.6360412162482747, 'learning_rate': 7.919700013163128e-07, 'epoch': 0.82} + 82%|████████▏ | 10042/12188 [07:09<4:31:54, 7.60s/it] 82%|████████▏ | 10043/12188 [07:16<4:30:59, 7.58s/it] {'loss': 0.3081, 'grad_norm': 0.692102450354048, 'learning_rate': 7.912525264167342e-07, 'epoch': 0.82} + 82%|████████▏ | 10043/12188 [07:16<4:30:59, 7.58s/it] 82%|████████▏ | 10044/12188 [07:26<4:50:21, 8.13s/it] {'loss': 0.3037, 'grad_norm': 0.713994656437912, 'learning_rate': 7.905353487321404e-07, 'epoch': 0.82} + 82%|████████▏ | 10044/12188 [07:26<4:50:21, 8.13s/it] 82%|████████▏ | 10045/12188 [07:34<4:55:27, 8.27s/it] {'loss': 0.3283, 'grad_norm': 0.7094780959274872, 'learning_rate': 7.898184683131782e-07, 'epoch': 0.82} + 82%|████████▏ | 10045/12188 [07:34<4:55:27, 8.27s/it] 82%|████████▏ | 10046/12188 [07:42<4:46:17, 8.02s/it] {'loss': 0.3074, 'grad_norm': 0.850853289497508, 'learning_rate': 7.891018852104709e-07, 'epoch': 0.82} + 82%|████████▏ | 10046/12188 [07:42<4:46:17, 8.02s/it] 82%|████████▏ | 10047/12188 [07:48<4:31:34, 7.61s/it] {'loss': 0.3055, 'grad_norm': 0.7082405832251443, 'learning_rate': 7.883855994746237e-07, 'epoch': 0.82} + 82%|████████▏ | 10047/12188 [07:48<4:31:34, 7.61s/it] 82%|████████▏ | 10048/12188 [07:55<4:24:54, 7.43s/it] {'loss': 0.2813, 'grad_norm': 0.6408976885721273, 'learning_rate': 7.876696111562182e-07, 'epoch': 0.82} + 82%|████████▏ | 10048/12188 [07:55<4:24:54, 7.43s/it] 82%|████████▏ | 10049/12188 [08:03<4:32:27, 7.64s/it] {'loss': 0.3307, 'grad_norm': 0.8457482294614042, 'learning_rate': 7.869539203058169e-07, 'epoch': 0.82} + 82%|████████▏ | 10049/12188 [08:03<4:32:27, 7.64s/it] 82%|████████▏ | 10050/12188 [08:10<4:24:58, 7.44s/it] {'loss': 0.2741, 'grad_norm': 0.7392585933378111, 'learning_rate': 7.862385269739625e-07, 'epoch': 0.82} + 82%|████████▏ | 10050/12188 [08:10<4:24:58, 7.44s/it] 82%|████████▏ | 10051/12188 [08:18<4:22:14, 7.36s/it] {'loss': 0.3129, 'grad_norm': 0.6728164543710434, 'learning_rate': 7.855234312111732e-07, 'epoch': 0.82} + 82%|████████▏ | 10051/12188 [08:18<4:22:14, 7.36s/it] 82%|████████▏ | 10052/12188 [08:24<4:15:10, 7.17s/it] {'loss': 0.3049, 'grad_norm': 0.6373167888324497, 'learning_rate': 7.848086330679483e-07, 'epoch': 0.82} + 82%|████████▏ | 10052/12188 [08:24<4:15:10, 7.17s/it] 82%|████████▏ | 10053/12188 [08:33<4:26:30, 7.49s/it] {'loss': 0.3086, 'grad_norm': 0.7139747493586927, 'learning_rate': 7.840941325947637e-07, 'epoch': 0.82} + 82%|████████▏ | 10053/12188 [08:33<4:26:30, 7.49s/it] 82%|████████▏ | 10054/12188 [08:39<4:19:01, 7.28s/it] {'loss': 0.2684, 'grad_norm': 0.7207220427587605, 'learning_rate': 7.833799298420786e-07, 'epoch': 0.82} + 82%|████████▏ | 10054/12188 [08:39<4:19:01, 7.28s/it] 82%|████████▏ | 10055/12188 [08:47<4:21:20, 7.35s/it] {'loss': 0.2825, 'grad_norm': 0.9934156096328983, 'learning_rate': 7.826660248603296e-07, 'epoch': 0.82} + 82%|████████▏ | 10055/12188 [08:47<4:21:20, 7.35s/it] 83%|████████▎ | 10056/12188 [08:54<4:22:45, 7.39s/it] {'loss': 0.3118, 'grad_norm': 0.682804235004525, 'learning_rate': 7.819524176999288e-07, 'epoch': 0.83} + 83%|████████▎ | 10056/12188 [08:54<4:22:45, 7.39s/it] 83%|████████▎ | 10057/12188 [09:03<4:30:52, 7.63s/it] {'loss': 0.3043, 'grad_norm': 0.9209530221132296, 'learning_rate': 7.812391084112731e-07, 'epoch': 0.83} + 83%|████████▎ | 10057/12188 [09:03<4:30:52, 7.63s/it] 83%|████████▎ | 10058/12188 [09:11<4:41:45, 7.94s/it] {'loss': 0.2977, 'grad_norm': 0.6617051280644201, 'learning_rate': 7.80526097044732e-07, 'epoch': 0.83} + 83%|████████▎ | 10058/12188 [09:11<4:41:45, 7.94s/it] 83%|████████▎ | 10059/12188 [09:19<4:39:58, 7.89s/it] {'loss': 0.3023, 'grad_norm': 0.7686421012097013, 'learning_rate': 7.798133836506588e-07, 'epoch': 0.83} + 83%|████████▎ | 10059/12188 [09:19<4:39:58, 7.89s/it] 83%|████████▎ | 10060/12188 [09:26<4:33:37, 7.71s/it] {'loss': 0.3369, 'grad_norm': 0.7411283473070807, 'learning_rate': 7.791009682793855e-07, 'epoch': 0.83} + 83%|████████▎ | 10060/12188 [09:26<4:33:37, 7.71s/it] 83%|████████▎ | 10061/12188 [09:33<4:25:49, 7.50s/it] {'loss': 0.3615, 'grad_norm': 0.6778631731596078, 'learning_rate': 7.783888509812193e-07, 'epoch': 0.83} + 83%|████████▎ | 10061/12188 [09:33<4:25:49, 7.50s/it] 83%|████████▎ | 10062/12188 [09:40<4:20:22, 7.35s/it] {'loss': 0.3076, 'grad_norm': 0.6581147575820441, 'learning_rate': 7.776770318064497e-07, 'epoch': 0.83} + 83%|████████▎ | 10062/12188 [09:40<4:20:22, 7.35s/it] 83%|████████▎ | 10063/12188 [09:47<4:16:04, 7.23s/it] {'loss': 0.2945, 'grad_norm': 0.8437478863880085, 'learning_rate': 7.769655108053459e-07, 'epoch': 0.83} + 83%|████████▎ | 10063/12188 [09:47<4:16:04, 7.23s/it] 83%|████████▎ | 10064/12188 [09:54<4:14:15, 7.18s/it] {'loss': 0.2831, 'grad_norm': 0.6634268516489747, 'learning_rate': 7.762542880281526e-07, 'epoch': 0.83} + 83%|████████▎ | 10064/12188 [09:54<4:14:15, 7.18s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7fc80d2a10d0> +[Try #0] Failed to fetch sample 4385424 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7fc80d2a10d0> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Wiktionary'"}, {'from': 'gpt', 'value': '\nclick(x=0.904, y=0.465)\n'}]} + 83%|████████▎ | 10065/12188 [10:02<4:19:54, 7.35s/it] {'loss': 0.2738, 'grad_norm': 0.7439093895211578, 'learning_rate': 7.755433635250948e-07, 'epoch': 0.83} + 83%|████████▎ | 10065/12188 [10:02<4:19:54, 7.35s/it] 83%|████████▎ | 10066/12188 [10:09<4:12:52, 7.15s/it] {'loss': 0.3045, 'grad_norm': 0.7153656066459356, 'learning_rate': 7.748327373463782e-07, 'epoch': 0.83} + 83%|████████▎ | 10066/12188 [10:09<4:12:52, 7.15s/it] 83%|████████▎ | 10067/12188 [10:15<4:06:23, 6.97s/it] {'loss': 0.2924, 'grad_norm': 0.6878166981229012, 'learning_rate': 7.741224095421845e-07, 'epoch': 0.83} + 83%|████████▎ | 10067/12188 [10:15<4:06:23, 6.97s/it] 83%|████████▎ | 10068/12188 [10:22<4:05:27, 6.95s/it] {'loss': 0.3394, 'grad_norm': 0.9349592057379836, 'learning_rate': 7.734123801626781e-07, 'epoch': 0.83} + 83%|████████▎ | 10068/12188 [10:22<4:05:27, 6.95s/it] 83%|████████▎ | 10069/12188 [10:29<4:03:30, 6.89s/it] {'loss': 0.3236, 'grad_norm': 0.6679612395039647, 'learning_rate': 7.727026492579976e-07, 'epoch': 0.83} + 83%|████████▎ | 10069/12188 [10:29<4:03:30, 6.89s/it] 83%|████████▎ | 10070/12188 [10:38<4:26:16, 7.54s/it] {'loss': 0.265, 'grad_norm': 0.7436833860601304, 'learning_rate': 7.719932168782656e-07, 'epoch': 0.83} + 83%|████████▎ | 10070/12188 [10:38<4:26:16, 7.54s/it] 83%|████████▎ | 10071/12188 [10:45<4:18:49, 7.34s/it] {'loss': 0.2961, 'grad_norm': 0.761161979939725, 'learning_rate': 7.712840830735785e-07, 'epoch': 0.83} + 83%|████████▎ | 10071/12188 [10:45<4:18:49, 7.34s/it] 83%|████████▎ | 10072/12188 [10:52<4:19:29, 7.36s/it] {'loss': 0.3172, 'grad_norm': 0.6690556635762505, 'learning_rate': 7.705752478940154e-07, 'epoch': 0.83} + 83%|████████▎ | 10072/12188 [10:52<4:19:29, 7.36s/it] 83%|████████▎ | 10073/12188 [11:02<4:43:34, 8.04s/it] {'loss': 0.2955, 'grad_norm': 0.7180504766091279, 'learning_rate': 7.698667113896346e-07, 'epoch': 0.83} + 83%|████████▎ | 10073/12188 [11:02<4:43:34, 8.04s/it] 83%|████████▎ | 10074/12188 [11:12<5:00:31, 8.53s/it] {'loss': 0.3383, 'grad_norm': 0.7422141390536374, 'learning_rate': 7.69158473610469e-07, 'epoch': 0.83} + 83%|████████▎ | 10074/12188 [11:12<5:00:31, 8.53s/it] 83%|████████▎ | 10075/12188 [11:20<4:57:47, 8.46s/it] {'loss': 0.3172, 'grad_norm': 0.8573460459245499, 'learning_rate': 7.684505346065363e-07, 'epoch': 0.83} + 83%|████████▎ | 10075/12188 [11:20<4:57:47, 8.46s/it] 83%|████████▎ | 10076/12188 [11:26<4:37:54, 7.90s/it] {'loss': 0.3172, 'grad_norm': 0.6911814242482152, 'learning_rate': 7.677428944278271e-07, 'epoch': 0.83} + 83%|████████▎ | 10076/12188 [11:26<4:37:54, 7.90s/it] 83%|████████▎ | 10077/12188 [11:34<4:30:22, 7.68s/it] {'loss': 0.2707, 'grad_norm': 0.6949122031396373, 'learning_rate': 7.670355531243145e-07, 'epoch': 0.83} + 83%|████████▎ | 10077/12188 [11:34<4:30:22, 7.68s/it] 83%|████████▎ | 10078/12188 [11:44<4:55:31, 8.40s/it] {'loss': 0.2924, 'grad_norm': 0.7027621230037833, 'learning_rate': 7.663285107459517e-07, 'epoch': 0.83} + 83%|████████▎ | 10078/12188 [11:44<4:55:31, 8.40s/it] 83%|████████▎ | 10079/12188 [11:52<4:54:24, 8.38s/it] {'loss': 0.2764, 'grad_norm': 0.6896930836717682, 'learning_rate': 7.65621767342668e-07, 'epoch': 0.83} + 83%|████████▎ | 10079/12188 [11:52<4:54:24, 8.38s/it] 83%|████████▎ | 10080/12188 [12:00<4:47:49, 8.19s/it] {'loss': 0.3119, 'grad_norm': 0.680928142753749, 'learning_rate': 7.649153229643708e-07, 'epoch': 0.83} + 83%|████████▎ | 10080/12188 [12:00<4:47:49, 8.19s/it] 83%|████████▎ | 10081/12188 [12:07<4:38:43, 7.94s/it] {'loss': 0.3326, 'grad_norm': 0.7075469386971073, 'learning_rate': 7.6420917766095e-07, 'epoch': 0.83} + 83%|████████▎ | 10081/12188 [12:07<4:38:43, 7.94s/it] 83%|████████▎ | 10082/12188 [12:15<4:41:55, 8.03s/it] {'loss': 0.3062, 'grad_norm': 0.6904147459567664, 'learning_rate': 7.63503331482271e-07, 'epoch': 0.83} + 83%|████████▎ | 10082/12188 [12:15<4:41:55, 8.03s/it] 83%|████████▎ | 10083/12188 [12:24<4:52:55, 8.35s/it] {'loss': 0.2969, 'grad_norm': 0.6492985868243251, 'learning_rate': 7.627977844781815e-07, 'epoch': 0.83} + 83%|████████▎ | 10083/12188 [12:24<4:52:55, 8.35s/it] 83%|████████▎ | 10084/12188 [12:31<4:35:26, 7.86s/it] {'loss': 0.3002, 'grad_norm': 0.7955690253104271, 'learning_rate': 7.620925366985033e-07, 'epoch': 0.83} + 83%|████████▎ | 10084/12188 [12:31<4:35:26, 7.86s/it] 83%|████████▎ | 10085/12188 [12:40<4:45:34, 8.15s/it] {'loss': 0.2712, 'grad_norm': 0.6990664272258564, 'learning_rate': 7.613875881930416e-07, 'epoch': 0.83} + 83%|████████▎ | 10085/12188 [12:40<4:45:34, 8.15s/it] 83%|████████▎ | 10086/12188 [12:47<4:35:11, 7.86s/it] {'loss': 0.2989, 'grad_norm': 0.6754347286889993, 'learning_rate': 7.606829390115799e-07, 'epoch': 0.83} + 83%|████████▎ | 10086/12188 [12:47<4:35:11, 7.86s/it] 83%|████████▎ | 10087/12188 [12:54<4:24:43, 7.56s/it] {'loss': 0.28, 'grad_norm': 0.6920913174003449, 'learning_rate': 7.599785892038764e-07, 'epoch': 0.83} + 83%|████████▎ | 10087/12188 [12:54<4:24:43, 7.56s/it] 83%|████████▎ | 10088/12188 [13:04<4:47:15, 8.21s/it] {'loss': 0.2761, 'grad_norm': 0.6755180057216846, 'learning_rate': 7.592745388196748e-07, 'epoch': 0.83} + 83%|████████▎ | 10088/12188 [13:04<4:47:15, 8.21s/it] 83%|████████▎ | 10089/12188 [13:11<4:33:29, 7.82s/it] {'loss': 0.278, 'grad_norm': 0.6858971598286088, 'learning_rate': 7.585707879086901e-07, 'epoch': 0.83} + 83%|████████▎ | 10089/12188 [13:11<4:33:29, 7.82s/it] 83%|████████▎ | 10090/12188 [13:20<4:45:11, 8.16s/it] {'loss': 0.3056, 'grad_norm': 0.6649372916880908, 'learning_rate': 7.578673365206224e-07, 'epoch': 0.83} + 83%|████████▎ | 10090/12188 [13:20<4:45:11, 8.16s/it] 83%|████████▎ | 10091/12188 [13:30<5:05:28, 8.74s/it] {'loss': 0.3093, 'grad_norm': 0.6552123674153661, 'learning_rate': 7.571641847051492e-07, 'epoch': 0.83} + 83%|████████▎ | 10091/12188 [13:30<5:05:28, 8.74s/it] 83%|████████▎ | 10092/12188 [13:39<5:16:06, 9.05s/it] {'loss': 0.3001, 'grad_norm': 0.7040491538069477, 'learning_rate': 7.564613325119241e-07, 'epoch': 0.83} + 83%|████████▎ | 10092/12188 [13:39<5:16:06, 9.05s/it] 83%|████████▎ | 10093/12188 [13:46<4:52:44, 8.38s/it] {'loss': 0.2753, 'grad_norm': 0.6380205780605329, 'learning_rate': 7.557587799905813e-07, 'epoch': 0.83} + 83%|████████▎ | 10093/12188 [13:46<4:52:44, 8.38s/it] 83%|████████▎ | 10094/12188 [13:58<5:26:49, 9.36s/it] {'loss': 0.2803, 'grad_norm': 0.6856862970702855, 'learning_rate': 7.550565271907357e-07, 'epoch': 0.83} + 83%|████████▎ | 10094/12188 [13:58<5:26:49, 9.36s/it] 83%|████████▎ | 10095/12188 [14:06<5:17:39, 9.11s/it] {'loss': 0.2995, 'grad_norm': 0.706874983597875, 'learning_rate': 7.543545741619762e-07, 'epoch': 0.83} + 83%|████████▎ | 10095/12188 [14:06<5:17:39, 9.11s/it] 83%|████████▎ | 10096/12188 [14:14<4:56:40, 8.51s/it] {'loss': 0.2604, 'grad_norm': 0.6897357719706212, 'learning_rate': 7.536529209538773e-07, 'epoch': 0.83} + 83%|████████▎ | 10096/12188 [14:14<4:56:40, 8.51s/it] 83%|████████▎ | 10097/12188 [14:21<4:40:53, 8.06s/it] {'loss': 0.286, 'grad_norm': 0.6726177036447709, 'learning_rate': 7.52951567615986e-07, 'epoch': 0.83} + 83%|████████▎ | 10097/12188 [14:21<4:40:53, 8.06s/it] 83%|████████▎ | 10098/12188 [14:28<4:29:09, 7.73s/it] {'loss': 0.2749, 'grad_norm': 0.8048454358368534, 'learning_rate': 7.522505141978309e-07, 'epoch': 0.83} + 83%|████████▎ | 10098/12188 [14:28<4:29:09, 7.73s/it] 83%|████████▎ | 10099/12188 [14:34<4:18:38, 7.43s/it] {'loss': 0.2819, 'grad_norm': 0.7258790797825042, 'learning_rate': 7.515497607489213e-07, 'epoch': 0.83} + 83%|████████▎ | 10099/12188 [14:34<4:18:38, 7.43s/it] 83%|████████▎ | 10100/12188 [14:44<4:41:38, 8.09s/it] {'loss': 0.3032, 'grad_norm': 0.6481230650332143, 'learning_rate': 7.508493073187411e-07, 'epoch': 0.83} + 83%|████████▎ | 10100/12188 [14:44<4:41:38, 8.09s/it] 83%|████████▎ | 10101/12188 [14:53<4:56:59, 8.54s/it] {'loss': 0.3148, 'grad_norm': 0.7778924644535784, 'learning_rate': 7.501491539567574e-07, 'epoch': 0.83} + 83%|████████▎ | 10101/12188 [14:54<4:56:59, 8.54s/it] 83%|████████▎ | 10102/12188 [15:01<4:50:45, 8.36s/it] {'loss': 0.319, 'grad_norm': 0.7362410874658782, 'learning_rate': 7.494493007124109e-07, 'epoch': 0.83} + 83%|████████▎ | 10102/12188 [15:01<4:50:45, 8.36s/it] 83%|████████▎ | 10103/12188 [15:08<4:33:55, 7.88s/it] {'loss': 0.2972, 'grad_norm': 0.731782508136441, 'learning_rate': 7.487497476351258e-07, 'epoch': 0.83} + 83%|████████▎ | 10103/12188 [15:08<4:33:55, 7.88s/it] 83%|████████▎ | 10104/12188 [15:17<4:46:17, 8.24s/it] {'loss': 0.3556, 'grad_norm': 0.7074337082606877, 'learning_rate': 7.480504947743044e-07, 'epoch': 0.83} + 83%|████████▎ | 10104/12188 [15:17<4:46:17, 8.24s/it] 83%|████████▎ | 10105/12188 [15:24<4:29:25, 7.76s/it] {'loss': 0.3124, 'grad_norm': 0.6390111217697638, 'learning_rate': 7.473515421793248e-07, 'epoch': 0.83} + 83%|████████▎ | 10105/12188 [15:24<4:29:25, 7.76s/it] 83%|████████▎ | 10106/12188 [15:33<4:39:50, 8.06s/it] {'loss': 0.2862, 'grad_norm': 0.7199038794545007, 'learning_rate': 7.466528898995479e-07, 'epoch': 0.83} + 83%|████████▎ | 10106/12188 [15:33<4:39:50, 8.06s/it] 83%|████████▎ | 10107/12188 [15:40<4:29:20, 7.77s/it] {'loss': 0.3074, 'grad_norm': 0.6554449100653877, 'learning_rate': 7.459545379843108e-07, 'epoch': 0.83} + 83%|████████▎ | 10107/12188 [15:40<4:29:20, 7.77s/it] 83%|████████▎ | 10108/12188 [15:47<4:25:06, 7.65s/it] {'loss': 0.2927, 'grad_norm': 0.6903538643669347, 'learning_rate': 7.452564864829281e-07, 'epoch': 0.83} + 83%|████████▎ | 10108/12188 [15:47<4:25:06, 7.65s/it] 83%|████████▎ | 10109/12188 [15:56<4:33:38, 7.90s/it] {'loss': 0.3005, 'grad_norm': 0.7050788142298058, 'learning_rate': 7.445587354446975e-07, 'epoch': 0.83} + 83%|████████▎ | 10109/12188 [15:56<4:33:38, 7.90s/it] 83%|████████▎ | 10110/12188 [16:04<4:34:02, 7.91s/it] {'loss': 0.3454, 'grad_norm': 0.79862160735253, 'learning_rate': 7.438612849188915e-07, 'epoch': 0.83} + 83%|████████▎ | 10110/12188 [16:04<4:34:02, 7.91s/it] 83%|████████▎ | 10111/12188 [16:11<4:24:08, 7.63s/it] {'loss': 0.3538, 'grad_norm': 0.6653949999210689, 'learning_rate': 7.43164134954763e-07, 'epoch': 0.83} + 83%|████████▎ | 10111/12188 [16:11<4:24:08, 7.63s/it] 83%|████████▎ | 10112/12188 [16:17<4:16:00, 7.40s/it] {'loss': 0.2952, 'grad_norm': 0.7074994500917232, 'learning_rate': 7.424672856015458e-07, 'epoch': 0.83} + 83%|████████▎ | 10112/12188 [16:17<4:16:00, 7.40s/it] 83%|████████▎ | 10113/12188 [16:26<4:28:05, 7.75s/it] {'loss': 0.3142, 'grad_norm': 0.6992005792421734, 'learning_rate': 7.417707369084476e-07, 'epoch': 0.83} + 83%|████████▎ | 10113/12188 [16:26<4:28:05, 7.75s/it] 83%|████████▎ | 10114/12188 [16:33<4:19:13, 7.50s/it] {'loss': 0.3181, 'grad_norm': 0.8047738690508602, 'learning_rate': 7.41074488924659e-07, 'epoch': 0.83} + 83%|████████▎ | 10114/12188 [16:33<4:19:13, 7.50s/it] 83%|████████▎ | 10115/12188 [16:40<4:11:44, 7.29s/it] {'loss': 0.3236, 'grad_norm': 0.6648384487014909, 'learning_rate': 7.40378541699347e-07, 'epoch': 0.83} + 83%|████████▎ | 10115/12188 [16:40<4:11:44, 7.29s/it] 83%|████████▎ | 10116/12188 [16:46<4:06:10, 7.13s/it] {'loss': 0.3018, 'grad_norm': 0.6702340514608126, 'learning_rate': 7.396828952816587e-07, 'epoch': 0.83} + 83%|████████▎ | 10116/12188 [16:46<4:06:10, 7.13s/it] 83%|████████▎ | 10117/12188 [16:56<4:32:35, 7.90s/it] {'loss': 0.2679, 'grad_norm': 0.6957140493737293, 'learning_rate': 7.389875497207205e-07, 'epoch': 0.83} + 83%|████████▎ | 10117/12188 [16:56<4:32:35, 7.90s/it] 83%|████████▎ | 10118/12188 [17:03<4:21:06, 7.57s/it] {'loss': 0.2897, 'grad_norm': 0.7577583290913258, 'learning_rate': 7.382925050656348e-07, 'epoch': 0.83} + 83%|████████▎ | 10118/12188 [17:03<4:21:06, 7.57s/it] 83%|████████▎ | 10119/12188 [17:10<4:11:22, 7.29s/it] {'loss': 0.2977, 'grad_norm': 0.7015314872258793, 'learning_rate': 7.375977613654861e-07, 'epoch': 0.83} + 83%|████████▎ | 10119/12188 [17:10<4:11:22, 7.29s/it] 83%|████████▎ | 10120/12188 [17:16<4:01:58, 7.02s/it] {'loss': 0.3077, 'grad_norm': 0.8030783102603775, 'learning_rate': 7.369033186693359e-07, 'epoch': 0.83} + 83%|████████▎ | 10120/12188 [17:16<4:01:58, 7.02s/it] 83%|████████▎ | 10121/12188 [17:23<4:02:51, 7.05s/it] {'loss': 0.3016, 'grad_norm': 0.7416005819079944, 'learning_rate': 7.362091770262231e-07, 'epoch': 0.83} + 83%|████████▎ | 10121/12188 [17:23<4:02:51, 7.05s/it] 83%|████████▎ | 10122/12188 [17:30<3:59:43, 6.96s/it] {'loss': 0.2913, 'grad_norm': 0.7664478474585964, 'learning_rate': 7.355153364851686e-07, 'epoch': 0.83} + 83%|████████▎ | 10122/12188 [17:30<3:59:43, 6.96s/it] 83%|████████▎ | 10123/12188 [17:37<3:59:13, 6.95s/it] {'loss': 0.2564, 'grad_norm': 0.6692493430588011, 'learning_rate': 7.348217970951687e-07, 'epoch': 0.83} + 83%|████████▎ | 10123/12188 [17:37<3:59:13, 6.95s/it] 83%|████████▎ | 10124/12188 [17:44<3:57:41, 6.91s/it] {'loss': 0.3228, 'grad_norm': 0.6455713679656401, 'learning_rate': 7.341285589052022e-07, 'epoch': 0.83} + 83%|████████▎ | 10124/12188 [17:44<3:57:41, 6.91s/it] 83%|████████▎ | 10125/12188 [17:50<3:57:07, 6.90s/it] {'loss': 0.3198, 'grad_norm': 0.7506270281329764, 'learning_rate': 7.334356219642219e-07, 'epoch': 0.83} + 83%|████████▎ | 10125/12188 [17:50<3:57:07, 6.90s/it] 83%|████████▎ | 10126/12188 [17:58<3:59:12, 6.96s/it] {'loss': 0.3412, 'grad_norm': 0.8173819980559509, 'learning_rate': 7.327429863211633e-07, 'epoch': 0.83} + 83%|████████▎ | 10126/12188 [17:58<3:59:12, 6.96s/it] 83%|████████▎ | 10127/12188 [18:04<3:57:01, 6.90s/it] {'loss': 0.2655, 'grad_norm': 0.6623319629650889, 'learning_rate': 7.320506520249404e-07, 'epoch': 0.83} + 83%|████████▎ | 10127/12188 [18:04<3:57:01, 6.90s/it] 83%|████████▎ | 10128/12188 [18:13<4:10:49, 7.31s/it] {'loss': 0.2872, 'grad_norm': 0.7638487583735598, 'learning_rate': 7.313586191244421e-07, 'epoch': 0.83} + 83%|████████▎ | 10128/12188 [18:13<4:10:49, 7.31s/it] 83%|████████▎ | 10129/12188 [18:19<4:03:30, 7.10s/it] {'loss': 0.3042, 'grad_norm': 0.6584256099190592, 'learning_rate': 7.306668876685402e-07, 'epoch': 0.83} + 83%|████████▎ | 10129/12188 [18:19<4:03:30, 7.10s/it] 83%|████████▎ | 10130/12188 [18:26<3:56:36, 6.90s/it] {'loss': 0.317, 'grad_norm': 0.661993914452775, 'learning_rate': 7.299754577060847e-07, 'epoch': 0.83} + 83%|████████▎ | 10130/12188 [18:26<3:56:36, 6.90s/it] 83%|████████▎ | 10131/12188 [18:33<3:59:47, 6.99s/it] {'loss': 0.2955, 'grad_norm': 0.6536078868608389, 'learning_rate': 7.292843292859009e-07, 'epoch': 0.83} + 83%|████████▎ | 10131/12188 [18:33<3:59:47, 6.99s/it] 83%|████████▎ | 10132/12188 [18:40<4:03:11, 7.10s/it] {'loss': 0.2993, 'grad_norm': 0.7311922617841462, 'learning_rate': 7.285935024567975e-07, 'epoch': 0.83} + 83%|███���████▎ | 10132/12188 [18:40<4:03:11, 7.10s/it] 83%|████████▎ | 10133/12188 [18:48<4:15:10, 7.45s/it] {'loss': 0.2715, 'grad_norm': 0.6870648264470243, 'learning_rate': 7.279029772675572e-07, 'epoch': 0.83} + 83%|████████▎ | 10133/12188 [18:48<4:15:10, 7.45s/it]Invalidate trace cache @ step 2: expected module 1, but got module 364 + 83%|████████▎ | 10134/12188 [18:55<4:04:40, 7.15s/it] {'loss': 0.6362, 'grad_norm': 0.5496667866201733, 'learning_rate': 7.27212753766946e-07, 'epoch': 0.83} + 83%|████████▎ | 10134/12188 [18:55<4:04:40, 7.15s/it] 83%|████████▎ | 10135/12188 [19:02<4:05:46, 7.18s/it] {'loss': 0.2927, 'grad_norm': 0.7332331265084335, 'learning_rate': 7.265228320037054e-07, 'epoch': 0.83} + 83%|████████▎ | 10135/12188 [19:02<4:05:46, 7.18s/it] 83%|████████▎ | 10136/12188 [19:09<4:04:10, 7.14s/it] {'loss': 0.2928, 'grad_norm': 0.6631789524834271, 'learning_rate': 7.258332120265554e-07, 'epoch': 0.83} + 83%|████████▎ | 10136/12188 [19:09<4:04:10, 7.14s/it] 83%|████████▎ | 10137/12188 [19:16<4:03:58, 7.14s/it] {'loss': 0.3139, 'grad_norm': 0.691033528400873, 'learning_rate': 7.251438938841981e-07, 'epoch': 0.83} + 83%|████████▎ | 10137/12188 [19:16<4:03:58, 7.14s/it] 83%|████████▎ | 10138/12188 [19:23<3:58:12, 6.97s/it] {'loss': 0.3285, 'grad_norm': 0.7874192690344491, 'learning_rate': 7.244548776253102e-07, 'epoch': 0.83} + 83%|████████▎ | 10138/12188 [19:23<3:58:12, 6.97s/it] 83%|████████▎ | 10139/12188 [19:34<4:37:38, 8.13s/it] {'loss': 0.3111, 'grad_norm': 0.9819575272723089, 'learning_rate': 7.237661632985493e-07, 'epoch': 0.83} + 83%|████████▎ | 10139/12188 [19:34<4:37:38, 8.13s/it] 83%|████████▎ | 10140/12188 [19:41<4:29:48, 7.90s/it] {'loss': 0.2745, 'grad_norm': 0.7085577791923221, 'learning_rate': 7.230777509525527e-07, 'epoch': 0.83} + 83%|████████▎ | 10140/12188 [19:41<4:29:48, 7.90s/it] 83%|████████▎ | 10141/12188 [19:48<4:15:17, 7.48s/it] {'loss': 0.2895, 'grad_norm': 0.720451745068773, 'learning_rate': 7.223896406359326e-07, 'epoch': 0.83} + 83%|████████▎ | 10141/12188 [19:48<4:15:17, 7.48s/it] 83%|████████▎ | 10142/12188 [19:56<4:22:58, 7.71s/it] {'loss': 0.2514, 'grad_norm': 0.6602007077388933, 'learning_rate': 7.217018323972852e-07, 'epoch': 0.83} + 83%|████████▎ | 10142/12188 [19:56<4:22:58, 7.71s/it] 83%|████████▎ | 10143/12188 [20:06<4:43:51, 8.33s/it] {'loss': 0.3208, 'grad_norm': 0.7461770710467339, 'learning_rate': 7.210143262851793e-07, 'epoch': 0.83} + 83%|████████▎ | 10143/12188 [20:06<4:43:51, 8.33s/it] 83%|████████▎ | 10144/12188 [20:13<4:32:25, 8.00s/it] {'loss': 0.3206, 'grad_norm': 0.6952572859209691, 'learning_rate': 7.203271223481672e-07, 'epoch': 0.83} + 83%|████████▎ | 10144/12188 [20:13<4:32:25, 8.00s/it] 83%|████████▎ | 10145/12188 [20:23<4:53:12, 8.61s/it] {'loss': 0.2924, 'grad_norm': 0.6968798189585115, 'learning_rate': 7.196402206347792e-07, 'epoch': 0.83} + 83%|████████▎ | 10145/12188 [20:23<4:53:12, 8.61s/it] 83%|████████▎ | 10146/12188 [20:30<4:37:29, 8.15s/it] {'loss': 0.2561, 'grad_norm': 0.7033304527119182, 'learning_rate': 7.189536211935205e-07, 'epoch': 0.83} + 83%|████████▎ | 10146/12188 [20:30<4:37:29, 8.15s/it] 83%|████████▎ | 10147/12188 [20:37<4:24:32, 7.78s/it] {'loss': 0.3271, 'grad_norm': 0.6563606995849903, 'learning_rate': 7.182673240728804e-07, 'epoch': 0.83} + 83%|████████▎ | 10147/12188 [20:37<4:24:32, 7.78s/it] 83%|████████▎ | 10148/12188 [20:44<4:21:50, 7.70s/it] {'loss': 0.3305, 'grad_norm': 0.6715362837036182, 'learning_rate': 7.175813293213224e-07, 'epoch': 0.83} + 83%|████████▎ | 10148/12188 [20:44<4:21:50, 7.70s/it] 83%|███��████▎ | 10149/12188 [20:53<4:30:40, 7.96s/it] {'loss': 0.301, 'grad_norm': 0.6222946279999154, 'learning_rate': 7.168956369872898e-07, 'epoch': 0.83} + 83%|████████▎ | 10149/12188 [20:53<4:30:40, 7.96s/it] 83%|████████▎ | 10150/12188 [21:01<4:26:48, 7.85s/it] {'loss': 0.2923, 'grad_norm': 0.711963087201784, 'learning_rate': 7.162102471192067e-07, 'epoch': 0.83} + 83%|████████▎ | 10150/12188 [21:01<4:26:48, 7.85s/it] 83%|████████▎ | 10151/12188 [21:11<4:55:12, 8.70s/it] {'loss': 0.2929, 'grad_norm': 0.8319553190205923, 'learning_rate': 7.155251597654727e-07, 'epoch': 0.83} + 83%|████████▎ | 10151/12188 [21:11<4:55:12, 8.70s/it] 83%|████████▎ | 10152/12188 [21:18<4:34:27, 8.09s/it] {'loss': 0.2765, 'grad_norm': 0.6543745080255129, 'learning_rate': 7.148403749744687e-07, 'epoch': 0.83} + 83%|████████▎ | 10152/12188 [21:18<4:34:27, 8.09s/it] 83%|████████▎ | 10153/12188 [21:25<4:23:55, 7.78s/it] {'loss': 0.3136, 'grad_norm': 0.666508285175865, 'learning_rate': 7.141558927945536e-07, 'epoch': 0.83} + 83%|████████▎ | 10153/12188 [21:25<4:23:55, 7.78s/it] 83%|████████▎ | 10154/12188 [21:34<4:35:01, 8.11s/it] {'loss': 0.2845, 'grad_norm': 0.7330957645473382, 'learning_rate': 7.134717132740626e-07, 'epoch': 0.83} + 83%|████████▎ | 10154/12188 [21:34<4:35:01, 8.11s/it] 83%|████████▎ | 10155/12188 [21:41<4:20:44, 7.70s/it] {'loss': 0.2535, 'grad_norm': 0.6216301381949731, 'learning_rate': 7.127878364613133e-07, 'epoch': 0.83} + 83%|████████▎ | 10155/12188 [21:41<4:20:44, 7.70s/it] 83%|████████▎ | 10156/12188 [21:47<4:10:03, 7.38s/it] {'loss': 0.2659, 'grad_norm': 0.7907228626012457, 'learning_rate': 7.121042624045981e-07, 'epoch': 0.83} + 83%|████████▎ | 10156/12188 [21:47<4:10:03, 7.38s/it] 83%|████████▎ | 10157/12188 [21:56<4:24:26, 7.81s/it] {'loss': 0.2959, 'grad_norm': 0.7774284383521889, 'learning_rate': 7.114209911521907e-07, 'epoch': 0.83} + 83%|████████▎ | 10157/12188 [21:56<4:24:26, 7.81s/it] 83%|████████▎ | 10158/12188 [22:03<4:16:47, 7.59s/it] {'loss': 0.275, 'grad_norm': 0.699686318181856, 'learning_rate': 7.107380227523442e-07, 'epoch': 0.83} + 83%|████████▎ | 10158/12188 [22:03<4:16:47, 7.59s/it] 83%|████████▎ | 10159/12188 [22:13<4:40:51, 8.31s/it] {'loss': 0.2811, 'grad_norm': 0.6850667313465203, 'learning_rate': 7.100553572532859e-07, 'epoch': 0.83} + 83%|████████▎ | 10159/12188 [22:13<4:40:51, 8.31s/it] 83%|████████▎ | 10160/12188 [22:20<4:26:14, 7.88s/it] {'loss': 0.2749, 'grad_norm': 0.6464353029224478, 'learning_rate': 7.093729947032274e-07, 'epoch': 0.83} + 83%|████████▎ | 10160/12188 [22:20<4:26:14, 7.88s/it] 83%|████████▎ | 10161/12188 [22:28<4:28:13, 7.94s/it] {'loss': 0.2878, 'grad_norm': 0.8589266317932714, 'learning_rate': 7.086909351503529e-07, 'epoch': 0.83} + 83%|████████▎ | 10161/12188 [22:28<4:28:13, 7.94s/it] 83%|████████▎ | 10162/12188 [22:36<4:23:16, 7.80s/it] {'loss': 0.2834, 'grad_norm': 0.6711401286519718, 'learning_rate': 7.080091786428317e-07, 'epoch': 0.83} + 83%|████████▎ | 10162/12188 [22:36<4:23:16, 7.80s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7fb856ab5d50> +[Try #0] Failed to fetch sample 4616838 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7fb856ab5d50> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Class: form-control input-sm'"}, {'from': 'gpt', 'value': '\nclick(x=0.489, y=0.228)\n'}]} + 83%|████████▎ | 10163/12188 [22:45<4:39:17, 8.28s/it] {'loss': 0.3412, 'grad_norm': 0.693534043549881, 'learning_rate': 7.073277252288063e-07, 'epoch': 0.83} + 83%|████████▎ | 10163/12188 [22:45<4:39:17, 8.28s/it] 83%|████████▎ | 10164/12188 [22:53<4:37:48, 8.24s/it] {'loss': 0.3373, 'grad_norm': 0.7444791502576358, 'learning_rate': 7.066465749563994e-07, 'epoch': 0.83} + 83%|████████▎ | 10164/12188 [22:53<4:37:48, 8.24s/it] 83%|████████▎ | 10165/12188 [23:00<4:21:32, 7.76s/it] {'loss': 0.2669, 'grad_norm': 0.7442621242074046, 'learning_rate': 7.059657278737136e-07, 'epoch': 0.83} + 83%|████████▎ | 10165/12188 [23:00<4:21:32, 7.76s/it] 83%|████████▎ | 10166/12188 [23:07<4:14:58, 7.57s/it] {'loss': 0.2683, 'grad_norm': 0.6392898653611955, 'learning_rate': 7.052851840288299e-07, 'epoch': 0.83} + 83%|████████▎ | 10166/12188 [23:07<4:14:58, 7.57s/it] 83%|████████▎ | 10167/12188 [23:16<4:26:51, 7.92s/it] {'loss': 0.309, 'grad_norm': 0.8299627481296765, 'learning_rate': 7.04604943469806e-07, 'epoch': 0.83} + 83%|████████▎ | 10167/12188 [23:16<4:26:51, 7.92s/it] 83%|████████▎ | 10168/12188 [23:23<4:25:40, 7.89s/it] {'loss': 0.3117, 'grad_norm': 0.6782561716433357, 'learning_rate': 7.039250062446806e-07, 'epoch': 0.83} + 83%|████████▎ | 10168/12188 [23:23<4:25:40, 7.89s/it] 83%|████████▎ | 10169/12188 [23:30<4:12:42, 7.51s/it] {'loss': 0.286, 'grad_norm': 0.7360903783614497, 'learning_rate': 7.032453724014681e-07, 'epoch': 0.83} + 83%|████████▎ | 10169/12188 [23:30<4:12:42, 7.51s/it] 83%|████████▎ | 10170/12188 [23:40<4:42:40, 8.40s/it] {'loss': 0.2962, 'grad_norm': 0.6702575201145363, 'learning_rate': 7.025660419881641e-07, 'epoch': 0.83} + 83%|████████▎ | 10170/12188 [23:41<4:42:40, 8.40s/it] 83%|████████▎ | 10171/12188 [23:47<4:26:14, 7.92s/it] {'loss': 0.3196, 'grad_norm': 0.7459398058262703, 'learning_rate': 7.01887015052743e-07, 'epoch': 0.83} + 83%|████████▎ | 10171/12188 [23:47<4:26:14, 7.92s/it] 83%|████████▎ | 10172/12188 [23:56<4:35:45, 8.21s/it] {'loss': 0.3356, 'grad_norm': 0.7272237531476677, 'learning_rate': 7.012082916431545e-07, 'epoch': 0.83} + 83%|████████▎ | 10172/12188 [23:56<4:35:45, 8.21s/it] 83%|████████▎ | 10173/12188 [24:03<4:23:26, 7.84s/it] {'loss': 0.29, 'grad_norm': 0.8053238728914313, 'learning_rate': 7.005298718073311e-07, 'epoch': 0.83} + 83%|████████▎ | 10173/12188 [24:03<4:23:26, 7.84s/it] 83%|████████▎ | 10174/12188 [24:10<4:10:57, 7.48s/it] {'loss': 0.3029, 'grad_norm': 0.68918036073592, 'learning_rate': 6.998517555931788e-07, 'epoch': 0.83} + 83%|████████▎ | 10174/12188 [24:10<4:10:57, 7.48s/it] 83%|████████▎ | 10175/12188 [24:17<4:04:47, 7.30s/it] {'loss': 0.2739, 'grad_norm': 0.7456082722861346, 'learning_rate': 6.991739430485883e-07, 'epoch': 0.83} + 83%|████████▎ | 10175/12188 [24:17<4:04:47, 7.30s/it] 83%|████████▎ | 10176/12188 [24:27<4:31:53, 8.11s/it] {'loss': 0.294, 'grad_norm': 0.7907328872122255, 'learning_rate': 6.984964342214245e-07, 'epoch': 0.83} + 83%|████████▎ | 10176/12188 [24:27<4:31:53, 8.11s/it] 84%|████████▎ | 10177/12188 [24:34<4:21:06, 7.79s/it] {'loss': 0.3085, 'grad_norm': 0.6642950899796027, 'learning_rate': 6.978192291595304e-07, 'epoch': 0.83} + 84%|████████▎ | 10177/12188 [24:34<4:21:06, 7.79s/it] 84%|████████▎ | 10178/12188 [24:41<4:12:39, 7.54s/it] {'loss': 0.3208, 'grad_norm': 0.6892085549228308, 'learning_rate': 6.971423279107309e-07, 'epoch': 0.84} + 84%|████████▎ | 10178/12188 [24:41<4:12:39, 7.54s/it] 84%|████████▎ | 10179/12188 [24:49<4:15:44, 7.64s/it] {'loss': 0.292, 'grad_norm': 0.642723316800357, 'learning_rate': 6.964657305228262e-07, 'epoch': 0.84} + 84%|████████▎ | 10179/12188 [24:49<4:15:44, 7.64s/it] 84%|████████▎ | 10180/12188 [24:55<4:07:29, 7.40s/it] {'loss': 0.302, 'grad_norm': 0.7289049239219134, 'learning_rate': 6.95789437043598e-07, 'epoch': 0.84} + 84%|████████▎ | 10180/12188 [24:55<4:07:29, 7.40s/it] 84%|████████▎ | 10181/12188 [25:02<4:02:05, 7.24s/it] {'loss': 0.3462, 'grad_norm': 0.7599936191288318, 'learning_rate': 6.951134475208049e-07, 'epoch': 0.84} + 84%|████████▎ | 10181/12188 [25:02<4:02:05, 7.24s/it] 84%|████████▎ | 10182/12188 [25:11<4:18:19, 7.73s/it] {'loss': 0.3097, 'grad_norm': 0.7506521755463053, 'learning_rate': 6.944377620021831e-07, 'epoch': 0.84} + 84%|████████▎ | 10182/12188 [25:11<4:18:19, 7.73s/it] 84%|████████▎ | 10183/12188 [25:18<4:11:41, 7.53s/it] {'loss': 0.258, 'grad_norm': 0.695925883167568, 'learning_rate': 6.937623805354493e-07, 'epoch': 0.84} + 84%|████████▎ | 10183/12188 [25:18<4:11:41, 7.53s/it] 84%|████████▎ | 10184/12188 [25:25<4:05:27, 7.35s/it] {'loss': 0.3484, 'grad_norm': 0.6892519971949449, 'learning_rate': 6.930873031682983e-07, 'epoch': 0.84} + 84%|████████▎ | 10184/12188 [25:25<4:05:27, 7.35s/it] 84%|████████▎ | 10185/12188 [25:32<3:59:55, 7.19s/it] {'loss': 0.2969, 'grad_norm': 0.7504677456249519, 'learning_rate': 6.924125299484014e-07, 'epoch': 0.84} + 84%|████████▎ | 10185/12188 [25:32<3:59:55, 7.19s/it] 84%|████████▎ | 10186/12188 [25:40<4:10:07, 7.50s/it] {'loss': 0.3471, 'grad_norm': 0.6867786232203585, 'learning_rate': 6.917380609234125e-07, 'epoch': 0.84} + 84%|████████▎ | 10186/12188 [25:40<4:10:07, 7.50s/it] 84%|████████▎ | 10187/12188 [25:49<4:27:11, 8.01s/it] {'loss': 0.3003, 'grad_norm': 0.6700559528021204, 'learning_rate': 6.910638961409583e-07, 'epoch': 0.84} + 84%|████████▎ | 10187/12188 [25:49<4:27:11, 8.01s/it] 84%|████████▎ | 10188/12188 [25:56<4:17:29, 7.72s/it] {'loss': 0.2729, 'grad_norm': 0.7877909640720578, 'learning_rate': 6.903900356486504e-07, 'epoch': 0.84} + 84%|████████▎ | 10188/12188 [25:56<4:17:29, 7.72s/it] 84%|████████▎ | 10189/12188 [26:03<4:07:01, 7.41s/it] {'loss': 0.3304, 'grad_norm': 0.6773294535083868, 'learning_rate': 6.89716479494073e-07, 'epoch': 0.84} + 84%|████████▎ | 10189/12188 [26:03<4:07:01, 7.41s/it] 84%|████████▎ | 10190/12188 [26:10<3:59:36, 7.20s/it] {'loss': 0.2836, 'grad_norm': 0.7117218203420311, 'learning_rate': 6.890432277247943e-07, 'epoch': 0.84} + 84%|████████▎ | 10190/12188 [26:10<3:59:36, 7.20s/it] 84%|████████▎ | 10191/12188 [26:19<4:17:43, 7.74s/it] {'loss': 0.3076, 'grad_norm': 0.8615784628379048, 'learning_rate': 6.883702803883563e-07, 'epoch': 0.84} + 84%|████████▎ | 10191/12188 [26:19<4:17:43, 7.74s/it] 84%|████████▎ | 10192/12188 [26:26<4:12:51, 7.60s/it] {'loss': 0.3097, 'grad_norm': 0.7135823174102636, 'learning_rate': 6.876976375322808e-07, 'epoch': 0.84} + 84%|████████▎ | 10192/12188 [26:26<4:12:51, 7.60s/it] 84%|████████▎ | 10193/12188 [26:33<4:08:13, 7.47s/it] {'loss': 0.2953, 'grad_norm': 0.6729011340131362, 'learning_rate': 6.870252992040705e-07, 'epoch': 0.84} + 84%|████████▎ | 10193/12188 [26:33<4:08:13, 7.47s/it] 84%|████████▎ | 10194/12188 [26:40<4:00:19, 7.23s/it] {'loss': 0.2985, 'grad_norm': 0.816674365842581, 'learning_rate': 6.86353265451205e-07, 'epoch': 0.84} + 84%|████████▎ | 10194/12188 [26:40<4:00:19, 7.23s/it] 84%|████████▎ | 10195/12188 [26:47<4:02:17, 7.29s/it] {'loss': 0.2967, 'grad_norm': 0.7077903111249407, 'learning_rate': 6.856815363211399e-07, 'epoch': 0.84} + 84%|████████▎ | 10195/12188 [26:47<4:02:17, 7.29s/it] 84%|████████▎ | 10196/12188 [26:54<4:00:50, 7.25s/it] {'loss': 0.2983, 'grad_norm': 0.6425132820516573, 'learning_rate': 6.85010111861315e-07, 'epoch': 0.84} + 84%|████████▎ | 10196/12188 [26:55<4:00:50, 7.25s/it] 84%|████████▎ | 10197/12188 [27:01<3:56:32, 7.13s/it] {'loss': 0.2706, 'grad_norm': 0.7075233408839382, 'learning_rate': 6.843389921191423e-07, 'epoch': 0.84} + 84%|████████▎ | 10197/12188 [27:01<3:56:32, 7.13s/it] 84%|████████▎ | 10198/12188 [27:08<3:54:33, 7.07s/it] {'loss': 0.2811, 'grad_norm': 0.7090324415673515, 'learning_rate': 6.836681771420162e-07, 'epoch': 0.84} + 84%|████████▎ | 10198/12188 [27:08<3:54:33, 7.07s/it] 84%|████████▎ | 10199/12188 [27:15<3:51:20, 6.98s/it] {'loss': 0.2923, 'grad_norm': 0.7335067205736278, 'learning_rate': 6.829976669773098e-07, 'epoch': 0.84} + 84%|████████▎ | 10199/12188 [27:15<3:51:20, 6.98s/it][2025-08-17 22:50:37,480] [WARNING] [stage3.py:2118:step] 1 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time + 84%|████████▎ | 10200/12188 [27:24<4:11:10, 7.58s/it] {'loss': 0.2972, 'grad_norm': 0.9049823039876141, 'learning_rate': 6.823274616723707e-07, 'epoch': 0.84} + 84%|████████▎ | 10200/12188 [27:24<4:11:10, 7.58s/it] 84%|████████▎ | 10201/12188 [27:31<4:02:30, 7.32s/it] {'loss': 0.2997, 'grad_norm': 0.7171381827512318, 'learning_rate': 6.816575612745302e-07, 'epoch': 0.84} + 84%|████████▎ | 10201/12188 [27:31<4:02:30, 7.32s/it] 84%|████████▎ | 10202/12188 [27:39<4:14:02, 7.67s/it] {'loss': 0.3405, 'grad_norm': 0.8109452144591424, 'learning_rate': 6.809879658310953e-07, 'epoch': 0.84} + 84%|████████▎ | 10202/12188 [27:39<4:14:02, 7.67s/it] 84%|████████▎ | 10203/12188 [27:46<4:07:16, 7.47s/it] {'loss': 0.3149, 'grad_norm': 0.690729859190234, 'learning_rate': 6.803186753893515e-07, 'epoch': 0.84} + 84%|████████▎ | 10203/12188 [27:46<4:07:16, 7.47s/it] 84%|████████▎ | 10204/12188 [27:54<4:08:17, 7.51s/it] {'loss': 0.314, 'grad_norm': 0.8864271446375669, 'learning_rate': 6.796496899965627e-07, 'epoch': 0.84} + 84%|████████▎ | 10204/12188 [27:54<4:08:17, 7.51s/it] 84%|████████▎ | 10205/12188 [28:01<4:03:45, 7.38s/it] {'loss': 0.316, 'grad_norm': 0.7550977467668402, 'learning_rate': 6.78981009699971e-07, 'epoch': 0.84} + 84%|████████▎ | 10205/12188 [28:01<4:03:45, 7.38s/it] 84%|████████▎ | 10206/12188 [28:08<4:01:29, 7.31s/it] {'loss': 0.3467, 'grad_norm': 0.8573900612241276, 'learning_rate': 6.783126345467978e-07, 'epoch': 0.84} + 84%|████████▎ | 10206/12188 [28:08<4:01:29, 7.31s/it] 84%|████████▎ | 10207/12188 [28:15<4:02:07, 7.33s/it] {'loss': 0.3211, 'grad_norm': 0.6889414909432193, 'learning_rate': 6.776445645842439e-07, 'epoch': 0.84} + 84%|████████▎ | 10207/12188 [28:15<4:02:07, 7.33s/it] 84%|████████▍ | 10208/12188 [28:25<4:25:35, 8.05s/it] {'loss': 0.3076, 'grad_norm': 1.1509947090298513, 'learning_rate': 6.769767998594857e-07, 'epoch': 0.84} + 84%|████████▍ | 10208/12188 [28:25<4:25:35, 8.05s/it] 84%|████████▍ | 10209/12188 [28:32<4:13:00, 7.67s/it] {'loss': 0.3096, 'grad_norm': 0.7395425950097172, 'learning_rate': 6.763093404196808e-07, 'epoch': 0.84} + 84%|████████▍ | 10209/12188 [28:32<4:13:00, 7.67s/it] 84%|████████▍ | 10210/12188 [28:39<4:10:24, 7.60s/it] {'loss': 0.3913, 'grad_norm': 0.7731742489620366, 'learning_rate': 6.756421863119634e-07, 'epoch': 0.84} + 84%|████████▍ | 10210/12188 [28:39<4:10:24, 7.60s/it] 84%|████████▍ | 10211/12188 [28:46<4:05:20, 7.45s/it] {'loss': 0.2937, 'grad_norm': 0.6863193771290824, 'learning_rate': 6.749753375834467e-07, 'epoch': 0.84} + 84%|████████▍ | 10211/12188 [28:46<4:05:20, 7.45s/it] 84%|████████▍ | 10212/12188 [28:56<4:21:23, 7.94s/it] {'loss': 0.2914, 'grad_norm': 0.726274597264339, 'learning_rate': 6.743087942812243e-07, 'epoch': 0.84} + 84%|████████▍ | 10212/12188 [28:56<4:21:23, 7.94s/it] 84%|████████▍ | 10213/12188 [29:04<4:28:57, 8.17s/it] {'loss': 0.3149, 'grad_norm': 0.7143027980581816, 'learning_rate': 6.736425564523641e-07, 'epoch': 0.84} + 84%|████████▍ | 10213/12188 [29:04<4:28:57, 8.17s/it] 84%|████████▍ | 10214/12188 [29:12<4:26:47, 8.11s/it] {'loss': 0.2715, 'grad_norm': 0.6722003734101872, 'learning_rate': 6.729766241439167e-07, 'epoch': 0.84} + 84%|████████▍ | 10214/12188 [29:12<4:26:47, 8.11s/it] 84%|████████▍ | 10215/12188 [29:19<4:17:39, 7.84s/it] {'loss': 0.3058, 'grad_norm': 0.6559680759999786, 'learning_rate': 6.723109974029074e-07, 'epoch': 0.84} + 84%|████████▍ | 10215/12188 [29:19<4:17:39, 7.84s/it] 84%|████████▍ | 10216/12188 [29:27<4:11:11, 7.64s/it] {'loss': 0.2716, 'grad_norm': 0.7105086637412688, 'learning_rate': 6.716456762763435e-07, 'epoch': 0.84} + 84%|████████▍ | 10216/12188 [29:27<4:11:11, 7.64s/it] 84%|████████▍ | 10217/12188 [29:34<4:07:29, 7.53s/it] {'loss': 0.2916, 'grad_norm': 0.6972288665678524, 'learning_rate': 6.709806608112068e-07, 'epoch': 0.84} + 84%|████████▍ | 10217/12188 [29:34<4:07:29, 7.53s/it] 84%|████████▍ | 10218/12188 [29:41<4:01:36, 7.36s/it] {'loss': 0.274, 'grad_norm': 0.7009944230405746, 'learning_rate': 6.703159510544616e-07, 'epoch': 0.84} + 84%|████████▍ | 10218/12188 [29:41<4:01:36, 7.36s/it] 84%|████████▍ | 10219/12188 [29:49<4:09:44, 7.61s/it] {'loss': 0.3336, 'grad_norm': 0.7201102181454205, 'learning_rate': 6.696515470530468e-07, 'epoch': 0.84} + 84%|████████▍ | 10219/12188 [29:49<4:09:44, 7.61s/it] 84%|████████▍ | 10220/12188 [29:56<4:04:30, 7.45s/it] {'loss': 0.3031, 'grad_norm': 0.7077220485840414, 'learning_rate': 6.689874488538833e-07, 'epoch': 0.84} + 84%|████████▍ | 10220/12188 [29:56<4:04:30, 7.45s/it] 84%|████████▍ | 10221/12188 [30:04<4:07:04, 7.54s/it] {'loss': 0.294, 'grad_norm': 0.8115619508600275, 'learning_rate': 6.683236565038676e-07, 'epoch': 0.84} + 84%|████████▍ | 10221/12188 [30:04<4:07:04, 7.54s/it] 84%|████████▍ | 10222/12188 [30:10<3:57:41, 7.25s/it] {'loss': 0.3083, 'grad_norm': 0.7003059308124719, 'learning_rate': 6.676601700498764e-07, 'epoch': 0.84} + 84%|████████▍ | 10222/12188 [30:10<3:57:41, 7.25s/it] 84%|████████▍ | 10223/12188 [30:17<3:53:54, 7.14s/it] {'loss': 0.265, 'grad_norm': 0.7075763797207846, 'learning_rate': 6.669969895387623e-07, 'epoch': 0.84} + 84%|████████▍ | 10223/12188 [30:17<3:53:54, 7.14s/it] 84%|████████▍ | 10224/12188 [30:24<3:52:09, 7.09s/it] {'loss': 0.3309, 'grad_norm': 0.7264105797886975, 'learning_rate': 6.663341150173597e-07, 'epoch': 0.84} + 84%|████████▍ | 10224/12188 [30:24<3:52:09, 7.09s/it] 84%|████████▍ | 10225/12188 [30:31<3:50:06, 7.03s/it] {'loss': 0.3044, 'grad_norm': 0.6575823328574605, 'learning_rate': 6.656715465324803e-07, 'epoch': 0.84} + 84%|████████▍ | 10225/12188 [30:31<3:50:06, 7.03s/it] 84%|████████▍ | 10226/12188 [30:39<3:53:43, 7.15s/it] {'loss': 0.3247, 'grad_norm': 0.8020110717621628, 'learning_rate': 6.650092841309114e-07, 'epoch': 0.84} + 84%|████████▍ | 10226/12188 [30:39<3:53:43, 7.15s/it] 84%|████████▍ | 10227/12188 [30:46<3:54:48, 7.18s/it] {'loss': 0.3063, 'grad_norm': 0.6668109499504378, 'learning_rate': 6.643473278594231e-07, 'epoch': 0.84} + 84%|████████▍ | 10227/12188 [30:46<3:54:48, 7.18s/it] 84%|████████▍ | 10228/12188 [30:54<4:00:22, 7.36s/it] {'loss': 0.2919, 'grad_norm': 0.6505347473522399, 'learning_rate': 6.636856777647599e-07, 'epoch': 0.84} + 84%|████████▍ | 10228/12188 [30:54<4:00:22, 7.36s/it] 84%|████████▍ | 10229/12188 [31:00<3:52:40, 7.13s/it] {'loss': 0.2798, 'grad_norm': 0.7281585340430451, 'learning_rate': 6.630243338936476e-07, 'epoch': 0.84} + 84%|████████▍ | 10229/12188 [31:00<3:52:40, 7.13s/it] 84%|████████▍ | 10230/12188 [31:07<3:48:38, 7.01s/it] {'loss': 0.294, 'grad_norm': 0.6927295228825926, 'learning_rate': 6.623632962927895e-07, 'epoch': 0.84} + 84%|████████▍ | 10230/12188 [31:07<3:48:38, 7.01s/it] 84%|████████▍ | 10231/12188 [31:14<3:45:38, 6.92s/it] {'loss': 0.3009, 'grad_norm': 0.7100454885939257, 'learning_rate': 6.617025650088671e-07, 'epoch': 0.84} + 84%|████████▍ | 10231/12188 [31:14<3:45:38, 6.92s/it] 84%|████████▍ | 10232/12188 [31:20<3:43:14, 6.85s/it] {'loss': 0.2634, 'grad_norm': 0.7058019913334829, 'learning_rate': 6.610421400885392e-07, 'epoch': 0.84} + 84%|████████▍ | 10232/12188 [31:20<3:43:14, 6.85s/it] 84%|████████▍ | 10233/12188 [31:27<3:42:50, 6.84s/it] {'loss': 0.3228, 'grad_norm': 0.7574641407640577, 'learning_rate': 6.603820215784429e-07, 'epoch': 0.84} + 84%|████████▍ | 10233/12188 [31:27<3:42:50, 6.84s/it] 84%|████████▍ | 10234/12188 [31:36<4:04:00, 7.49s/it] {'loss': 0.293, 'grad_norm': 0.6679346372961228, 'learning_rate': 6.597222095251965e-07, 'epoch': 0.84} + 84%|████████▍ | 10234/12188 [31:36<4:04:00, 7.49s/it] 84%|████████▍ | 10235/12188 [31:44<4:03:42, 7.49s/it] {'loss': 0.2809, 'grad_norm': 0.6618088880191059, 'learning_rate': 6.590627039753955e-07, 'epoch': 0.84} + 84%|████████▍ | 10235/12188 [31:44<4:03:42, 7.49s/it] 84%|████████▍ | 10236/12188 [31:51<3:57:57, 7.31s/it] {'loss': 0.3373, 'grad_norm': 0.7253142285169019, 'learning_rate': 6.58403504975611e-07, 'epoch': 0.84} + 84%|████████▍ | 10236/12188 [31:51<3:57:57, 7.31s/it] 84%|████████▍ | 10237/12188 [31:58<4:00:50, 7.41s/it] {'loss': 0.2823, 'grad_norm': 0.7246321306514762, 'learning_rate': 6.577446125723958e-07, 'epoch': 0.84} + 84%|████████▍ | 10237/12188 [31:58<4:00:50, 7.41s/it] 84%|████████▍ | 10238/12188 [32:05<3:58:33, 7.34s/it] {'loss': 0.3082, 'grad_norm': 0.7711102285478891, 'learning_rate': 6.570860268122808e-07, 'epoch': 0.84} + 84%|████████▍ | 10238/12188 [32:05<3:58:33, 7.34s/it] 84%|████████▍ | 10239/12188 [32:15<4:23:14, 8.10s/it] {'loss': 0.2652, 'grad_norm': 0.7300911762834281, 'learning_rate': 6.564277477417719e-07, 'epoch': 0.84} + 84%|████████▍ | 10239/12188 [32:15<4:23:14, 8.10s/it] 84%|████████▍ | 10240/12188 [32:23<4:20:08, 8.01s/it] {'loss': 0.2982, 'grad_norm': 0.6864522137339553, 'learning_rate': 6.557697754073577e-07, 'epoch': 0.84} + 84%|████████▍ | 10240/12188 [32:23<4:20:08, 8.01s/it] 84%|████████▍ | 10241/12188 [32:32<4:24:30, 8.15s/it] {'loss': 0.2821, 'grad_norm': 0.650912482704844, 'learning_rate': 6.551121098555019e-07, 'epoch': 0.84} + 84%|████████▍ | 10241/12188 [32:32<4:24:30, 8.15s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7fa9acf30900> +[Try #0] Failed to fetch sample 4617819 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7fa9acf30900> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Contact us'"}, {'from': 'gpt', 'value': '\nclick(x=0.8505, y=0.32)\n'}]} +[2025-08-17 22:55:54,041] [WARNING] [stage3.py:2118:step] 1 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time + 84%|████████▍ | 10242/12188 [32:41<4:32:50, 8.41s/it] {'loss': 0.2827, 'grad_norm': 0.6161747816523404, 'learning_rate': 6.544547511326482e-07, 'epoch': 0.84} + 84%|████████▍ | 10242/12188 [32:41<4:32:50, 8.41s/it] 84%|████████▍ | 10243/12188 [32:48<4:20:08, 8.03s/it] {'loss': 0.3526, 'grad_norm': 0.7252215433636003, 'learning_rate': 6.537976992852196e-07, 'epoch': 0.84} + 84%|████████▍ | 10243/12188 [32:48<4:20:08, 8.03s/it] 84%|████████▍ | 10244/12188 [32:59<4:50:03, 8.95s/it] {'loss': 0.3002, 'grad_norm': 0.8093791959697517, 'learning_rate': 6.531409543596146e-07, 'epoch': 0.84} + 84%|████████▍ | 10244/12188 [32:59<4:50:03, 8.95s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7fd63f97eca0> +[Try #0] Failed to fetch sample 4335455 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7fd63f97eca0> +Problematic sample: {'image': '20240823_021250_before_screenshot_sub0.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Watches'"}, {'from': 'gpt', 'value': '\nclick(x=0.681, y=0.266)\n'}]} + 84%|████████▍ | 10245/12188 [33:05<4:26:29, 8.23s/it] {'loss': 0.2826, 'grad_norm': 0.782349839174406, 'learning_rate': 6.524845164022104e-07, 'epoch': 0.84} + 84%|████████▍ | 10245/12188 [33:05<4:26:29, 8.23s/it] 84%|████████▍ | 10246/12188 [33:12<4:09:06, 7.70s/it] {'loss': 0.3111, 'grad_norm': 0.6980636504089651, 'learning_rate': 6.51828385459366e-07, 'epoch': 0.84} + 84%|████████▍ | 10246/12188 [33:12<4:09:06, 7.70s/it] 84%|████████▍ | 10247/12188 [33:19<4:05:54, 7.60s/it] {'loss': 0.3041, 'grad_norm': 0.6733709099902728, 'learning_rate': 6.511725615774139e-07, 'epoch': 0.84} + 84%|████████▍ | 10247/12188 [33:19<4:05:54, 7.60s/it] 84%|████████▍ | 10248/12188 [33:26<3:57:18, 7.34s/it] {'loss': 0.2699, 'grad_norm': 0.7198089764017666, 'learning_rate': 6.505170448026699e-07, 'epoch': 0.84} + 84%|████████▍ | 10248/12188 [33:26<3:57:18, 7.34s/it] 84%|████████▍ | 10249/12188 [33:35<4:12:40, 7.82s/it] {'loss': 0.3215, 'grad_norm': 0.8673087197017996, 'learning_rate': 6.498618351814229e-07, 'epoch': 0.84} + 84%|████████▍ | 10249/12188 [33:35<4:12:40, 7.82s/it] 84%|████████▍ | 10250/12188 [33:42<4:04:37, 7.57s/it] {'loss': 0.2784, 'grad_norm': 0.6807196873647888, 'learning_rate': 6.492069327599454e-07, 'epoch': 0.84} + 84%|████████▍ | 10250/12188 [33:42<4:04:37, 7.57s/it] 84%|████████▍ | 10251/12188 [33:49<4:03:22, 7.54s/it] {'loss': 0.3092, 'grad_norm': 0.6505669164090571, 'learning_rate': 6.485523375844826e-07, 'epoch': 0.84} + 84%|████████▍ | 10251/12188 [33:49<4:03:22, 7.54s/it] 84%|████████▍ | 10252/12188 [33:56<3:53:35, 7.24s/it] {'loss': 0.33, 'grad_norm': 0.8002842596523334, 'learning_rate': 6.478980497012632e-07, 'epoch': 0.84} + 84%|████████▍ | 10252/12188 [33:56<3:53:35, 7.24s/it] 84%|████████▍ | 10253/12188 [34:03<3:50:13, 7.14s/it] {'loss': 0.3061, 'grad_norm': 0.7043381298667931, 'learning_rate': 6.472440691564924e-07, 'epoch': 0.84} + 84%|████████▍ | 10253/12188 [34:03<3:50:13, 7.14s/it] 84%|████████▍ | 10254/12188 [34:11<4:04:48, 7.59s/it] {'loss': 0.3167, 'grad_norm': 0.630102878479862, 'learning_rate': 6.46590395996351e-07, 'epoch': 0.84} + 84%|████████▍ | 10254/12188 [34:11<4:04:48, 7.59s/it] 84%|████████▍ | 10255/12188 [34:18<3:58:20, 7.40s/it] {'loss': 0.3072, 'grad_norm': 0.7023815613984672, 'learning_rate': 6.459370302670015e-07, 'epoch': 0.84} + 84%|████████▍ | 10255/12188 [34:18<3:58:20, 7.40s/it] 84%|████████▍ | 10256/12188 [34:26<3:57:28, 7.37s/it] {'loss': 0.2817, 'grad_norm': 0.6474440664437645, 'learning_rate': 6.452839720145848e-07, 'epoch': 0.84} + 84%|████████▍ | 10256/12188 [34:26<3:57:28, 7.37s/it] 84%|████████▍ | 10257/12188 [34:33<3:59:47, 7.45s/it] {'loss': 0.2653, 'grad_norm': 0.6472499507919702, 'learning_rate': 6.446312212852162e-07, 'epoch': 0.84} + 84%|████████▍ | 10257/12188 [34:33<3:59:47, 7.45s/it] 84%|████████▍ | 10258/12188 [34:41<4:02:37, 7.54s/it] {'loss': 0.3025, 'grad_norm': 0.9430887235314466, 'learning_rate': 6.439787781249945e-07, 'epoch': 0.84} + 84%|████████▍ | 10258/12188 [34:41<4:02:37, 7.54s/it] 84%|████████▍ | 10259/12188 [34:48<3:54:33, 7.30s/it] {'loss': 0.3094, 'grad_norm': 0.7477779124078837, 'learning_rate': 6.433266425799933e-07, 'epoch': 0.84} + 84%|████████▍ | 10259/12188 [34:48<3:54:33, 7.30s/it] 84%|████████▍ | 10260/12188 [34:56<4:07:48, 7.71s/it] {'loss': 0.292, 'grad_norm': 0.656231713963856, 'learning_rate': 6.426748146962635e-07, 'epoch': 0.84} + 84%|████████▍ | 10260/12188 [34:56<4:07:48, 7.71s/it] 84%|████████▍ | 10261/12188 [35:04<4:03:26, 7.58s/it] {'loss': 0.3285, 'grad_norm': 0.7446698924995839, 'learning_rate': 6.420232945198395e-07, 'epoch': 0.84} + 84%|████████▍ | 10261/12188 [35:04<4:03:26, 7.58s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f78442b14e0> +[Try #0] Failed to fetch sample 4558880 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f78442b14e0> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Settings - Sleeping'"}, {'from': 'gpt', 'value': '\nclick(x=0.566, y=0.012)\n'}]} + 84%|████████▍ | 10262/12188 [35:11<3:59:13, 7.45s/it] {'loss': 0.3056, 'grad_norm': 0.7112236799179988, 'learning_rate': 6.413720820967273e-07, 'epoch': 0.84} + 84%|████████▍ | 10262/12188 [35:11<3:59:13, 7.45s/it] 84%|████████▍ | 10263/12188 [35:19<4:02:57, 7.57s/it] {'loss': 0.2846, 'grad_norm': 0.6921328718795807, 'learning_rate': 6.407211774729171e-07, 'epoch': 0.84} + 84%|████████▍ | 10263/12188 [35:19<4:02:57, 7.57s/it] 84%|████████▍ | 10264/12188 [35:26<3:59:18, 7.46s/it] {'loss': 0.2721, 'grad_norm': 0.8803700452544846, 'learning_rate': 6.400705806943724e-07, 'epoch': 0.84} + 84%|████████▍ | 10264/12188 [35:26<3:59:18, 7.46s/it] 84%|████████▍ | 10265/12188 [35:33<3:58:30, 7.44s/it] {'loss': 0.3012, 'grad_norm': 0.7310692590383129, 'learning_rate': 6.394202918070391e-07, 'epoch': 0.84} + 84%|████████▍ | 10265/12188 [35:33<3:58:30, 7.44s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f6ec5077330> +[Try #0] Failed to fetch sample 4698401 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f6ec5077330> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Code of Conduct'"}, {'from': 'gpt', 'value': '\nclick(x=0.917, y=0.558)\n'}]} + 84%|████████▍ | 10266/12188 [35:41<3:56:42, 7.39s/it] {'loss': 0.315, 'grad_norm': 0.6573785809829074, 'learning_rate': 6.387703108568394e-07, 'epoch': 0.84} + 84%|████████▍ | 10266/12188 [35:41<3:56:42, 7.39s/it] 84%|████████▍ | 10267/12188 [35:47<3:50:43, 7.21s/it] {'loss': 0.3077, 'grad_norm': 0.6878021130646209, 'learning_rate': 6.381206378896726e-07, 'epoch': 0.84} + 84%|████████▍ | 10267/12188 [35:47<3:50:43, 7.21s/it] 84%|████████▍ | 10268/12188 [35:55<3:50:41, 7.21s/it] {'loss': 0.2762, 'grad_norm': 0.7420303798696666, 'learning_rate': 6.374712729514198e-07, 'epoch': 0.84} + 84%|████████▍ | 10268/12188 [35:55<3:50:41, 7.21s/it] 84%|████████▍ | 10269/12188 [36:02<3:48:26, 7.14s/it] {'loss': 0.297, 'grad_norm': 0.7095964431870814, 'learning_rate': 6.368222160879356e-07, 'epoch': 0.84} + 84%|████████▍ | 10269/12188 [36:02<3:48:26, 7.14s/it] 84%|████████▍ | 10270/12188 [36:09<3:51:00, 7.23s/it] {'loss': 0.3037, 'grad_norm': 0.7020161935315772, 'learning_rate': 6.361734673450565e-07, 'epoch': 0.84} + 84%|████████▍ | 10270/12188 [36:09<3:51:00, 7.23s/it] 84%|████████▍ | 10271/12188 [36:17<4:02:03, 7.58s/it] {'loss': 0.3125, 'grad_norm': 0.7790871025553472, 'learning_rate': 6.35525026768597e-07, 'epoch': 0.84} + 84%|████████▍ | 10271/12188 [36:17<4:02:03, 7.58s/it] 84%|████████▍ | 10272/12188 [36:24<3:53:03, 7.30s/it] {'loss': 0.2988, 'grad_norm': 0.7629930214398738, 'learning_rate': 6.348768944043482e-07, 'epoch': 0.84} + 84%|████████▍ | 10272/12188 [36:24<3:53:03, 7.30s/it] 84%|████████▍ | 10273/12188 [36:31<3:47:19, 7.12s/it] {'loss': 0.2798, 'grad_norm': 0.7064761705168094, 'learning_rate': 6.342290702980785e-07, 'epoch': 0.84} + 84%|████████▍ | 10273/12188 [36:31<3:47:19, 7.12s/it] 84%|████████▍ | 10274/12188 [36:40<4:05:29, 7.70s/it] {'loss': 0.3072, 'grad_norm': 0.6260448703415294, 'learning_rate': 6.335815544955392e-07, 'epoch': 0.84} + 84%|████████▍ | 10274/12188 [36:40<4:05:29, 7.70s/it] 84%|████████▍ | 10275/12188 [36:46<3:55:09, 7.38s/it] {'loss': 0.2846, 'grad_norm': 0.7690344940209859, 'learning_rate': 6.329343470424542e-07, 'epoch': 0.84} + 84%|████████▍ | 10275/12188 [36:46<3:55:09, 7.38s/it] 84%|████████▍ | 10276/12188 [36:54<3:52:49, 7.31s/it] {'loss': 0.3091, 'grad_norm': 0.9175475808828734, 'learning_rate': 6.322874479845309e-07, 'epoch': 0.84} + 84%|████████▍ | 10276/12188 [36:54<3:52:49, 7.31s/it] 84%|████████▍ | 10277/12188 [37:00<3:48:40, 7.18s/it] {'loss': 0.3177, 'grad_norm': 0.7979576414288193, 'learning_rate': 6.316408573674492e-07, 'epoch': 0.84} + 84%|████████▍ | 10277/12188 [37:00<3:48:40, 7.18s/it] 84%|████████▍ | 10278/12188 [37:07<3:45:02, 7.07s/it] {'loss': 0.2983, 'grad_norm': 0.6632204170137554, 'learning_rate': 6.309945752368718e-07, 'epoch': 0.84} + 84%|████████▍ | 10278/12188 [37:07<3:45:02, 7.07s/it] 84%|████████▍ | 10279/12188 [37:15<3:51:36, 7.28s/it] {'loss': 0.3282, 'grad_norm': 0.7412750574844645, 'learning_rate': 6.303486016384392e-07, 'epoch': 0.84} + 84%|████████▍ | 10279/12188 [37:15<3:51:36, 7.28s/it] 84%|████████▍ | 10280/12188 [37:24<4:03:16, 7.65s/it] {'loss': 0.3118, 'grad_norm': 0.739312439645467, 'learning_rate': 6.29702936617767e-07, 'epoch': 0.84} + 84%|████████▍ | 10280/12188 [37:24<4:03:16, 7.65s/it] 84%|████████▍ | 10281/12188 [37:31<3:57:24, 7.47s/it] {'loss': 0.3213, 'grad_norm': 0.7106480853090031, 'learning_rate': 6.290575802204535e-07, 'epoch': 0.84} + 84%|████████▍ | 10281/12188 [37:31<3:57:24, 7.47s/it] 84%|████████▍ | 10282/12188 [37:38<3:55:50, 7.42s/it] {'loss': 0.3199, 'grad_norm': 0.7251713754312874, 'learning_rate': 6.284125324920698e-07, 'epoch': 0.84} + 84%|████████▍ | 10282/12188 [37:38<3:55:50, 7.42s/it] 84%|████████▍ | 10283/12188 [37:45<3:52:22, 7.32s/it] {'loss': 0.3268, 'grad_norm': 0.6934949120356979, 'learning_rate': 6.277677934781695e-07, 'epoch': 0.84} + 84%|████████▍ | 10283/12188 [37:45<3:52:22, 7.32s/it] 84%|████████▍ | 10284/12188 [37:52<3:49:08, 7.22s/it] {'loss': 0.2849, 'grad_norm': 0.7758910702109484, 'learning_rate': 6.27123363224284e-07, 'epoch': 0.84} + 84%|████████▍ | 10284/12188 [37:52<3:49:08, 7.22s/it] 84%|████████▍ | 10285/12188 [37:59<3:45:46, 7.12s/it] {'loss': 0.3266, 'grad_norm': 0.759121602499613, 'learning_rate': 6.264792417759202e-07, 'epoch': 0.84} + 84%|████████▍ | 10285/12188 [37:59<3:45:46, 7.12s/it] 84%|████████▍ | 10286/12188 [38:06<3:47:45, 7.18s/it] {'loss': 0.2611, 'grad_norm': 0.8419404599933548, 'learning_rate': 6.258354291785668e-07, 'epoch': 0.84} + 84%|████████▍ | 10286/12188 [38:06<3:47:45, 7.18s/it] 84%|████████▍ | 10287/12188 [38:13<3:44:08, 7.07s/it] {'loss': 0.276, 'grad_norm': 0.667349437862789, 'learning_rate': 6.251919254776878e-07, 'epoch': 0.84} + 84%|████████▍ | 10287/12188 [38:13<3:44:08, 7.07s/it] 84%|████████▍ | 10288/12188 [38:20<3:44:47, 7.10s/it] {'loss': 0.3023, 'grad_norm': 0.6765538278946803, 'learning_rate': 6.245487307187253e-07, 'epoch': 0.84} + 84%|████████▍ | 10288/12188 [38:20<3:44:47, 7.10s/it] 84%|████████▍ | 10289/12188 [38:28<3:49:15, 7.24s/it] {'loss': 0.3326, 'grad_norm': 0.754433259655461, 'learning_rate': 6.239058449471025e-07, 'epoch': 0.84} + 84%|████████▍ | 10289/12188 [38:28<3:49:15, 7.24s/it] 84%|████████▍ | 10290/12188 [38:35<3:50:16, 7.28s/it] {'loss': 0.3, 'grad_norm': 0.7285707836152, 'learning_rate': 6.232632682082174e-07, 'epoch': 0.84} + 84%|████████▍ | 10290/12188 [38:35<3:50:16, 7.28s/it] 84%|████████▍ | 10291/12188 [38:42<3:42:57, 7.05s/it] {'loss': 0.2931, 'grad_norm': 0.6964495013728291, 'learning_rate': 6.226210005474486e-07, 'epoch': 0.84} + 84%|████████▍ | 10291/12188 [38:42<3:42:57, 7.05s/it] 84%|████████▍ | 10292/12188 [38:48<3:38:34, 6.92s/it] {'loss': 0.3115, 'grad_norm': 0.7716694066945261, 'learning_rate': 6.219790420101529e-07, 'epoch': 0.84} + 84%|████████▍ | 10292/12188 [38:48<3:38:34, 6.92s/it] 84%|████████▍ | 10293/12188 [38:57<3:57:07, 7.51s/it] {'loss': 0.28, 'grad_norm': 0.6693625833043616, 'learning_rate': 6.213373926416627e-07, 'epoch': 0.84} + 84%|████████▍ | 10293/12188 [38:57<3:57:07, 7.51s/it] 84%|████████▍ | 10294/12188 [39:04<3:52:02, 7.35s/it] {'loss': 0.2753, 'grad_norm': 0.8984848260542913, 'learning_rate': 6.206960524872913e-07, 'epoch': 0.84} + 84%|████████▍ | 10294/12188 [39:04<3:52:02, 7.35s/it] 84%|████████▍ | 10295/12188 [39:11<3:46:25, 7.18s/it] {'loss': 0.3032, 'grad_norm': 0.8154432358731508, 'learning_rate': 6.200550215923284e-07, 'epoch': 0.84} + 84%|████████▍ | 10295/12188 [39:11<3:46:25, 7.18s/it] 84%|████████▍ | 10296/12188 [39:18<3:49:47, 7.29s/it] {'loss': 0.3031, 'grad_norm': 0.7212392623620232, 'learning_rate': 6.194143000020425e-07, 'epoch': 0.84} + 84%|████████▍ | 10296/12188 [39:18<3:49:47, 7.29s/it] 84%|████████▍ | 10297/12188 [39:25<3:44:13, 7.11s/it] {'loss': 0.2814, 'grad_norm': 0.6415861901055839, 'learning_rate': 6.187738877616822e-07, 'epoch': 0.84} + 84%|████████▍ | 10297/12188 [39:25<3:44:13, 7.11s/it] 84%|████████▍ | 10298/12188 [39:32<3:44:56, 7.14s/it] {'loss': 0.291, 'grad_norm': 0.6863235373920054, 'learning_rate': 6.181337849164699e-07, 'epoch': 0.84} + 84%|████████▍ | 10298/12188 [39:32<3:44:56, 7.14s/it] 85%|████████▍ | 10299/12188 [39:40<3:47:19, 7.22s/it] {'loss': 0.3346, 'grad_norm': 0.7514103056175738, 'learning_rate': 6.174939915116107e-07, 'epoch': 0.84} + 85%|████████▍ | 10299/12188 [39:40<3:47:19, 7.22s/it] 85%|████████▍ | 10300/12188 [39:48<3:54:05, 7.44s/it] {'loss': 0.3125, 'grad_norm': 0.7081867987518959, 'learning_rate': 6.168545075922844e-07, 'epoch': 0.85} + 85%|████████▍ | 10300/12188 [39:48<3:54:05, 7.44s/it] 85%|████████▍ | 10301/12188 [39:55<3:48:22, 7.26s/it] {'loss': 0.2846, 'grad_norm': 0.7409659519538059, 'learning_rate': 6.162153332036503e-07, 'epoch': 0.85} + 85%|████████▍ | 10301/12188 [39:55<3:48:22, 7.26s/it] 85%|████████▍ | 10302/12188 [40:02<3:46:41, 7.21s/it] {'loss': 0.3045, 'grad_norm': 0.7396177731163746, 'learning_rate': 6.155764683908466e-07, 'epoch': 0.85} + 85%|████████▍ | 10302/12188 [40:02<3:46:41, 7.21s/it] 85%|████████▍ | 10303/12188 [40:09<3:45:09, 7.17s/it] {'loss': 0.3142, 'grad_norm': 0.7451049027228728, 'learning_rate': 6.14937913198988e-07, 'epoch': 0.85} + 85%|████████▍ | 10303/12188 [40:09<3:45:09, 7.17s/it] 85%|████████▍ | 10304/12188 [40:16<3:48:22, 7.27s/it] {'loss': 0.2975, 'grad_norm': 0.8456525521642978, 'learning_rate': 6.142996676731688e-07, 'epoch': 0.85} + 85%|████████▍ | 10304/12188 [40:16<3:48:22, 7.27s/it] 85%|████████▍ | 10305/12188 [40:23<3:42:41, 7.10s/it] {'loss': 0.3129, 'grad_norm': 0.6907353690712597, 'learning_rate': 6.13661731858462e-07, 'epoch': 0.85} + 85%|████████▍ | 10305/12188 [40:23<3:42:41, 7.10s/it] 85%|████████▍ | 10306/12188 [40:30<3:38:25, 6.96s/it] {'loss': 0.3383, 'grad_norm': 0.7942711129230485, 'learning_rate': 6.130241057999153e-07, 'epoch': 0.85} + 85%|████████▍ | 10306/12188 [40:30<3:38:25, 6.96s/it] 85%|████████▍ | 10307/12188 [40:36<3:37:38, 6.94s/it] {'loss': 0.3135, 'grad_norm': 0.7676972223397724, 'learning_rate': 6.123867895425589e-07, 'epoch': 0.85} + 85%|████████▍ | 10307/12188 [40:36<3:37:38, 6.94s/it] 85%|████████▍ | 10308/12188 [40:44<3:44:45, 7.17s/it] {'loss': 0.2957, 'grad_norm': 0.7317149537021855, 'learning_rate': 6.117497831313973e-07, 'epoch': 0.85} + 85%|████████▍ | 10308/12188 [40:44<3:44:45, 7.17s/it] 85%|████████▍ | 10309/12188 [40:51<3:42:55, 7.12s/it] {'loss': 0.2931, 'grad_norm': 0.6545693590098487, 'learning_rate': 6.111130866114162e-07, 'epoch': 0.85} + 85%|████████▍ | 10309/12188 [40:51<3:42:55, 7.12s/it] 85%|████████▍ | 10310/12188 [40:58<3:38:48, 6.99s/it] {'loss': 0.3433, 'grad_norm': 0.6718262637063853, 'learning_rate': 6.10476700027578e-07, 'epoch': 0.85} + 85%|████████▍ | 10310/12188 [40:58<3:38:48, 6.99s/it] 85%|████████▍ | 10311/12188 [41:05<3:42:04, 7.10s/it] {'loss': 0.2611, 'grad_norm': 0.7514351518092232, 'learning_rate': 6.098406234248222e-07, 'epoch': 0.85} + 85%|████████▍ | 10311/12188 [41:05<3:42:04, 7.10s/it] 85%|████████▍ | 10312/12188 [41:12<3:39:57, 7.03s/it] {'loss': 0.2922, 'grad_norm': 0.6873389613166435, 'learning_rate': 6.092048568480697e-07, 'epoch': 0.85} + 85%|████████▍ | 10312/12188 [41:12<3:39:57, 7.03s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f86b3b321b0> +[Try #0] Failed to fetch sample 4870604 in VC:s3://gui/OS-Atlas/desktop_domain/linux_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f86b3b321b0> +Problematic sample: {'image': 'output_20240912_153123_original_screenshot_sub3.png', 'conversations': [{'from': 'human', 'value': "\nClick on '跳至本页面的“常见问题解答”部分'"}, {'from': 'gpt', 'value': '\nclick(x=0.2504, y=0.3285)\n'}]} + 85%|████████▍ | 10313/12188 [41:20<3:48:26, 7.31s/it] {'loss': 0.2888, 'grad_norm': 0.7263446394855793, 'learning_rate': 6.085694003422144e-07, 'epoch': 0.85} + 85%|████████▍ | 10313/12188 [41:20<3:48:26, 7.31s/it] 85%|████████▍ | 10314/12188 [41:27<3:43:17, 7.15s/it] {'loss': 0.2936, 'grad_norm': 0.9560433472207943, 'learning_rate': 6.079342539521338e-07, 'epoch': 0.85} + 85%|████████▍ | 10314/12188 [41:27<3:43:17, 7.15s/it] 85%|████████▍ | 10315/12188 [41:34<3:45:33, 7.23s/it] {'loss': 0.2821, 'grad_norm': 1.174150993035231, 'learning_rate': 6.072994177226793e-07, 'epoch': 0.85} + 85%|████████▍ | 10315/12188 [41:34<3:45:33, 7.23s/it] 85%|████████▍ | 10316/12188 [41:41<3:45:47, 7.24s/it] {'loss': 0.3207, 'grad_norm': 0.6666592440101688, 'learning_rate': 6.066648916986823e-07, 'epoch': 0.85} + 85%|████████▍ | 10316/12188 [41:41<3:45:47, 7.24s/it] 85%|████████▍ | 10317/12188 [41:49<3:47:17, 7.29s/it] {'loss': 0.2913, 'grad_norm': 0.6967670564250161, 'learning_rate': 6.060306759249529e-07, 'epoch': 0.85} + 85%|████████▍ | 10317/12188 [41:49<3:47:17, 7.29s/it] 85%|████████▍ | 10318/12188 [41:56<3:42:51, 7.15s/it] {'loss': 0.2818, 'grad_norm': 0.7422502401289429, 'learning_rate': 6.053967704462766e-07, 'epoch': 0.85} + 85%|████████▍ | 10318/12188 [41:56<3:42:51, 7.15s/it] 85%|████████▍ | 10319/12188 [42:02<3:38:36, 7.02s/it] {'loss': 0.2758, 'grad_norm': 0.6762815904167873, 'learning_rate': 6.047631753074195e-07, 'epoch': 0.85} + 85%|████████▍ | 10319/12188 [42:02<3:38:36, 7.02s/it] 85%|████████▍ | 10320/12188 [42:10<3:43:02, 7.16s/it] {'loss': 0.327, 'grad_norm': 0.7076646502976661, 'learning_rate': 6.041298905531273e-07, 'epoch': 0.85} + 85%|████████▍ | 10320/12188 [42:10<3:43:02, 7.16s/it] 85%|████████▍ | 10321/12188 [42:17<3:44:31, 7.22s/it] {'loss': 0.3165, 'grad_norm': 0.7106696993092552, 'learning_rate': 6.034969162281184e-07, 'epoch': 0.85} + 85%|████████▍ | 10321/12188 [42:17<3:44:31, 7.22s/it] 85%|████████▍ | 10322/12188 [42:24<3:42:25, 7.15s/it] {'loss': 0.3057, 'grad_norm': 0.6365730877357193, 'learning_rate': 6.028642523770934e-07, 'epoch': 0.85} + 85%|██████��█▍ | 10322/12188 [42:24<3:42:25, 7.15s/it] 85%|████████▍ | 10323/12188 [42:32<3:43:23, 7.19s/it] {'loss': 0.2794, 'grad_norm': 0.7182426635378034, 'learning_rate': 6.022318990447318e-07, 'epoch': 0.85} + 85%|████████▍ | 10323/12188 [42:32<3:43:23, 7.19s/it] 85%|████████▍ | 10324/12188 [42:39<3:46:15, 7.28s/it] {'loss': 0.2942, 'grad_norm': 0.6793109703698211, 'learning_rate': 6.01599856275687e-07, 'epoch': 0.85} + 85%|████████▍ | 10324/12188 [42:39<3:46:15, 7.28s/it] 85%|████████▍ | 10325/12188 [42:49<4:12:36, 8.14s/it] {'loss': 0.3302, 'grad_norm': 0.6871029110257706, 'learning_rate': 6.009681241145943e-07, 'epoch': 0.85} + 85%|████████▍ | 10325/12188 [42:49<4:12:36, 8.14s/it] 85%|████████▍ | 10326/12188 [42:58<4:20:58, 8.41s/it] {'loss': 0.2837, 'grad_norm': 0.6636508446404542, 'learning_rate': 6.003367026060647e-07, 'epoch': 0.85} + 85%|████████▍ | 10326/12188 [42:58<4:20:58, 8.41s/it] 85%|████████▍ | 10327/12188 [43:05<4:05:52, 7.93s/it] {'loss': 0.2832, 'grad_norm': 0.7583597508292854, 'learning_rate': 5.997055917946893e-07, 'epoch': 0.85} + 85%|████████▍ | 10327/12188 [43:05<4:05:52, 7.93s/it] 85%|████████▍ | 10328/12188 [43:13<4:02:36, 7.83s/it] {'loss': 0.2933, 'grad_norm': 0.7403973476028978, 'learning_rate': 5.99074791725035e-07, 'epoch': 0.85} + 85%|████████▍ | 10328/12188 [43:13<4:02:36, 7.83s/it] 85%|████████▍ | 10329/12188 [43:21<4:11:17, 8.11s/it] {'loss': 0.2547, 'grad_norm': 0.6824317683244158, 'learning_rate': 5.984443024416476e-07, 'epoch': 0.85} + 85%|████████▍ | 10329/12188 [43:21<4:11:17, 8.11s/it] 85%|████████▍ | 10330/12188 [43:28<3:57:46, 7.68s/it] {'loss': 0.3157, 'grad_norm': 0.7145300573355446, 'learning_rate': 5.978141239890528e-07, 'epoch': 0.85} + 85%|████████▍ | 10330/12188 [43:28<3:57:46, 7.68s/it] 85%|████████▍ | 10331/12188 [43:36<3:55:59, 7.62s/it] {'loss': 0.3302, 'grad_norm': 0.676948625599957, 'learning_rate': 5.971842564117513e-07, 'epoch': 0.85} + 85%|████████▍ | 10331/12188 [43:36<3:55:59, 7.62s/it] 85%|████████▍ | 10332/12188 [43:43<3:49:50, 7.43s/it] {'loss': 0.2751, 'grad_norm': 0.6754291167553998, 'learning_rate': 5.965546997542238e-07, 'epoch': 0.85} + 85%|████████▍ | 10332/12188 [43:43<3:49:50, 7.43s/it] 85%|████████▍ | 10333/12188 [43:50<3:53:15, 7.54s/it] {'loss': 0.3445, 'grad_norm': 0.6786581956329659, 'learning_rate': 5.959254540609294e-07, 'epoch': 0.85} + 85%|████████▍ | 10333/12188 [43:50<3:53:15, 7.54s/it] 85%|████████▍ | 10334/12188 [43:57<3:47:40, 7.37s/it] {'loss': 0.2758, 'grad_norm': 0.6737049894021191, 'learning_rate': 5.952965193763028e-07, 'epoch': 0.85} + 85%|████████▍ | 10334/12188 [43:57<3:47:40, 7.37s/it] 85%|████████▍ | 10335/12188 [44:05<3:51:36, 7.50s/it] {'loss': 0.3229, 'grad_norm': 0.7197992495453511, 'learning_rate': 5.946678957447605e-07, 'epoch': 0.85} + 85%|████████▍ | 10335/12188 [44:05<3:51:36, 7.50s/it] 85%|████████▍ | 10336/12188 [44:13<3:52:40, 7.54s/it] {'loss': 0.3066, 'grad_norm': 0.6959951580659347, 'learning_rate': 5.940395832106926e-07, 'epoch': 0.85} + 85%|████████▍ | 10336/12188 [44:13<3:52:40, 7.54s/it] 85%|████████▍ | 10337/12188 [44:20<3:50:26, 7.47s/it] {'loss': 0.3332, 'grad_norm': 0.7065557432524467, 'learning_rate': 5.93411581818471e-07, 'epoch': 0.85} + 85%|████████▍ | 10337/12188 [44:20<3:50:26, 7.47s/it] 85%|████████▍ | 10338/12188 [44:27<3:43:26, 7.25s/it] {'loss': 0.3231, 'grad_norm': 0.6668244160267615, 'learning_rate': 5.927838916124445e-07, 'epoch': 0.85} + 85%|████████▍ | 10338/12188 [44:27<3:43:26, 7.25s/it] 85%|████████▍ | 10339/12188 [44:34<3:44:41, 7.29s/it] {'loss': 0.3141, 'grad_norm': 0.6948925687689692, 'learning_rate': 5.921565126369378e-07, 'epoch': 0.85} + 85%|████████▍ | 10339/12188 [44:34<3:44:41, 7.29s/it] 85%|████████▍ | 10340/12188 [44:41<3:38:16, 7.09s/it] {'loss': 0.3005, 'grad_norm': 0.7881433357778956, 'learning_rate': 5.915294449362574e-07, 'epoch': 0.85} + 85%|████████▍ | 10340/12188 [44:41<3:38:16, 7.09s/it] 85%|████████▍ | 10341/12188 [44:50<4:01:36, 7.85s/it] {'loss': 0.3064, 'grad_norm': 0.689177536792203, 'learning_rate': 5.909026885546837e-07, 'epoch': 0.85} + 85%|████████▍ | 10341/12188 [44:50<4:01:36, 7.85s/it] 85%|████████▍ | 10342/12188 [44:57<3:54:11, 7.61s/it] {'loss': 0.3106, 'grad_norm': 0.6184000686476253, 'learning_rate': 5.902762435364795e-07, 'epoch': 0.85} + 85%|████████▍ | 10342/12188 [44:57<3:54:11, 7.61s/it] 85%|████████▍ | 10343/12188 [45:04<3:48:00, 7.42s/it] {'loss': 0.2891, 'grad_norm': 0.6723226245605436, 'learning_rate': 5.896501099258822e-07, 'epoch': 0.85} + 85%|████████▍ | 10343/12188 [45:04<3:48:00, 7.42s/it] 85%|████████▍ | 10344/12188 [45:11<3:40:12, 7.16s/it] {'loss': 0.2736, 'grad_norm': 0.7157464773337323, 'learning_rate': 5.890242877671077e-07, 'epoch': 0.85} + 85%|████████▍ | 10344/12188 [45:11<3:40:12, 7.16s/it] 85%|████████▍ | 10345/12188 [45:19<3:45:11, 7.33s/it] {'loss': 0.2626, 'grad_norm': 0.6191161955334409, 'learning_rate': 5.883987771043509e-07, 'epoch': 0.85} + 85%|████████▍ | 10345/12188 [45:19<3:45:11, 7.33s/it] 85%|████████▍ | 10346/12188 [45:26<3:41:06, 7.20s/it] {'loss': 0.2879, 'grad_norm': 0.6982879962839887, 'learning_rate': 5.877735779817861e-07, 'epoch': 0.85} + 85%|████████▍ | 10346/12188 [45:26<3:41:06, 7.20s/it] 85%|████████▍ | 10347/12188 [45:33<3:40:09, 7.18s/it] {'loss': 0.3137, 'grad_norm': 0.7254660883829825, 'learning_rate': 5.871486904435608e-07, 'epoch': 0.85} + 85%|████████▍ | 10347/12188 [45:33<3:40:09, 7.18s/it] 85%|████████▍ | 10348/12188 [45:40<3:42:31, 7.26s/it] {'loss': 0.2579, 'grad_norm': 0.8140414395498133, 'learning_rate': 5.865241145338063e-07, 'epoch': 0.85} + 85%|████████▍ | 10348/12188 [45:40<3:42:31, 7.26s/it] 85%|████████▍ | 10349/12188 [45:48<3:47:25, 7.42s/it] {'loss': 0.2872, 'grad_norm': 0.8398165246522348, 'learning_rate': 5.858998502966273e-07, 'epoch': 0.85} + 85%|████████▍ | 10349/12188 [45:48<3:47:25, 7.42s/it] 85%|████████▍ | 10350/12188 [45:55<3:43:37, 7.30s/it] {'loss': 0.2889, 'grad_norm': 0.7033980960777022, 'learning_rate': 5.852758977761091e-07, 'epoch': 0.85} + 85%|████████▍ | 10350/12188 [45:55<3:43:37, 7.30s/it] 85%|████████▍ | 10351/12188 [46:02<3:43:06, 7.29s/it] {'loss': 0.3286, 'grad_norm': 0.7967884476590983, 'learning_rate': 5.846522570163155e-07, 'epoch': 0.85} + 85%|████████▍ | 10351/12188 [46:02<3:43:06, 7.29s/it] 85%|████████▍ | 10352/12188 [46:09<3:39:21, 7.17s/it] {'loss': 0.3204, 'grad_norm': 0.6847923141296046, 'learning_rate': 5.840289280612837e-07, 'epoch': 0.85} + 85%|████████▍ | 10352/12188 [46:09<3:39:21, 7.17s/it] 85%|████████▍ | 10353/12188 [46:16<3:33:43, 6.99s/it] {'loss': 0.2521, 'grad_norm': 0.6938112150598861, 'learning_rate': 5.83405910955036e-07, 'epoch': 0.85} + 85%|████████▍ | 10353/12188 [46:16<3:33:43, 6.99s/it]Invalidate trace cache @ step 2: expected module 1, but got module 364 + 85%|████████▍ | 10354/12188 [46:22<3:28:58, 6.84s/it] {'loss': 0.6415, 'grad_norm': 0.8746726983025239, 'learning_rate': 5.827832057415661e-07, 'epoch': 0.85} + 85%|████████▍ | 10354/12188 [46:22<3:28:58, 6.84s/it] 85%|████████▍ | 10355/12188 [46:30<3:35:43, 7.06s/it] {'loss': 0.2968, 'grad_norm': 0.675906931972105, 'learning_rate': 5.821608124648504e-07, 'epoch': 0.85} + 85%|████████▍ | 10355/12188 [46:30<3:35:43, 7.06s/it] 85%|████████▍ | 10356/12188 [46:38<3:41:43, 7.26s/it] {'loss': 0.304, 'grad_norm': 0.7393964352992161, 'learning_rate': 5.815387311688398e-07, 'epoch': 0.85} + 85%|████████▍ | 10356/12188 [46:38<3:41:43, 7.26s/it] 85%|████████▍ | 10357/12188 [46:44<3:34:50, 7.04s/it] {'loss': 0.3053, 'grad_norm': 0.7719156459107507, 'learning_rate': 5.809169618974647e-07, 'epoch': 0.85} + 85%|████████▍ | 10357/12188 [46:44<3:34:50, 7.04s/it] 85%|████████▍ | 10358/12188 [46:51<3:37:05, 7.12s/it] {'loss': 0.2852, 'grad_norm': 0.7124587662251588, 'learning_rate': 5.802955046946335e-07, 'epoch': 0.85} + 85%|████████▍ | 10358/12188 [46:51<3:37:05, 7.12s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1348, in _get_item + raise ValueError( +ValueError: Number of image tokens ['data/bottom-navigation/other_screenshot/original/ProductivityBottomNavigation_1739983263.5379376.png'] does not match number of images None +[Try #0] Failed to fetch sample 1868482 in VC:s3://gui-agent/jedi/images/component_v1_130k/component_v1_130k_extracted/. Exception: Number of image tokens ['data/bottom-navigation/other_screenshot/original/ProductivityBottomNavigation_1739983263.5379376.png'] does not match number of images None +Problematic sample: {'image': 'data/bottom-navigation/other_screenshot/original/ProductivityBottomNavigation_1739983263.5379376.png', 'conversations': [], 'image_id': 'data/bottom-navigation/other_screenshot/original/ProductivityBottomNavigation_1739983263.5379376.png'} + 85%|████████▍ | 10359/12188 [46:58<3:35:16, 7.06s/it] {'loss': 0.2961, 'grad_norm': 0.7473451957541791, 'learning_rate': 5.796743596042342e-07, 'epoch': 0.85} + 85%|████████▍ | 10359/12188 [46:58<3:35:16, 7.06s/it] 85%|████████▌ | 10360/12188 [47:05<3:35:46, 7.08s/it] {'loss': 0.2428, 'grad_norm': 0.6646585996321275, 'learning_rate': 5.790535266701291e-07, 'epoch': 0.85} + 85%|████████▌ | 10360/12188 [47:05<3:35:46, 7.08s/it] 85%|████████▌ | 10361/12188 [47:12<3:34:25, 7.04s/it] {'loss': 0.3098, 'grad_norm': 0.7224967042457274, 'learning_rate': 5.78433005936162e-07, 'epoch': 0.85} + 85%|████████▌ | 10361/12188 [47:12<3:34:25, 7.04s/it] 85%|████████▌ | 10362/12188 [47:19<3:31:55, 6.96s/it] {'loss': 0.277, 'grad_norm': 0.8805678172973438, 'learning_rate': 5.778127974461511e-07, 'epoch': 0.85} + 85%|████████▌ | 10362/12188 [47:19<3:31:55, 6.96s/it] 85%|████████▌ | 10363/12188 [47:27<3:38:06, 7.17s/it] {'loss': 0.2753, 'grad_norm': 0.7434190265967167, 'learning_rate': 5.77192901243896e-07, 'epoch': 0.85} + 85%|████████▌ | 10363/12188 [47:27<3:38:06, 7.17s/it] 85%|████████▌ | 10364/12188 [47:34<3:34:36, 7.06s/it] {'loss': 0.3184, 'grad_norm': 0.6670975941892408, 'learning_rate': 5.765733173731731e-07, 'epoch': 0.85} + 85%|████████▌ | 10364/12188 [47:34<3:34:36, 7.06s/it] 85%|████████▌ | 10365/12188 [47:40<3:30:46, 6.94s/it] {'loss': 0.3428, 'grad_norm': 0.8922221508581745, 'learning_rate': 5.759540458777346e-07, 'epoch': 0.85} + 85%|████████▌ | 10365/12188 [47:40<3:30:46, 6.94s/it] 85%|████████▌ | 10366/12188 [47:47<3:31:18, 6.96s/it] {'loss': 0.2685, 'grad_norm': 0.7634669068096223, 'learning_rate': 5.753350868013147e-07, 'epoch': 0.85} + 85%|████████▌ | 10366/12188 [47:47<3:31:18, 6.96s/it] 85%|████████▌ | 10367/12188 [47:54<3:29:25, 6.90s/it] {'loss': 0.3438, 'grad_norm': 0.7811381773120197, 'learning_rate': 5.747164401876215e-07, 'epoch': 0.85} + 85%|████████▌ | 10367/12188 [47:54<3:29:25, 6.90s/it] 85%|████████▌ | 10368/12188 [48:01<3:30:05, 6.93s/it] {'loss': 0.2865, 'grad_norm': 0.7741630070211862, 'learning_rate': 5.740981060803441e-07, 'epoch': 0.85} + 85%|████████▌ | 10368/12188 [48:01<3:30:05, 6.93s/it] 85%|████████▌ | 10369/12188 [48:08<3:30:00, 6.93s/it] {'loss': 0.3109, 'grad_norm': 0.730691477818792, 'learning_rate': 5.73480084523147e-07, 'epoch': 0.85} + 85%|████████▌ | 10369/12188 [48:08<3:30:00, 6.93s/it] 85%|████████▌ | 10370/12188 [48:17<3:51:48, 7.65s/it] {'loss': 0.2911, 'grad_norm': 0.6843972280886106, 'learning_rate': 5.728623755596757e-07, 'epoch': 0.85} + 85%|████████▌ | 10370/12188 [48:17<3:51:48, 7.65s/it] 85%|████████▌ | 10371/12188 [48:24<3:47:44, 7.52s/it] {'loss': 0.3395, 'grad_norm': 0.7214894241032824, 'learning_rate': 5.722449792335505e-07, 'epoch': 0.85} + 85%|████████▌ | 10371/12188 [48:24<3:47:44, 7.52s/it] 85%|████████▌ | 10372/12188 [48:33<3:57:07, 7.83s/it] {'loss': 0.2972, 'grad_norm': 0.6841000824423861, 'learning_rate': 5.716278955883703e-07, 'epoch': 0.85} + 85%|████████▌ | 10372/12188 [48:33<3:57:07, 7.83s/it] 85%|████████▌ | 10373/12188 [48:40<3:46:52, 7.50s/it] {'loss': 0.2745, 'grad_norm': 0.7173545720331425, 'learning_rate': 5.71011124667713e-07, 'epoch': 0.85} + 85%|████████▌ | 10373/12188 [48:40<3:46:52, 7.50s/it] 85%|████████▌ | 10374/12188 [48:47<3:47:08, 7.51s/it] {'loss': 0.2711, 'grad_norm': 0.7533159427893631, 'learning_rate': 5.703946665151356e-07, 'epoch': 0.85} + 85%|████████▌ | 10374/12188 [48:47<3:47:08, 7.51s/it] 85%|████████▌ | 10375/12188 [48:54<3:42:24, 7.36s/it] {'loss': 0.2904, 'grad_norm': 0.8008212491358541, 'learning_rate': 5.697785211741691e-07, 'epoch': 0.85} + 85%|████████▌ | 10375/12188 [48:54<3:42:24, 7.36s/it] 85%|████████▌ | 10376/12188 [49:01<3:37:01, 7.19s/it] {'loss': 0.3097, 'grad_norm': 0.6332382642201803, 'learning_rate': 5.691626886883261e-07, 'epoch': 0.85} + 85%|████████▌ | 10376/12188 [49:01<3:37:01, 7.19s/it] 85%|████████▌ | 10377/12188 [49:11<4:00:37, 7.97s/it] {'loss': 0.2954, 'grad_norm': 0.808769773638127, 'learning_rate': 5.685471691010958e-07, 'epoch': 0.85} + 85%|████████▌ | 10377/12188 [49:11<4:00:37, 7.97s/it] 85%|████████▌ | 10378/12188 [49:19<3:57:37, 7.88s/it] {'loss': 0.29, 'grad_norm': 0.7136511455399368, 'learning_rate': 5.679319624559443e-07, 'epoch': 0.85} + 85%|████████▌ | 10378/12188 [49:19<3:57:37, 7.88s/it] 85%|████████▌ | 10379/12188 [49:26<3:54:18, 7.77s/it] {'loss': 0.3061, 'grad_norm': 0.7773052339712684, 'learning_rate': 5.673170687963175e-07, 'epoch': 0.85} + 85%|████████▌ | 10379/12188 [49:26<3:54:18, 7.77s/it] 85%|████████▌ | 10380/12188 [49:33<3:48:14, 7.57s/it] {'loss': 0.2799, 'grad_norm': 0.689051657095143, 'learning_rate': 5.667024881656369e-07, 'epoch': 0.85} + 85%|████████▌ | 10380/12188 [49:33<3:48:14, 7.57s/it] 85%|████████▌ | 10381/12188 [49:40<3:41:00, 7.34s/it] {'loss': 0.287, 'grad_norm': 0.7232216328027082, 'learning_rate': 5.660882206073037e-07, 'epoch': 0.85} + 85%|████████▌ | 10381/12188 [49:40<3:41:00, 7.34s/it] 85%|████████▌ | 10382/12188 [49:50<4:02:21, 8.05s/it] {'loss': 0.3167, 'grad_norm': 0.7501097400275905, 'learning_rate': 5.654742661646978e-07, 'epoch': 0.85} + 85%|████████▌ | 10382/12188 [49:50<4:02:21, 8.05s/it] 85%|████████▌ | 10383/12188 [49:57<3:52:01, 7.71s/it] {'loss': 0.2939, 'grad_norm': 0.9596109558075638, 'learning_rate': 5.64860624881175e-07, 'epoch': 0.85} + 85%|████████▌ | 10383/12188 [49:57<3:52:01, 7.71s/it] 85%|████████▌ | 10384/12188 [50:05<4:01:30, 8.03s/it] {'loss': 0.2796, 'grad_norm': 0.6990051737265758, 'learning_rate': 5.64247296800069e-07, 'epoch': 0.85} + 85%|████████▌ | 10384/12188 [50:05<4:01:30, 8.03s/it] 85%|██��█████▌ | 10385/12188 [50:12<3:48:22, 7.60s/it] {'loss': 0.3101, 'grad_norm': 0.6882109832475108, 'learning_rate': 5.636342819646912e-07, 'epoch': 0.85} + 85%|████████▌ | 10385/12188 [50:12<3:48:22, 7.60s/it] 85%|████████▌ | 10386/12188 [50:19<3:44:09, 7.46s/it] {'loss': 0.3055, 'grad_norm': 0.738576050975873, 'learning_rate': 5.630215804183325e-07, 'epoch': 0.85} + 85%|████████▌ | 10386/12188 [50:19<3:44:09, 7.46s/it] 85%|████████▌ | 10387/12188 [50:26<3:37:24, 7.24s/it] {'loss': 0.3318, 'grad_norm': 0.7282349881948008, 'learning_rate': 5.624091922042629e-07, 'epoch': 0.85} + 85%|████████▌ | 10387/12188 [50:26<3:37:24, 7.24s/it] 85%|████████▌ | 10388/12188 [50:33<3:34:32, 7.15s/it] {'loss': 0.3025, 'grad_norm': 1.2752519505852564, 'learning_rate': 5.617971173657255e-07, 'epoch': 0.85} + 85%|████████▌ | 10388/12188 [50:33<3:34:32, 7.15s/it] 85%|████████▌ | 10389/12188 [50:40<3:33:43, 7.13s/it] {'loss': 0.2831, 'grad_norm': 0.7337812530892882, 'learning_rate': 5.611853559459457e-07, 'epoch': 0.85} + 85%|████████▌ | 10389/12188 [50:40<3:33:43, 7.13s/it] 85%|████████▌ | 10390/12188 [50:47<3:36:59, 7.24s/it] {'loss': 0.2801, 'grad_norm': 0.6771483238022135, 'learning_rate': 5.60573907988124e-07, 'epoch': 0.85} + 85%|████████▌ | 10390/12188 [50:47<3:36:59, 7.24s/it] 85%|████████▌ | 10391/12188 [50:54<3:32:56, 7.11s/it] {'loss': 0.3188, 'grad_norm': 0.6733377911425513, 'learning_rate': 5.599627735354408e-07, 'epoch': 0.85} + 85%|████████▌ | 10391/12188 [50:54<3:32:56, 7.11s/it] 85%|████████▌ | 10392/12188 [51:01<3:33:29, 7.13s/it] {'loss': 0.2765, 'grad_norm': 0.7142006415641616, 'learning_rate': 5.593519526310532e-07, 'epoch': 0.85} + 85%|████████▌ | 10392/12188 [51:01<3:33:29, 7.13s/it] 85%|████████▌ | 10393/12188 [51:08<3:32:35, 7.11s/it] {'loss': 0.2562, 'grad_norm': 0.6823113635037485, 'learning_rate': 5.587414453180956e-07, 'epoch': 0.85} + 85%|████████▌ | 10393/12188 [51:08<3:32:35, 7.11s/it] 85%|████████▌ | 10394/12188 [51:15<3:27:59, 6.96s/it] {'loss': 0.3054, 'grad_norm': 0.7053413624138989, 'learning_rate': 5.58131251639682e-07, 'epoch': 0.85} + 85%|████████▌ | 10394/12188 [51:15<3:27:59, 6.96s/it] 85%|████████▌ | 10395/12188 [51:22<3:27:20, 6.94s/it] {'loss': 0.2913, 'grad_norm': 0.7256324314670473, 'learning_rate': 5.575213716389039e-07, 'epoch': 0.85} + 85%|████████▌ | 10395/12188 [51:22<3:27:20, 6.94s/it] 85%|████████▌ | 10396/12188 [51:29<3:32:34, 7.12s/it] {'loss': 0.33, 'grad_norm': 0.7733106218325457, 'learning_rate': 5.569118053588291e-07, 'epoch': 0.85} + 85%|████████▌ | 10396/12188 [51:29<3:32:34, 7.12s/it] 85%|████████▌ | 10397/12188 [51:36<3:30:05, 7.04s/it] {'loss': 0.2928, 'grad_norm': 0.663725566131841, 'learning_rate': 5.563025528425031e-07, 'epoch': 0.85} + 85%|████████▌ | 10397/12188 [51:36<3:30:05, 7.04s/it] 85%|████████▌ | 10398/12188 [51:43<3:26:03, 6.91s/it] {'loss': 0.3208, 'grad_norm': 0.637417870385388, 'learning_rate': 5.556936141329521e-07, 'epoch': 0.85} + 85%|████████▌ | 10398/12188 [51:43<3:26:03, 6.91s/it] 85%|████████▌ | 10399/12188 [51:50<3:24:17, 6.85s/it] {'loss': 0.2866, 'grad_norm': 0.672075368016385, 'learning_rate': 5.550849892731774e-07, 'epoch': 0.85} + 85%|████████▌ | 10399/12188 [51:50<3:24:17, 6.85s/it] 85%|████████▌ | 10400/12188 [51:57<3:25:33, 6.90s/it] {'loss': 0.3034, 'grad_norm': 0.9678121371242107, 'learning_rate': 5.5447667830616e-07, 'epoch': 0.85} + 85%|████████▌ | 10400/12188 [51:57<3:25:33, 6.90s/it] 85%|████████▌ | 10401/12188 [52:04<3:25:45, 6.91s/it] {'loss': 0.3001, 'grad_norm': 0.6959826522019326, 'learning_rate': 5.538686812748567e-07, 'epoch': 0.85} + 85%|████████▌ | 10401/12188 [52:04<3:25:45, 6.91s/it] 85%|████████▌ | 10402/12188 [52:11<3:31:09, 7.09s/it] {'loss': 0.2969, 'grad_norm': 0.7155844514765383, 'learning_rate': 5.532609982222048e-07, 'epoch': 0.85} + 85%|████████▌ | 10402/12188 [52:11<3:31:09, 7.09s/it] 85%|████████▌ | 10403/12188 [52:18<3:33:47, 7.19s/it] {'loss': 0.323, 'grad_norm': 0.6768757724410063, 'learning_rate': 5.526536291911161e-07, 'epoch': 0.85} + 85%|████████▌ | 10403/12188 [52:19<3:33:47, 7.19s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1348, in _get_item + raise ValueError( +ValueError: Number of image tokens ['63eeb12752a6426abeb129b8049d5bddstep20.png', '63eeb12752a6426abeb129b8049d5bddstep21.png'] does not match number of images None +[Try #0] Failed to fetch sample 1096386 in VC:s3://gui/aguvis/aguvis-stage2/amex/images. Exception: Number of image tokens ['63eeb12752a6426abeb129b8049d5bddstep20.png', '63eeb12752a6426abeb129b8049d5bddstep21.png'] does not match number of images None +Problematic sample: {'image': ['63eeb12752a6426abeb129b8049d5bddstep20.png', '63eeb12752a6426abeb129b8049d5bddstep21.png'], 'conversations': [{'from': 'human', 'value': "\nPlease generate the next move according to the UI screenshot, the task and previous operations.\n\nTask:\nI want to book a hotel in london, prize should be less than $600, guest rating is 8+, 4 star rating, breakfast included\n\nPrevious operations:\nStep 1: Tap on the Chrome app to start searching for hotels in London.\nStep 2: Type 'Chrome' in the search bar to open the Chrome browser.\nStep 3: Tap on the Hotels.com app icon to begin searching for a hotel.\nStep 4: Wait for the Hotels.com app to finish loading to proceed with the hotel search.\nStep 5: Tap on the 'Going to' field to enter London as the destination.\nStep 6: Tap on 'London' from Recent Searches to select it as the destination for the hotel search.\nStep 7: Tap on 'London, England, United Kingdom' to select it as the destination for the hotel search.\nStep 8: Tap on the 'Search' button to view available hotels in London.\nStep 9: Tap on the filter icon or option to apply filters to the hotel search results.\nStep 10: Swipe up on the screen to reveal more filter options, such as guest rating and hotel stars.\nStep 11: Swipe up to reveal more filter options, specifically guest ratings and hotel stars.\nStep 12: Tap on the 4-star rating box to select it.\nStep 13: Tap 'Done' to confirm the current filter selections and proceed.\nStep 14: Swipe up on the screen to locate and select the 'breakfast included' filter option.\nStep 15: Tap on 'Breakfast included' to select it.\nStep 16: Tap on the 'Done' button to apply the filters and view available hotel options.\nStep 17: Tap on the topmost hotel card to view its details.\nStep 18: Tap on the 'Select a room' button to check available room options and their prices.\nStep 19: Tap on 'Select and customize' to proceed with the room selection and customization options.\nStep 20: Tap on 'Breakfast buffet' to select it, then tap on 'Reserve' to book the room."}, {'from': 'gpt', 'value': "\nGiven the sequence of booking actions, it seems the process is almost complete. The loading sign indicates that the system is processing the final booking or confirmation step.\n\n\nWait for the loading to complete to confirm the booking status.\n\n\nterminate(status='success')\n"}]} + 85%|████████▌ | 10404/12188 [52:27<3:41:03, 7.43s/it] {'loss': 0.3092, 'grad_norm': 0.7296279551412693, 'learning_rate': 5.520465742244824e-07, 'epoch': 0.85} + 85%|████████▌ | 10404/12188 [52:27<3:41:03, 7.43s/it] 85%|████████▌ | 10405/12188 [52:33<3:34:51, 7.23s/it] {'loss': 0.3184, 'grad_norm': 0.6937764288173239, 'learning_rate': 5.51439833365175e-07, 'epoch': 0.85} + 85%|████████▌ | 10405/12188 [52:33<3:34:51, 7.23s/it] 85%|████████▌ | 10406/12188 [52:45<4:14:50, 8.58s/it] {'loss': 0.2369, 'grad_norm': 0.6455647372297885, 'learning_rate': 5.508334066560377e-07, 'epoch': 0.85} + 85%|████████▌ | 10406/12188 [52:45<4:14:50, 8.58s/it] 85%|████████▌ | 10407/12188 [52:52<4:04:53, 8.25s/it] {'loss': 0.3063, 'grad_norm': 0.7093104224795661, 'learning_rate': 5.50227294139899e-07, 'epoch': 0.85} + 85%|████████▌ | 10407/12188 [52:52<4:04:53, 8.25s/it] 85%|████████▌ | 10408/12188 [53:00<3:56:48, 7.98s/it] {'loss': 0.29, 'grad_norm': 0.7435016959274044, 'learning_rate': 5.496214958595581e-07, 'epoch': 0.85} + 85%|████████▌ | 10408/12188 [53:00<3:56:48, 7.98s/it] 85%|████████▌ | 10409/12188 [53:07<3:52:25, 7.84s/it] {'loss': 0.2765, 'grad_norm': 0.6513665092075935, 'learning_rate': 5.490160118577969e-07, 'epoch': 0.85} + 85%|████████▌ | 10409/12188 [53:07<3:52:25, 7.84s/it] 85%|████████▌ | 10410/12188 [53:15<3:46:38, 7.65s/it] {'loss': 0.2731, 'grad_norm': 0.7216764442312253, 'learning_rate': 5.484108421773754e-07, 'epoch': 0.85} + 85%|████████▌ | 10410/12188 [53:15<3:46:38, 7.65s/it] 85%|████████▌ | 10411/12188 [53:23<3:55:35, 7.95s/it] {'loss': 0.3081, 'grad_norm': 0.7255128148184277, 'learning_rate': 5.47805986861028e-07, 'epoch': 0.85} + 85%|████████▌ | 10411/12188 [53:23<3:55:35, 7.95s/it] 85%|████████▌ | 10412/12188 [53:30<3:44:52, 7.60s/it] {'loss': 0.3108, 'grad_norm': 0.6467663542019405, 'learning_rate': 5.47201445951468e-07, 'epoch': 0.85} + 85%|████████▌ | 10412/12188 [53:30<3:44:52, 7.60s/it] 85%|████████▌ | 10413/12188 [53:37<3:41:12, 7.48s/it] {'loss': 0.2914, 'grad_norm': 0.6507039516963568, 'learning_rate': 5.465972194913882e-07, 'epoch': 0.85} + 85%|████████▌ | 10413/12188 [53:37<3:41:12, 7.48s/it] 85%|████████▌ | 10414/12188 [53:44<3:38:47, 7.40s/it] {'loss': 0.2723, 'grad_norm': 0.7062312416244554, 'learning_rate': 5.459933075234575e-07, 'epoch': 0.85} + 85%|████████▌ | 10414/12188 [53:44<3:38:47, 7.40s/it] 85%|████████▌ | 10415/12188 [53:51<3:33:02, 7.21s/it] {'loss': 0.3372, 'grad_norm': 0.751660885160818, 'learning_rate': 5.453897100903244e-07, 'epoch': 0.85} + 85%|████████▌ | 10415/12188 [53:51<3:33:02, 7.21s/it] 85%|████████▌ | 10416/12188 [53:58<3:30:43, 7.13s/it] {'loss': 0.3038, 'grad_norm': 0.7207354452203772, 'learning_rate': 5.447864272346121e-07, 'epoch': 0.85} + 85%|████████▌ | 10416/12188 [53:58<3:30:43, 7.13s/it] 85%|████████▌ | 10417/12188 [54:05<3:31:18, 7.16s/it] {'loss': 0.3336, 'grad_norm': 0.6851090411645719, 'learning_rate': 5.441834589989242e-07, 'epoch': 0.85} + 85%|████████▌ | 10417/12188 [54:05<3:31:18, 7.16s/it] 85%|████████▌ | 10418/12188 [54:12<3:29:12, 7.09s/it] {'loss': 0.2746, 'grad_norm': 0.6923711494242786, 'learning_rate': 5.435808054258429e-07, 'epoch': 0.85} + 85%|████████▌ | 10418/12188 [54:12<3:29:12, 7.09s/it] 85%|████████▌ | 10419/12188 [54:19<3:25:25, 6.97s/it] {'loss': 0.2978, 'grad_norm': 1.184699320666336, 'learning_rate': 5.42978466557924e-07, 'epoch': 0.85} + 85%|████████▌ | 10419/12188 [54:19<3:25:25, 6.97s/it] 85%|████████▌ | 10420/12188 [54:26<3:24:40, 6.95s/it] {'loss': 0.3074, 'grad_norm': 0.7170363146273162, 'learning_rate': 5.423764424377065e-07, 'epoch': 0.85} + 85%|████████▌ | 10420/12188 [54:26<3:24:40, 6.95s/it] 86%|████████▌ | 10421/12188 [54:34<3:32:36, 7.22s/it] {'loss': 0.28, 'grad_norm': 0.6410723233874112, 'learning_rate': 5.417747331077017e-07, 'epoch': 0.85} + 86%|████████▌ | 10421/12188 [54:34<3:32:36, 7.22s/it] 86%|████████▌ | 10422/12188 [54:41<3:33:09, 7.24s/it] {'loss': 0.3199, 'grad_norm': 0.7475569488886581, 'learning_rate': 5.411733386104029e-07, 'epoch': 0.86} + 86%|████████▌ | 10422/12188 [54:41<3:33:09, 7.24s/it] 86%|████████▌ | 10423/12188 [54:48<3:30:52, 7.17s/it] {'loss': 0.2626, 'grad_norm': 0.7784059692288424, 'learning_rate': 5.4057225898828e-07, 'epoch': 0.86} + 86%|████████▌ | 10423/12188 [54:48<3:30:52, 7.17s/it] 86%|████████▌ | 10424/12188 [54:59<4:05:02, 8.33s/it] {'loss': 0.3036, 'grad_norm': 0.676229221665994, 'learning_rate': 5.399714942837792e-07, 'epoch': 0.86} + 86%|████████▌ | 10424/12188 [54:59<4:05:02, 8.33s/it] 86%|████████▌ | 10425/12188 [55:06<3:51:26, 7.88s/it] {'loss': 0.3172, 'grad_norm': 0.63107818676154, 'learning_rate': 5.393710445393252e-07, 'epoch': 0.86} + 86%|████████▌ | 10425/12188 [55:06<3:51:26, 7.88s/it] 86%|████████▌ | 10426/12188 [55:13<3:44:01, 7.63s/it] {'loss': 0.2794, 'grad_norm': 0.6733837244533976, 'learning_rate': 5.387709097973226e-07, 'epoch': 0.86} + 86%|████████▌ | 10426/12188 [55:13<3:44:01, 7.63s/it] 86%|████████▌ | 10427/12188 [55:20<3:36:41, 7.38s/it] {'loss': 0.3341, 'grad_norm': 0.7196352233810506, 'learning_rate': 5.381710901001497e-07, 'epoch': 0.86} + 86%|████████▌ | 10427/12188 [55:20<3:36:41, 7.38s/it] 86%|████████▌ | 10428/12188 [55:27<3:36:23, 7.38s/it] {'loss': 0.2916, 'grad_norm': 0.7410974156768929, 'learning_rate': 5.37571585490167e-07, 'epoch': 0.86} + 86%|████████▌ | 10428/12188 [55:27<3:36:23, 7.38s/it] 86%|████████▌ | 10429/12188 [55:34<3:31:42, 7.22s/it] {'loss': 0.3144, 'grad_norm': 0.6476985843772657, 'learning_rate': 5.369723960097084e-07, 'epoch': 0.86} + 86%|████████▌ | 10429/12188 [55:34<3:31:42, 7.22s/it] 86%|████████▌ | 10430/12188 [55:42<3:37:46, 7.43s/it] {'loss': 0.3241, 'grad_norm': 0.6986168184276266, 'learning_rate': 5.36373521701089e-07, 'epoch': 0.86} + 86%|████████▌ | 10430/12188 [55:42<3:37:46, 7.43s/it] 86%|████████▌ | 10431/12188 [55:49<3:35:53, 7.37s/it] {'loss': 0.2971, 'grad_norm': 0.7423867345535274, 'learning_rate': 5.357749626066011e-07, 'epoch': 0.86} + 86%|████████▌ | 10431/12188 [55:49<3:35:53, 7.37s/it] 86%|████████▌ | 10432/12188 [55:56<3:32:40, 7.27s/it] {'loss': 0.2865, 'grad_norm': 0.6930744922327944, 'learning_rate': 5.351767187685114e-07, 'epoch': 0.86} + 86%|████████▌ | 10432/12188 [55:56<3:32:40, 7.27s/it] 86%|████████▌ | 10433/12188 [56:03<3:29:53, 7.18s/it] {'loss': 0.267, 'grad_norm': 0.6738334852577574, 'learning_rate': 5.345787902290706e-07, 'epoch': 0.86} + 86%|████████▌ | 10433/12188 [56:03<3:29:53, 7.18s/it] 86%|████████▌ | 10434/12188 [56:10<3:31:41, 7.24s/it] {'loss': 0.2943, 'grad_norm': 0.7207802429804949, 'learning_rate': 5.339811770304992e-07, 'epoch': 0.86} + 86%|████████▌ | 10434/12188 [56:10<3:31:41, 7.24s/it] 86%|████████▌ | 10435/12188 [56:18<3:30:31, 7.21s/it] {'loss': 0.27, 'grad_norm': 0.7814123653149324, 'learning_rate': 5.333838792150026e-07, 'epoch': 0.86} + 86%|████████▌ | 10435/12188 [56:18<3:30:31, 7.21s/it] 86%|████████▌ | 10436/12188 [56:25<3:28:25, 7.14s/it] {'loss': 0.2825, 'grad_norm': 0.7382946590902313, 'learning_rate': 5.327868968247613e-07, 'epoch': 0.86} + 86%|████████▌ | 10436/12188 [56:25<3:28:25, 7.14s/it] 86%|████████▌ | 10437/12188 [56:31<3:26:00, 7.06s/it] {'loss': 0.2916, 'grad_norm': 0.705989187648237, 'learning_rate': 5.32190229901931e-07, 'epoch': 0.86} + 86%|████████▌ | 10437/12188 [56:31<3:26:00, 7.06s/it] 86%|████████▌ | 10438/12188 [56:40<3:41:03, 7.58s/it] {'loss': 0.2863, 'grad_norm': 0.6793653724127765, 'learning_rate': 5.315938784886499e-07, 'epoch': 0.86} + 86%|████████▌ | 10438/12188 [56:40<3:41:03, 7.58s/it] 86%|████████▌ | 10439/12188 [56:47<3:36:51, 7.44s/it] {'loss': 0.2819, 'grad_norm': 0.6788263744057849, 'learning_rate': 5.309978426270296e-07, 'epoch': 0.86} + 86%|████████▌ | 10439/12188 [56:47<3:36:51, 7.44s/it] 86%|████████▌ | 10440/12188 [56:54<3:33:09, 7.32s/it] {'loss': 0.3203, 'grad_norm': 0.7068113983263081, 'learning_rate': 5.30402122359161e-07, 'epoch': 0.86} + 86%|████████▌ | 10440/12188 [56:54<3:33:09, 7.32s/it] 86%|████████▌ | 10441/12188 [57:02<3:34:48, 7.38s/it] {'loss': 0.2668, 'grad_norm': 0.7253118886136739, 'learning_rate': 5.298067177271144e-07, 'epoch': 0.86} + 86%|████████▌ | 10441/12188 [57:02<3:34:48, 7.38s/it] 86%|████████▌ | 10442/12188 [57:09<3:34:04, 7.36s/it] {'loss': 0.2973, 'grad_norm': 0.6572748352661523, 'learning_rate': 5.292116287729348e-07, 'epoch': 0.86} + 86%|████████▌ | 10442/12188 [57:09<3:34:04, 7.36s/it] 86%|████████▌ | 10443/12188 [57:16<3:30:32, 7.24s/it] {'loss': 0.285, 'grad_norm': 0.6988913364869492, 'learning_rate': 5.286168555386478e-07, 'epoch': 0.86} + 86%|████████▌ | 10443/12188 [57:16<3:30:32, 7.24s/it] 86%|████████▌ | 10444/12188 [57:23<3:29:19, 7.20s/it] {'loss': 0.3114, 'grad_norm': 0.7015111231172649, 'learning_rate': 5.280223980662535e-07, 'epoch': 0.86} + 86%|████████▌ | 10444/12188 [57:23<3:29:19, 7.20s/it] 86%|████████▌ | 10445/12188 [57:31<3:29:35, 7.21s/it] {'loss': 0.3406, 'grad_norm': 1.0306227910266808, 'learning_rate': 5.274282563977328e-07, 'epoch': 0.86} + 86%|████████▌ | 10445/12188 [57:31<3:29:35, 7.21s/it] 86%|████████▌ | 10446/12188 [57:38<3:33:03, 7.34s/it] {'loss': 0.2643, 'grad_norm': 0.7308630673598016, 'learning_rate': 5.268344305750439e-07, 'epoch': 0.86} + 86%|████████▌ | 10446/12188 [57:38<3:33:03, 7.34s/it] 86%|████████▌ | 10447/12188 [57:45<3:28:20, 7.18s/it] {'loss': 0.2552, 'grad_norm': 0.6515810850528259, 'learning_rate': 5.262409206401198e-07, 'epoch': 0.86} + 86%|████████▌ | 10447/12188 [57:45<3:28:20, 7.18s/it] 86%|████████▌ | 10448/12188 [57:52<3:25:42, 7.09s/it] {'loss': 0.3268, 'grad_norm': 0.7629516161657715, 'learning_rate': 5.256477266348747e-07, 'epoch': 0.86} + 86%|████████▌ | 10448/12188 [57:52<3:25:42, 7.09s/it] 86%|████████▌ | 10449/12188 [58:00<3:31:03, 7.28s/it] {'loss': 0.3102, 'grad_norm': 0.6509661143366731, 'learning_rate': 5.250548486011992e-07, 'epoch': 0.86} + 86%|████████▌ | 10449/12188 [58:00<3:31:03, 7.28s/it] 86%|████████▌ | 10450/12188 [58:10<3:57:59, 8.22s/it] {'loss': 0.288, 'grad_norm': 0.6588966668663533, 'learning_rate': 5.244622865809596e-07, 'epoch': 0.86} + 86%|████████▌ | 10450/12188 [58:10<3:57:59, 8.22s/it] 86%|████████▌ | 10451/12188 [58:17<3:49:23, 7.92s/it] {'loss': 0.3506, 'grad_norm': 0.696539003059241, 'learning_rate': 5.238700406160036e-07, 'epoch': 0.86} + 86%|████████▌ | 10451/12188 [58:17<3:49:23, 7.92s/it] 86%|████████▌ | 10452/12188 [58:24<3:40:46, 7.63s/it] {'loss': 0.2697, 'grad_norm': 0.6721204390538145, 'learning_rate': 5.232781107481544e-07, 'epoch': 0.86} + 86%|████████▌ | 10452/12188 [58:24<3:40:46, 7.63s/it] 86%|████████▌ | 10453/12188 [58:31<3:31:50, 7.33s/it] {'loss': 0.257, 'grad_norm': 0.6653513802015135, 'learning_rate': 5.226864970192114e-07, 'epoch': 0.86} + 86%|████████▌ | 10453/12188 [58:31<3:31:50, 7.33s/it] 86%|████████▌ | 10454/12188 [58:39<3:38:18, 7.55s/it] {'loss': 0.2765, 'grad_norm': 0.670704703798943, 'learning_rate': 5.220951994709555e-07, 'epoch': 0.86} + 86%|████████▌ | 10454/12188 [58:39<3:38:18, 7.55s/it] 86%|████████▌ | 10455/12188 [58:46<3:31:48, 7.33s/it] {'loss': 0.2637, 'grad_norm': 0.8508532609107936, 'learning_rate': 5.215042181451418e-07, 'epoch': 0.86} + 86%|████████▌ | 10455/12188 [58:46<3:31:48, 7.33s/it] 86%|████████▌ | 10456/12188 [58:52<3:25:27, 7.12s/it] {'loss': 0.3036, 'grad_norm': 0.8051638445728574, 'learning_rate': 5.209135530835053e-07, 'epoch': 0.86} + 86%|████████▌ | 10456/12188 [58:52<3:25:27, 7.12s/it] 86%|████████▌ | 10457/12188 [59:00<3:31:11, 7.32s/it] {'loss': 0.3285, 'grad_norm': 0.7637715442131313, 'learning_rate': 5.203232043277568e-07, 'epoch': 0.86} + 86%|████████▌ | 10457/12188 [59:00<3:31:11, 7.32s/it] 86%|████████▌ | 10458/12188 [59:10<3:53:23, 8.09s/it] {'loss': 0.3214, 'grad_norm': 0.8972091024364858, 'learning_rate': 5.197331719195864e-07, 'epoch': 0.86} + 86%|████████▌ | 10458/12188 [59:10<3:53:23, 8.09s/it] 86%|████████▌ | 10459/12188 [59:16<3:39:13, 7.61s/it] {'loss': 0.253, 'grad_norm': 0.6998692269404939, 'learning_rate': 5.191434559006625e-07, 'epoch': 0.86} + 86%|████████▌ | 10459/12188 [59:16<3:39:13, 7.61s/it] 86%|████████▌ | 10460/12188 [59:24<3:40:59, 7.67s/it] {'loss': 0.3272, 'grad_norm': 0.7280307746491453, 'learning_rate': 5.185540563126274e-07, 'epoch': 0.86} + 86%|████████▌ | 10460/12188 [59:24<3:40:59, 7.67s/it] 86%|████████▌ | 10461/12188 [59:31<3:35:06, 7.47s/it] {'loss': 0.3384, 'grad_norm': 0.7858826115525354, 'learning_rate': 5.179649731971059e-07, 'epoch': 0.86} + 86%|████████▌ | 10461/12188 [59:31<3:35:06, 7.47s/it] 86%|████████▌ | 10462/12188 [59:38<3:29:09, 7.27s/it] {'loss': 0.281, 'grad_norm': 0.8716340385353915, 'learning_rate': 5.173762065956967e-07, 'epoch': 0.86} + 86%|████████▌ | 10462/12188 [59:38<3:29:09, 7.27s/it] 86%|████████▌ | 10463/12188 [59:45<3:26:42, 7.19s/it] {'loss': 0.3019, 'grad_norm': 0.6518546642148657, 'learning_rate': 5.167877565499774e-07, 'epoch': 0.86} + 86%|████████▌ | 10463/12188 [59:45<3:26:42, 7.19s/it] 86%|████████▌ | 10464/12188 [59:55<3:50:47, 8.03s/it] {'loss': 0.2911, 'grad_norm': 0.7111019331308541, 'learning_rate': 5.161996231015049e-07, 'epoch': 0.86} + 86%|████████▌ | 10464/12188 [59:55<3:50:47, 8.03s/it] 86%|████████▌ | 10465/12188 [1:00:02<3:39:40, 7.65s/it] {'loss': 0.3068, 'grad_norm': 0.8617113253530183, 'learning_rate': 5.156118062918098e-07, 'epoch': 0.86} + 86%|████████▌ | 10465/12188 [1:00:02<3:39:40, 7.65s/it] 86%|████████▌ | 10466/12188 [1:00:09<3:37:01, 7.56s/it] {'loss': 0.3223, 'grad_norm': 0.6833676334134579, 'learning_rate': 5.150243061624055e-07, 'epoch': 0.86} + 86%|████████▌ | 10466/12188 [1:00:09<3:37:01, 7.56s/it] 86%|████████▌ | 10467/12188 [1:00:18<3:44:14, 7.82s/it] {'loss': 0.263, 'grad_norm': 0.6160990181991944, 'learning_rate': 5.144371227547795e-07, 'epoch': 0.86} + 86%|████████▌ | 10467/12188 [1:00:18<3:44:14, 7.82s/it] 86%|████████▌ | 10468/12188 [1:00:25<3:37:36, 7.59s/it] {'loss': 0.284, 'grad_norm': 0.6960772782158595, 'learning_rate': 5.138502561103959e-07, 'epoch': 0.86} + 86%|████████▌ | 10468/12188 [1:00:25<3:37:36, 7.59s/it] 86%|████████▌ | 10469/12188 [1:00:33<3:41:41, 7.74s/it] {'loss': 0.2899, 'grad_norm': 0.6545796324517511, 'learning_rate': 5.132637062707008e-07, 'epoch': 0.86} + 86%|████████▌ | 10469/12188 [1:00:33<3:41:41, 7.74s/it] 86%|████████▌ | 10470/12188 [1:00:41<3:42:56, 7.79s/it] {'loss': 0.2746, 'grad_norm': 0.7283812115816869, 'learning_rate': 5.126774732771129e-07, 'epoch': 0.86} + 86%|████████▌ | 10470/12188 [1:00:41<3:42:56, 7.79s/it] 86%|████████▌ | 10471/12188 [1:00:48<3:36:06, 7.55s/it] {'loss': 0.3179, 'grad_norm': 0.9123011767642825, 'learning_rate': 5.120915571710328e-07, 'epoch': 0.86} + 86%|████████▌ | 10471/12188 [1:00:48<3:36:06, 7.55s/it] 86%|████████▌ | 10472/12188 [1:00:55<3:32:14, 7.42s/it] {'loss': 0.2807, 'grad_norm': 0.7445324988042634, 'learning_rate': 5.115059579938375e-07, 'epoch': 0.86} + 86%|████████▌ | 10472/12188 [1:00:55<3:32:14, 7.42s/it] 86%|████████▌ | 10473/12188 [1:01:02<3:28:16, 7.29s/it] {'loss': 0.3045, 'grad_norm': 0.657518225435835, 'learning_rate': 5.10920675786879e-07, 'epoch': 0.86} + 86%|████████▌ | 10473/12188 [1:01:02<3:28:16, 7.29s/it] 86%|████████▌ | 10474/12188 [1:01:09<3:29:20, 7.33s/it] {'loss': 0.312, 'grad_norm': 0.716532558739453, 'learning_rate': 5.103357105914913e-07, 'epoch': 0.86} + 86%|████████▌ | 10474/12188 [1:01:09<3:29:20, 7.33s/it] 86%|████████▌ | 10475/12188 [1:01:16<3:28:57, 7.32s/it] {'loss': 0.3074, 'grad_norm': 0.6822045251672232, 'learning_rate': 5.097510624489816e-07, 'epoch': 0.86} + 86%|████████▌ | 10475/12188 [1:01:16<3:28:57, 7.32s/it] 86%|████████▌ | 10476/12188 [1:01:24<3:27:22, 7.27s/it] {'loss': 0.3149, 'grad_norm': 0.8492133367649478, 'learning_rate': 5.091667314006371e-07, 'epoch': 0.86} + 86%|████████▌ | 10476/12188 [1:01:24<3:27:22, 7.27s/it] 86%|████████▌ | 10477/12188 [1:01:31<3:30:22, 7.38s/it] {'loss': 0.3006, 'grad_norm': 0.6784992759442127, 'learning_rate': 5.085827174877245e-07, 'epoch': 0.86} + 86%|████████▌ | 10477/12188 [1:01:31<3:30:22, 7.38s/it] 86%|████████▌ | 10478/12188 [1:01:38<3:25:39, 7.22s/it] {'loss': 0.2806, 'grad_norm': 0.723395613499971, 'learning_rate': 5.079990207514835e-07, 'epoch': 0.86} + 86%|████████▌ | 10478/12188 [1:01:38<3:25:39, 7.22s/it] 86%|████████▌ | 10479/12188 [1:01:45<3:22:55, 7.12s/it] {'loss': 0.3054, 'grad_norm': 0.655266497030898, 'learning_rate': 5.074156412331354e-07, 'epoch': 0.86} + 86%|████████▌ | 10479/12188 [1:01:45<3:22:55, 7.12s/it] 86%|████████▌ | 10480/12188 [1:01:52<3:22:25, 7.11s/it] {'loss': 0.3121, 'grad_norm': 0.8376487065219661, 'learning_rate': 5.068325789738771e-07, 'epoch': 0.86} + 86%|████████▌ | 10480/12188 [1:01:52<3:22:25, 7.11s/it] 86%|████████▌ | 10481/12188 [1:02:00<3:27:08, 7.28s/it] {'loss': 0.2793, 'grad_norm': 0.6505651763293763, 'learning_rate': 5.062498340148819e-07, 'epoch': 0.86} + 86%|████████▌ | 10481/12188 [1:02:00<3:27:08, 7.28s/it] 86%|████████▌ | 10482/12188 [1:02:07<3:22:50, 7.13s/it] {'loss': 0.2696, 'grad_norm': 0.6650404751956772, 'learning_rate': 5.05667406397305e-07, 'epoch': 0.86} + 86%|████████▌ | 10482/12188 [1:02:07<3:22:50, 7.13s/it] 86%|████████▌ | 10483/12188 [1:02:13<3:17:53, 6.96s/it] {'loss': 0.2866, 'grad_norm': 0.6387112424638193, 'learning_rate': 5.050852961622738e-07, 'epoch': 0.86} + 86%|████████▌ | 10483/12188 [1:02:13<3:17:53, 6.96s/it] 86%|████████▌ | 10484/12188 [1:02:20<3:18:36, 6.99s/it] {'loss': 0.2884, 'grad_norm': 0.9754848460363997, 'learning_rate': 5.045035033508977e-07, 'epoch': 0.86} + 86%|████████▌ | 10484/12188 [1:02:20<3:18:36, 6.99s/it] 86%|████████▌ | 10485/12188 [1:02:27<3:18:53, 7.01s/it] {'loss': 0.3073, 'grad_norm': 0.7515879016199118, 'learning_rate': 5.039220280042623e-07, 'epoch': 0.86} + 86%|████████▌ | 10485/12188 [1:02:27<3:18:53, 7.01s/it] 86%|████████▌ | 10486/12188 [1:02:34<3:17:17, 6.96s/it] {'loss': 0.2917, 'grad_norm': 0.7007529580306034, 'learning_rate': 5.03340870163429e-07, 'epoch': 0.86} + 86%|████████▌ | 10486/12188 [1:02:34<3:17:17, 6.96s/it] 86%|████████▌ | 10487/12188 [1:02:44<3:45:09, 7.94s/it] {'loss': 0.3142, 'grad_norm': 0.6884803517453678, 'learning_rate': 5.027600298694397e-07, 'epoch': 0.86} + 86%|████████▌ | 10487/12188 [1:02:44<3:45:09, 7.94s/it] 86%|████████▌ | 10488/12188 [1:02:51<3:38:30, 7.71s/it] {'loss': 0.2828, 'grad_norm': 0.6896916279980139, 'learning_rate': 5.021795071633113e-07, 'epoch': 0.86} + 86%|████████▌ | 10488/12188 [1:02:51<3:38:30, 7.71s/it] 86%|████████▌ | 10489/12188 [1:02:58<3:30:50, 7.45s/it] {'loss': 0.3035, 'grad_norm': 0.7788509078545195, 'learning_rate': 5.015993020860394e-07, 'epoch': 0.86} + 86%|████████▌ | 10489/12188 [1:02:58<3:30:50, 7.45s/it] 86%|████████▌ | 10490/12188 [1:03:05<3:25:21, 7.26s/it] {'loss': 0.2976, 'grad_norm': 0.7266142353293297, 'learning_rate': 5.010194146785985e-07, 'epoch': 0.86} + 86%|████████▌ | 10490/12188 [1:03:05<3:25:21, 7.26s/it] 86%|████████▌ | 10491/12188 [1:03:13<3:30:18, 7.44s/it] {'loss': 0.2764, 'grad_norm': 0.6969875394980891, 'learning_rate': 5.004398449819376e-07, 'epoch': 0.86} + 86%|████████▌ | 10491/12188 [1:03:13<3:30:18, 7.44s/it] 86%|████████▌ | 10492/12188 [1:03:22<3:44:05, 7.93s/it] {'loss': 0.3779, 'grad_norm': 0.9386574177767544, 'learning_rate': 4.998605930369865e-07, 'epoch': 0.86} + 86%|████████▌ | 10492/12188 [1:03:22<3:44:05, 7.93s/it] 86%|████████▌ | 10493/12188 [1:03:29<3:35:00, 7.61s/it] {'loss': 0.3128, 'grad_norm': 0.7067309007895678, 'learning_rate': 4.992816588846495e-07, 'epoch': 0.86} + 86%|████████▌ | 10493/12188 [1:03:29<3:35:00, 7.61s/it] 86%|████████▌ | 10494/12188 [1:03:37<3:37:15, 7.70s/it] {'loss': 0.3145, 'grad_norm': 0.7055625394382342, 'learning_rate': 4.987030425658118e-07, 'epoch': 0.86} + 86%|████████▌ | 10494/12188 [1:03:37<3:37:15, 7.70s/it] 86%|████████▌ | 10495/12188 [1:03:44<3:30:45, 7.47s/it] {'loss': 0.2971, 'grad_norm': 0.7897332413406077, 'learning_rate': 4.981247441213333e-07, 'epoch': 0.86} + 86%|████████▌ | 10495/12188 [1:03:44<3:30:45, 7.47s/it] 86%|████████▌ | 10496/12188 [1:03:51<3:27:45, 7.37s/it] {'loss': 0.33, 'grad_norm': 0.6908129016839427, 'learning_rate': 4.975467635920517e-07, 'epoch': 0.86} + 86%|████████▌ | 10496/12188 [1:03:51<3:27:45, 7.37s/it] 86%|████████▌ | 10497/12188 [1:04:00<3:44:49, 7.98s/it] {'loss': 0.2895, 'grad_norm': 0.6701443157407245, 'learning_rate': 4.969691010187838e-07, 'epoch': 0.86} + 86%|████████▌ | 10497/12188 [1:04:00<3:44:49, 7.98s/it] 86%|████████▌ | 10498/12188 [1:04:09<3:47:44, 8.09s/it] {'loss': 0.3209, 'grad_norm': 0.6958643145473634, 'learning_rate': 4.963917564423243e-07, 'epoch': 0.86} + 86%|████████▌ | 10498/12188 [1:04:09<3:47:44, 8.09s/it] 86%|████████▌ | 10499/12188 [1:04:18<3:58:20, 8.47s/it] {'loss': 0.2808, 'grad_norm': 0.6538909008919913, 'learning_rate': 4.958147299034421e-07, 'epoch': 0.86} + 86%|████████▌ | 10499/12188 [1:04:18<3:58:20, 8.47s/it] 86%|████████▌ | 10500/12188 [1:04:25<3:43:05, 7.93s/it] {'loss': 0.2982, 'grad_norm': 0.7221008126842507, 'learning_rate': 4.952380214428886e-07, 'epoch': 0.86} + 86%|████████▌ | 10500/12188 [1:04:25<3:43:05, 7.93s/it] 86%|████████▌ | 10501/12188 [1:04:32<3:35:50, 7.68s/it] {'loss': 0.3034, 'grad_norm': 0.6398423165123295, 'learning_rate': 4.946616311013875e-07, 'epoch': 0.86} + 86%|████████▌ | 10501/12188 [1:04:32<3:35:50, 7.68s/it] 86%|████████▌ | 10502/12188 [1:04:39<3:28:13, 7.41s/it] {'loss': 0.3026, 'grad_norm': 0.7901365304797278, 'learning_rate': 4.940855589196436e-07, 'epoch': 0.86} + 86%|████████▌ | 10502/12188 [1:04:39<3:28:13, 7.41s/it] 86%|████████▌ | 10503/12188 [1:04:46<3:25:40, 7.32s/it] {'loss': 0.3193, 'grad_norm': 0.6973274496646941, 'learning_rate': 4.935098049383391e-07, 'epoch': 0.86} + 86%|████████▌ | 10503/12188 [1:04:46<3:25:40, 7.32s/it] 86%|████████▌ | 10504/12188 [1:04:53<3:27:40, 7.40s/it] {'loss': 0.3102, 'grad_norm': 0.7092525088309088, 'learning_rate': 4.929343691981309e-07, 'epoch': 0.86} + 86%|████████▌ | 10504/12188 [1:04:53<3:27:40, 7.40s/it] 86%|████████▌ | 10505/12188 [1:05:03<3:44:03, 7.99s/it] {'loss': 0.3022, 'grad_norm': 0.7260139706719232, 'learning_rate': 4.923592517396575e-07, 'epoch': 0.86} + 86%|███████���▌ | 10505/12188 [1:05:03<3:44:03, 7.99s/it] 86%|████████▌ | 10506/12188 [1:05:10<3:40:27, 7.86s/it] {'loss': 0.2989, 'grad_norm': 0.9036614930351566, 'learning_rate': 4.917844526035304e-07, 'epoch': 0.86} + 86%|████████▌ | 10506/12188 [1:05:10<3:40:27, 7.86s/it] 86%|████████▌ | 10507/12188 [1:05:17<3:31:15, 7.54s/it] {'loss': 0.3002, 'grad_norm': 0.6709308399744102, 'learning_rate': 4.912099718303437e-07, 'epoch': 0.86} + 86%|████████▌ | 10507/12188 [1:05:17<3:31:15, 7.54s/it] 86%|████████▌ | 10508/12188 [1:05:24<3:27:31, 7.41s/it] {'loss': 0.2897, 'grad_norm': 0.7336822445881324, 'learning_rate': 4.906358094606645e-07, 'epoch': 0.86} + 86%|████████▌ | 10508/12188 [1:05:24<3:27:31, 7.41s/it] 86%|████████▌ | 10509/12188 [1:05:32<3:32:54, 7.61s/it] {'loss': 0.2811, 'grad_norm': 0.719294695889186, 'learning_rate': 4.900619655350386e-07, 'epoch': 0.86} + 86%|████████▌ | 10509/12188 [1:05:32<3:32:54, 7.61s/it] 86%|████████▌ | 10510/12188 [1:05:41<3:44:43, 8.04s/it] {'loss': 0.3041, 'grad_norm': 0.7193159375506184, 'learning_rate': 4.89488440093992e-07, 'epoch': 0.86} + 86%|████████▌ | 10510/12188 [1:05:41<3:44:43, 8.04s/it] 86%|████████▌ | 10511/12188 [1:05:48<3:37:15, 7.77s/it] {'loss': 0.2727, 'grad_norm': 0.6932497902408595, 'learning_rate': 4.889152331780245e-07, 'epoch': 0.86} + 86%|████████▌ | 10511/12188 [1:05:48<3:37:15, 7.77s/it] 86%|████████▌ | 10512/12188 [1:05:55<3:27:13, 7.42s/it] {'loss': 0.3153, 'grad_norm': 0.710584541197295, 'learning_rate': 4.883423448276154e-07, 'epoch': 0.86} + 86%|████████▌ | 10512/12188 [1:05:55<3:27:13, 7.42s/it] 86%|████████▋ | 10513/12188 [1:06:02<3:24:52, 7.34s/it] {'loss': 0.2697, 'grad_norm': 0.7143105658383468, 'learning_rate': 4.877697750832222e-07, 'epoch': 0.86} + 86%|████████▋ | 10513/12188 [1:06:02<3:24:52, 7.34s/it] 86%|████████▋ | 10514/12188 [1:06:10<3:28:14, 7.46s/it] {'loss': 0.2755, 'grad_norm': 0.6957261066619003, 'learning_rate': 4.871975239852766e-07, 'epoch': 0.86} + 86%|████████▋ | 10514/12188 [1:06:10<3:28:14, 7.46s/it] 86%|████████▋ | 10515/12188 [1:06:17<3:24:02, 7.32s/it] {'loss': 0.3272, 'grad_norm': 0.7369139591078514, 'learning_rate': 4.866255915741924e-07, 'epoch': 0.86} + 86%|████████▋ | 10515/12188 [1:06:17<3:24:02, 7.32s/it] 86%|████████▋ | 10516/12188 [1:06:24<3:24:27, 7.34s/it] {'loss': 0.3214, 'grad_norm': 0.7199583869583912, 'learning_rate': 4.860539778903579e-07, 'epoch': 0.86} + 86%|████████▋ | 10516/12188 [1:06:24<3:24:27, 7.34s/it] 86%|████████▋ | 10517/12188 [1:06:31<3:21:17, 7.23s/it] {'loss': 0.2792, 'grad_norm': 0.6802183955329376, 'learning_rate': 4.854826829741388e-07, 'epoch': 0.86} + 86%|████████▋ | 10517/12188 [1:06:31<3:21:17, 7.23s/it][2025-08-17 23:29:56,011] [WARNING] [stage3.py:2118:step] 1 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time + 86%|████████▋ | 10518/12188 [1:06:43<3:55:54, 8.48s/it] {'loss': 0.2939, 'grad_norm': 0.8273571410236386, 'learning_rate': 4.849117068658799e-07, 'epoch': 0.86} + 86%|████████▋ | 10518/12188 [1:06:43<3:55:54, 8.48s/it] 86%|████████▋ | 10519/12188 [1:06:50<3:44:20, 8.06s/it] {'loss': 0.2718, 'grad_norm': 0.6896252559991214, 'learning_rate': 4.84341049605902e-07, 'epoch': 0.86} + 86%|████████▋ | 10519/12188 [1:06:50<3:44:20, 8.06s/it] 86%|████████▋ | 10520/12188 [1:06:57<3:41:35, 7.97s/it] {'loss': 0.3047, 'grad_norm': 0.7246366979421274, 'learning_rate': 4.837707112345047e-07, 'epoch': 0.86} + 86%|████████▋ | 10520/12188 [1:06:57<3:41:35, 7.97s/it] 86%|████████▋ | 10521/12188 [1:07:05<3:34:23, 7.72s/it] {'loss': 0.3187, 'grad_norm': 0.6919468138701186, 'learning_rate': 4.832006917919629e-07, 'epoch': 0.86} + 86%|████████▋ | 10521/12188 [1:07:05<3:34:23, 7.72s/it] 86%|████████▋ | 10522/12188 [1:07:11<3:27:13, 7.46s/it] {'loss': 0.2731, 'grad_norm': 0.6801568401142465, 'learning_rate': 4.826309913185323e-07, 'epoch': 0.86} + 86%|████████▋ | 10522/12188 [1:07:11<3:27:13, 7.46s/it] 86%|████████▋ | 10523/12188 [1:07:19<3:24:30, 7.37s/it] {'loss': 0.2937, 'grad_norm': 0.69955001228473, 'learning_rate': 4.820616098544439e-07, 'epoch': 0.86} + 86%|████████▋ | 10523/12188 [1:07:19<3:24:30, 7.37s/it] 86%|████████▋ | 10524/12188 [1:07:26<3:21:27, 7.26s/it] {'loss': 0.2857, 'grad_norm': 0.7043722869998804, 'learning_rate': 4.814925474399046e-07, 'epoch': 0.86} + 86%|████████▋ | 10524/12188 [1:07:26<3:21:27, 7.26s/it] 86%|████████▋ | 10525/12188 [1:07:35<3:40:30, 7.96s/it] {'loss': 0.2788, 'grad_norm': 0.932508153914763, 'learning_rate': 4.809238041151021e-07, 'epoch': 0.86} + 86%|████████▋ | 10525/12188 [1:07:35<3:40:30, 7.96s/it] 86%|████████▋ | 10526/12188 [1:07:42<3:34:24, 7.74s/it] {'loss': 0.3233, 'grad_norm': 0.6885130109025173, 'learning_rate': 4.803553799202016e-07, 'epoch': 0.86} + 86%|████████▋ | 10526/12188 [1:07:42<3:34:24, 7.74s/it] 86%|████████▋ | 10527/12188 [1:07:50<3:33:08, 7.70s/it] {'loss': 0.2957, 'grad_norm': 0.6838879375142456, 'learning_rate': 4.797872748953414e-07, 'epoch': 0.86} + 86%|████████▋ | 10527/12188 [1:07:50<3:33:08, 7.70s/it] 86%|████████▋ | 10528/12188 [1:07:59<3:43:35, 8.08s/it] {'loss': 0.3157, 'grad_norm': 0.7439638009252532, 'learning_rate': 4.792194890806423e-07, 'epoch': 0.86} + 86%|████████▋ | 10528/12188 [1:07:59<3:43:35, 8.08s/it] 86%|████████▋ | 10529/12188 [1:08:06<3:36:51, 7.84s/it] {'loss': 0.3628, 'grad_norm': 0.7681122384122422, 'learning_rate': 4.786520225161989e-07, 'epoch': 0.86} + 86%|████████▋ | 10529/12188 [1:08:06<3:36:51, 7.84s/it] 86%|████████▋ | 10530/12188 [1:08:13<3:31:44, 7.66s/it] {'loss': 0.2856, 'grad_norm': 0.6672343592314142, 'learning_rate': 4.780848752420858e-07, 'epoch': 0.86} + 86%|████████▋ | 10530/12188 [1:08:13<3:31:44, 7.66s/it] 86%|████████▋ | 10531/12188 [1:08:21<3:27:06, 7.50s/it] {'loss': 0.3256, 'grad_norm': 0.6899928084188163, 'learning_rate': 4.775180472983549e-07, 'epoch': 0.86} + 86%|████████▋ | 10531/12188 [1:08:21<3:27:06, 7.50s/it] 86%|████████▋ | 10532/12188 [1:08:27<3:20:37, 7.27s/it] {'loss': 0.3056, 'grad_norm': 0.743291485796909, 'learning_rate': 4.769515387250317e-07, 'epoch': 0.86} + 86%|████████▋ | 10532/12188 [1:08:27<3:20:37, 7.27s/it] 86%|████████▋ | 10533/12188 [1:08:35<3:21:02, 7.29s/it] {'loss': 0.3225, 'grad_norm': 0.7007735067242904, 'learning_rate': 4.763853495621251e-07, 'epoch': 0.86} + 86%|████████▋ | 10533/12188 [1:08:35<3:21:02, 7.29s/it] 86%|████████▋ | 10534/12188 [1:08:42<3:18:30, 7.20s/it] {'loss': 0.3022, 'grad_norm': 0.7162739673865419, 'learning_rate': 4.75819479849618e-07, 'epoch': 0.86} + 86%|████████▋ | 10534/12188 [1:08:42<3:18:30, 7.20s/it] 86%|████████▋ | 10535/12188 [1:08:48<3:14:14, 7.05s/it] {'loss': 0.2882, 'grad_norm': 0.6880624105526898, 'learning_rate': 4.7525392962746997e-07, 'epoch': 0.86} + 86%|████████▋ | 10535/12188 [1:08:48<3:14:14, 7.05s/it] 86%|████████▋ | 10536/12188 [1:08:55<3:11:29, 6.96s/it] {'loss': 0.2762, 'grad_norm': 0.6420340078710945, 'learning_rate': 4.746886989356203e-07, 'epoch': 0.86} + 86%|████████▋ | 10536/12188 [1:08:55<3:11:29, 6.96s/it] 86%|████████▋ | 10537/12188 [1:09:02<3:10:27, 6.92s/it] {'loss': 0.2824, 'grad_norm': 0.6724777861862873, 'learning_rate': 4.741237878139837e-07, 'epoch': 0.86} + 86%|████████▋ | 10537/12188 [1:09:02<3:10:27, 6.92s/it] 86%|████████▋ | 10538/12188 [1:09:10<3:16:30, 7.15s/it] {'loss': 0.3319, 'grad_norm': 0.7472013152891405, 'learning_rate': 4.735591963024533e-07, 'epoch': 0.86} + 86%|████████▋ | 10538/12188 [1:09:10<3:16:30, 7.15s/it] 86%|████████▋ | 10539/12188 [1:09:17<3:15:14, 7.10s/it] {'loss': 0.3285, 'grad_norm': 0.7924266833917618, 'learning_rate': 4.72994924440901e-07, 'epoch': 0.86} + 86%|████████▋ | 10539/12188 [1:09:17<3:15:14, 7.10s/it] 86%|████████▋ | 10540/12188 [1:09:24<3:18:23, 7.22s/it] {'loss': 0.3029, 'grad_norm': 0.7197283243604594, 'learning_rate': 4.7243097226917334e-07, 'epoch': 0.86} + 86%|████████▋ | 10540/12188 [1:09:24<3:18:23, 7.22s/it] 86%|████████▋ | 10541/12188 [1:09:33<3:30:36, 7.67s/it] {'loss': 0.3012, 'grad_norm': 0.7315546313974142, 'learning_rate': 4.718673398270973e-07, 'epoch': 0.86} + 86%|████████▋ | 10541/12188 [1:09:33<3:30:36, 7.67s/it] 86%|████████▋ | 10542/12188 [1:09:40<3:22:49, 7.39s/it] {'loss': 0.3219, 'grad_norm': 0.6959093775502289, 'learning_rate': 4.7130402715447386e-07, 'epoch': 0.86} + 86%|████████▋ | 10542/12188 [1:09:40<3:22:49, 7.39s/it] 87%|████████▋ | 10543/12188 [1:09:47<3:20:02, 7.30s/it] {'loss': 0.2835, 'grad_norm': 0.6747453408916528, 'learning_rate': 4.7074103429108397e-07, 'epoch': 0.86} + 87%|████████▋ | 10543/12188 [1:09:47<3:20:02, 7.30s/it] 87%|████████▋ | 10544/12188 [1:09:54<3:16:31, 7.17s/it] {'loss': 0.3082, 'grad_norm': 0.7289593049466903, 'learning_rate': 4.7017836127668636e-07, 'epoch': 0.87} + 87%|████████▋ | 10544/12188 [1:09:54<3:16:31, 7.17s/it] 87%|████████▋ | 10545/12188 [1:10:01<3:19:02, 7.27s/it] {'loss': 0.2906, 'grad_norm': 0.8683812005932262, 'learning_rate': 4.696160081510143e-07, 'epoch': 0.87} + 87%|████████▋ | 10545/12188 [1:10:01<3:19:02, 7.27s/it] 87%|████████▋ | 10546/12188 [1:10:08<3:12:36, 7.04s/it] {'loss': 0.3022, 'grad_norm': 0.7308859046119054, 'learning_rate': 4.69053974953782e-07, 'epoch': 0.87} + 87%|████████▋ | 10546/12188 [1:10:08<3:12:36, 7.04s/it] 87%|████████▋ | 10547/12188 [1:10:14<3:08:05, 6.88s/it] {'loss': 0.2789, 'grad_norm': 0.7094236006676231, 'learning_rate': 4.684922617246773e-07, 'epoch': 0.87} + 87%|████████▋ | 10547/12188 [1:10:14<3:08:05, 6.88s/it] 87%|████████▋ | 10548/12188 [1:10:21<3:09:16, 6.92s/it] {'loss': 0.3082, 'grad_norm': 0.7204885091468398, 'learning_rate': 4.679308685033701e-07, 'epoch': 0.87} + 87%|████████▋ | 10548/12188 [1:10:21<3:09:16, 6.92s/it] 87%|████████▋ | 10549/12188 [1:10:28<3:08:14, 6.89s/it] {'loss': 0.3033, 'grad_norm': 0.769874063383827, 'learning_rate': 4.67369795329502e-07, 'epoch': 0.87} + 87%|████████▋ | 10549/12188 [1:10:28<3:08:14, 6.89s/it] 87%|████████▋ | 10550/12188 [1:10:35<3:07:53, 6.88s/it] {'loss': 0.3083, 'grad_norm': 0.7578060415606446, 'learning_rate': 4.6680904224269797e-07, 'epoch': 0.87} + 87%|████████▋ | 10550/12188 [1:10:35<3:07:53, 6.88s/it] 87%|████████▋ | 10551/12188 [1:10:42<3:11:21, 7.01s/it] {'loss': 0.3033, 'grad_norm': 0.7241464908437139, 'learning_rate': 4.6624860928255576e-07, 'epoch': 0.87} + 87%|████████▋ | 10551/12188 [1:10:42<3:11:21, 7.01s/it] 87%|████████▋ | 10552/12188 [1:10:49<3:10:42, 6.99s/it] {'loss': 0.3212, 'grad_norm': 0.6899946277989757, 'learning_rate': 4.6568849648865323e-07, 'epoch': 0.87} + 87%|█████���██▋ | 10552/12188 [1:10:49<3:10:42, 6.99s/it] 87%|████████▋ | 10553/12188 [1:10:56<3:10:32, 6.99s/it] {'loss': 0.2924, 'grad_norm': 0.7917502449927445, 'learning_rate': 4.651287039005431e-07, 'epoch': 0.87} + 87%|████████▋ | 10553/12188 [1:10:56<3:10:32, 6.99s/it] 87%|████████▋ | 10554/12188 [1:11:03<3:12:16, 7.06s/it] {'loss': 0.2876, 'grad_norm': 0.6749968923888184, 'learning_rate': 4.6456923155775934e-07, 'epoch': 0.87} + 87%|████████▋ | 10554/12188 [1:11:03<3:12:16, 7.06s/it] 87%|████████▋ | 10555/12188 [1:11:10<3:11:54, 7.05s/it] {'loss': 0.2825, 'grad_norm': 0.6911201498433196, 'learning_rate': 4.640100794998087e-07, 'epoch': 0.87} + 87%|████████▋ | 10555/12188 [1:11:10<3:11:54, 7.05s/it] 87%|████████▋ | 10556/12188 [1:11:17<3:11:48, 7.05s/it] {'loss': 0.2725, 'grad_norm': 0.7275806130415753, 'learning_rate': 4.6345124776617847e-07, 'epoch': 0.87} + 87%|████████▋ | 10556/12188 [1:11:17<3:11:48, 7.05s/it] 87%|████████▋ | 10557/12188 [1:11:24<3:07:57, 6.91s/it] {'loss': 0.3168, 'grad_norm': 0.7342908194156311, 'learning_rate': 4.628927363963337e-07, 'epoch': 0.87} + 87%|████████▋ | 10557/12188 [1:11:24<3:07:57, 6.91s/it] 87%|████████▋ | 10558/12188 [1:11:31<3:12:48, 7.10s/it] {'loss': 0.2805, 'grad_norm': 0.6914620276836615, 'learning_rate': 4.623345454297135e-07, 'epoch': 0.87} + 87%|████████▋ | 10558/12188 [1:11:31<3:12:48, 7.10s/it] 87%|████████▋ | 10559/12188 [1:11:39<3:13:07, 7.11s/it] {'loss': 0.2516, 'grad_norm': 0.667331848589805, 'learning_rate': 4.617766749057384e-07, 'epoch': 0.87} + 87%|████████▋ | 10559/12188 [1:11:39<3:13:07, 7.11s/it] 87%|████████▋ | 10560/12188 [1:11:46<3:13:05, 7.12s/it] {'loss': 0.2679, 'grad_norm': 0.6229771331925342, 'learning_rate': 4.61219124863802e-07, 'epoch': 0.87} + 87%|████████▋ | 10560/12188 [1:11:46<3:13:05, 7.12s/it] 87%|████████▋ | 10561/12188 [1:11:54<3:18:43, 7.33s/it] {'loss': 0.2995, 'grad_norm': 0.7337897054795667, 'learning_rate': 4.606618953432795e-07, 'epoch': 0.87} + 87%|████████▋ | 10561/12188 [1:11:54<3:18:43, 7.33s/it] 87%|████████▋ | 10562/12188 [1:12:00<3:14:18, 7.17s/it] {'loss': 0.2992, 'grad_norm': 0.6964353382617873, 'learning_rate': 4.601049863835216e-07, 'epoch': 0.87} + 87%|████████▋ | 10562/12188 [1:12:00<3:14:18, 7.17s/it] 87%|████████▋ | 10563/12188 [1:12:07<3:13:01, 7.13s/it] {'loss': 0.2815, 'grad_norm': 0.701489546190898, 'learning_rate': 4.5954839802385575e-07, 'epoch': 0.87} + 87%|████████▋ | 10563/12188 [1:12:07<3:13:01, 7.13s/it] 87%|████████▋ | 10564/12188 [1:12:14<3:12:50, 7.12s/it] {'loss': 0.2802, 'grad_norm': 0.7804927380676383, 'learning_rate': 4.5899213030358657e-07, 'epoch': 0.87} + 87%|████████▋ | 10564/12188 [1:12:14<3:12:50, 7.12s/it] 87%|████████▋ | 10565/12188 [1:12:22<3:12:28, 7.12s/it] {'loss': 0.2714, 'grad_norm': 0.7252235638773757, 'learning_rate': 4.5843618326199713e-07, 'epoch': 0.87} + 87%|████████▋ | 10565/12188 [1:12:22<3:12:28, 7.12s/it] 87%|████████▋ | 10566/12188 [1:12:28<3:08:37, 6.98s/it] {'loss': 0.3231, 'grad_norm': 0.7395550687219129, 'learning_rate': 4.578805569383482e-07, 'epoch': 0.87} + 87%|████████▋ | 10566/12188 [1:12:28<3:08:37, 6.98s/it] 87%|████████▋ | 10567/12188 [1:12:36<3:11:10, 7.08s/it] {'loss': 0.3036, 'grad_norm': 0.7181164777181117, 'learning_rate': 4.5732525137187733e-07, 'epoch': 0.87} + 87%|████████▋ | 10567/12188 [1:12:36<3:11:10, 7.08s/it] 87%|████████▋ | 10568/12188 [1:12:45<3:29:56, 7.78s/it] {'loss': 0.2819, 'grad_norm': 0.6780091077876733, 'learning_rate': 4.567702666017976e-07, 'epoch': 0.87} + 87%|████████▋ | 10568/12188 [1:12:45<3:29:56, 7.78s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f6f260a0e50> +[Try #0] Failed to fetch sample 4462166 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f6f260a0e50> +Problematic sample: {'image': '20240822_131046_before_screenshot_sub0.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Organic'"}, {'from': 'gpt', 'value': '\nclick(x=0.5535, y=0.139)\n'}]} + 87%|████████▋ | 10569/12188 [1:12:53<3:31:03, 7.82s/it] {'loss': 0.2975, 'grad_norm': 0.7341072827072443, 'learning_rate': 4.562156026673031e-07, 'epoch': 0.87} + 87%|████████▋ | 10569/12188 [1:12:53<3:31:03, 7.82s/it] 87%|████████▋ | 10570/12188 [1:13:00<3:25:19, 7.61s/it] {'loss': 0.3158, 'grad_norm': 0.681944045135168, 'learning_rate': 4.5566125960756324e-07, 'epoch': 0.87} + 87%|████████▋ | 10570/12188 [1:13:00<3:25:19, 7.61s/it] 87%|████████▋ | 10571/12188 [1:13:07<3:22:39, 7.52s/it] {'loss': 0.3079, 'grad_norm': 0.7596757787295955, 'learning_rate': 4.5510723746172326e-07, 'epoch': 0.87} + 87%|████████▋ | 10571/12188 [1:13:07<3:22:39, 7.52s/it] 87%|████████▋ | 10572/12188 [1:13:15<3:26:19, 7.66s/it] {'loss': 0.315, 'grad_norm': 0.8096600816272078, 'learning_rate': 4.545535362689091e-07, 'epoch': 0.87} + 87%|████████▋ | 10572/12188 [1:13:15<3:26:19, 7.66s/it] 87%|████████▋ | 10573/12188 [1:13:22<3:19:11, 7.40s/it] {'loss': 0.3233, 'grad_norm': 0.6665868829115636, 'learning_rate': 4.5400015606822003e-07, 'epoch': 0.87} + 87%|████████▋ | 10573/12188 [1:13:22<3:19:11, 7.40s/it] 87%|████████▋ | 10574/12188 [1:13:29<3:13:54, 7.21s/it] {'loss': 0.2844, 'grad_norm': 0.7523022061399908, 'learning_rate': 4.5344709689873703e-07, 'epoch': 0.87} + 87%|████████▋ | 10574/12188 [1:13:29<3:13:54, 7.21s/it] 87%|████████▋ | 10575/12188 [1:13:36<3:10:25, 7.08s/it] {'loss': 0.2915, 'grad_norm': 0.7168338358885655, 'learning_rate': 4.5289435879951605e-07, 'epoch': 0.87} + 87%|████████▋ | 10575/12188 [1:13:36<3:10:25, 7.08s/it] 87%|████████▋ | 10576/12188 [1:13:44<3:16:56, 7.33s/it] {'loss': 0.3301, 'grad_norm': 0.7232622364079276, 'learning_rate': 4.523419418095898e-07, 'epoch': 0.87} + 87%|████████▋ | 10576/12188 [1:13:44<3:16:56, 7.33s/it] 87%|████████▋ | 10577/12188 [1:13:51<3:15:45, 7.29s/it] {'loss': 0.3253, 'grad_norm': 0.7176075589766392, 'learning_rate': 4.5178984596796815e-07, 'epoch': 0.87} + 87%|████████▋ | 10577/12188 [1:13:51<3:15:45, 7.29s/it] 87%|████████▋ | 10578/12188 [1:13:58<3:16:36, 7.33s/it] {'loss': 0.29, 'grad_norm': 0.7681191147732237, 'learning_rate': 4.5123807131364164e-07, 'epoch': 0.87} + 87%|████████▋ | 10578/12188 [1:13:58<3:16:36, 7.33s/it] 87%|█���██████▋ | 10579/12188 [1:14:07<3:26:41, 7.71s/it] {'loss': 0.3034, 'grad_norm': 0.6846749112701292, 'learning_rate': 4.5068661788557345e-07, 'epoch': 0.87} + 87%|████████▋ | 10579/12188 [1:14:07<3:26:41, 7.71s/it] 87%|████████▋ | 10580/12188 [1:14:14<3:19:58, 7.46s/it] {'loss': 0.3029, 'grad_norm': 0.7549447297935975, 'learning_rate': 4.5013548572270806e-07, 'epoch': 0.87} + 87%|████████▋ | 10580/12188 [1:14:14<3:19:58, 7.46s/it] 87%|████████▋ | 10581/12188 [1:14:21<3:21:20, 7.52s/it] {'loss': 0.2772, 'grad_norm': 0.6764238646891694, 'learning_rate': 4.495846748639643e-07, 'epoch': 0.87} + 87%|████████▋ | 10581/12188 [1:14:21<3:21:20, 7.52s/it] 87%|████████▋ | 10582/12188 [1:14:33<3:52:57, 8.70s/it] {'loss': 0.2791, 'grad_norm': 0.6705217788960743, 'learning_rate': 4.4903418534824053e-07, 'epoch': 0.87} + 87%|████████▋ | 10582/12188 [1:14:33<3:52:57, 8.70s/it] 87%|████████▋ | 10583/12188 [1:14:40<3:38:08, 8.16s/it] {'loss': 0.2884, 'grad_norm': 0.6992219759174817, 'learning_rate': 4.4848401721440947e-07, 'epoch': 0.87} + 87%|████████▋ | 10583/12188 [1:14:40<3:38:08, 8.16s/it] 87%|████████▋ | 10584/12188 [1:14:46<3:27:30, 7.76s/it] {'loss': 0.3058, 'grad_norm': 0.6931198317190357, 'learning_rate': 4.4793417050132513e-07, 'epoch': 0.87} + 87%|████████▋ | 10584/12188 [1:14:46<3:27:30, 7.76s/it] 87%|████████▋ | 10585/12188 [1:14:53<3:19:55, 7.48s/it] {'loss': 0.2912, 'grad_norm': 0.7031581887238345, 'learning_rate': 4.47384645247817e-07, 'epoch': 0.87} + 87%|████████▋ | 10585/12188 [1:14:53<3:19:55, 7.48s/it] 87%|████████▋ | 10586/12188 [1:15:00<3:13:25, 7.24s/it] {'loss': 0.2865, 'grad_norm': 1.0071468131984826, 'learning_rate': 4.468354414926901e-07, 'epoch': 0.87} + 87%|████████▋ | 10586/12188 [1:15:00<3:13:25, 7.24s/it] 87%|████████▋ | 10587/12188 [1:15:07<3:13:08, 7.24s/it] {'loss': 0.2879, 'grad_norm': 0.6728881912727732, 'learning_rate': 4.4628655927472896e-07, 'epoch': 0.87} + 87%|████████▋ | 10587/12188 [1:15:07<3:13:08, 7.24s/it] 87%|████████▋ | 10588/12188 [1:15:14<3:09:53, 7.12s/it] {'loss': 0.2845, 'grad_norm': 0.7116558431628321, 'learning_rate': 4.45737998632696e-07, 'epoch': 0.87} + 87%|████████▋ | 10588/12188 [1:15:14<3:09:53, 7.12s/it] 87%|████████▋ | 10589/12188 [1:15:21<3:07:49, 7.05s/it] {'loss': 0.3265, 'grad_norm': 0.7071919948327169, 'learning_rate': 4.4518975960532786e-07, 'epoch': 0.87} + 87%|████████▋ | 10589/12188 [1:15:21<3:07:49, 7.05s/it] 87%|████████▋ | 10590/12188 [1:15:30<3:23:47, 7.65s/it] {'loss': 0.2921, 'grad_norm': 1.1714546424792724, 'learning_rate': 4.4464184223134143e-07, 'epoch': 0.87} + 87%|████████▋ | 10590/12188 [1:15:30<3:23:47, 7.65s/it] 87%|████████▋ | 10591/12188 [1:15:37<3:20:59, 7.55s/it] {'loss': 0.2886, 'grad_norm': 0.8493333261245375, 'learning_rate': 4.4409424654942967e-07, 'epoch': 0.87} + 87%|████████▋ | 10591/12188 [1:15:37<3:20:59, 7.55s/it] 87%|████████▋ | 10592/12188 [1:15:44<3:14:32, 7.31s/it] {'loss': 0.3343, 'grad_norm': 0.7186684700047721, 'learning_rate': 4.4354697259826165e-07, 'epoch': 0.87} + 87%|████████▋ | 10592/12188 [1:15:44<3:14:32, 7.31s/it] 87%|████████▋ | 10593/12188 [1:15:53<3:29:54, 7.90s/it] {'loss': 0.3148, 'grad_norm': 0.8403203671481098, 'learning_rate': 4.4300002041648695e-07, 'epoch': 0.87} + 87%|████████▋ | 10593/12188 [1:15:53<3:29:54, 7.90s/it] 87%|████████▋ | 10594/12188 [1:16:00<3:23:35, 7.66s/it] {'loss': 0.2988, 'grad_norm': 0.7792615946429285, 'learning_rate': 4.42453390042728e-07, 'epoch': 0.87} + 87%|████████▋ | 10594/12188 [1:16:00<3:23:35, 7.66s/it] 87%|████████▋ | 10595/12188 [1:16:08<3:19:24, 7.51s/it] {'loss': 0.2919, 'grad_norm': 0.7151099661046264, 'learning_rate': 4.419070815155896e-07, 'epoch': 0.87} + 87%|████████▋ | 10595/12188 [1:16:08<3:19:24, 7.51s/it] 87%|████████▋ | 10596/12188 [1:16:15<3:17:46, 7.45s/it] {'loss': 0.2771, 'grad_norm': 0.734259384683616, 'learning_rate': 4.413610948736491e-07, 'epoch': 0.87} + 87%|████████▋ | 10596/12188 [1:16:15<3:17:46, 7.45s/it] 87%|████████▋ | 10597/12188 [1:16:22<3:16:25, 7.41s/it] {'loss': 0.3195, 'grad_norm': 0.6255687856031251, 'learning_rate': 4.408154301554646e-07, 'epoch': 0.87} + 87%|████████▋ | 10597/12188 [1:16:22<3:16:25, 7.41s/it] 87%|████████▋ | 10598/12188 [1:16:30<3:17:10, 7.44s/it] {'loss': 0.2854, 'grad_norm': 0.6833411159338835, 'learning_rate': 4.4027008739956964e-07, 'epoch': 0.87} + 87%|████████▋ | 10598/12188 [1:16:30<3:17:10, 7.44s/it] 87%|████████▋ | 10599/12188 [1:16:37<3:13:59, 7.33s/it] {'loss': 0.2991, 'grad_norm': 0.7492644894003205, 'learning_rate': 4.397250666444747e-07, 'epoch': 0.87} + 87%|████████▋ | 10599/12188 [1:16:37<3:13:59, 7.33s/it] 87%|████████▋ | 10600/12188 [1:16:43<3:08:21, 7.12s/it] {'loss': 0.3017, 'grad_norm': 0.7938653504996882, 'learning_rate': 4.3918036792866993e-07, 'epoch': 0.87} + 87%|████████▋ | 10600/12188 [1:16:43<3:08:21, 7.12s/it] 87%|████████▋ | 10601/12188 [1:16:50<3:07:20, 7.08s/it] {'loss': 0.259, 'grad_norm': 0.6818439628417123, 'learning_rate': 4.3863599129061905e-07, 'epoch': 0.87} + 87%|████████▋ | 10601/12188 [1:16:50<3:07:20, 7.08s/it] 87%|████████▋ | 10602/12188 [1:16:57<3:03:25, 6.94s/it] {'loss': 0.2739, 'grad_norm': 0.6899948236899512, 'learning_rate': 4.3809193676876584e-07, 'epoch': 0.87} + 87%|████████▋ | 10602/12188 [1:16:57<3:03:25, 6.94s/it] 87%|████████▋ | 10603/12188 [1:17:04<3:00:39, 6.84s/it] {'loss': 0.2954, 'grad_norm': 0.9019660476024225, 'learning_rate': 4.375482044015322e-07, 'epoch': 0.87} + 87%|████████▋ | 10603/12188 [1:17:04<3:00:39, 6.84s/it] 87%|████████▋ | 10604/12188 [1:17:12<3:14:29, 7.37s/it] {'loss': 0.3073, 'grad_norm': 0.71116206694988, 'learning_rate': 4.3700479422731424e-07, 'epoch': 0.87} + 87%|████████▋ | 10604/12188 [1:17:12<3:14:29, 7.37s/it] 87%|████████▋ | 10605/12188 [1:17:20<3:21:06, 7.62s/it] {'loss': 0.272, 'grad_norm': 0.6543602823190313, 'learning_rate': 4.3646170628448504e-07, 'epoch': 0.87} + 87%|████████▋ | 10605/12188 [1:17:20<3:21:06, 7.62s/it] 87%|████████▋ | 10606/12188 [1:17:27<3:15:18, 7.41s/it] {'loss': 0.3155, 'grad_norm': 0.6947395571733106, 'learning_rate': 4.3591894061140005e-07, 'epoch': 0.87} + 87%|████████▋ | 10606/12188 [1:17:27<3:15:18, 7.41s/it] 87%|████████▋ | 10607/12188 [1:17:35<3:16:41, 7.46s/it] {'loss': 0.3274, 'grad_norm': 0.7406300672552798, 'learning_rate': 4.353764972463853e-07, 'epoch': 0.87} + 87%|████████▋ | 10607/12188 [1:17:35<3:16:41, 7.46s/it] 87%|████████▋ | 10608/12188 [1:17:42<3:11:01, 7.25s/it] {'loss': 0.2861, 'grad_norm': 0.683455057097568, 'learning_rate': 4.348343762277496e-07, 'epoch': 0.87} + 87%|████████▋ | 10608/12188 [1:17:42<3:11:01, 7.25s/it]W0818 09:30:22.146000 7432 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 09:30:22.146000 7432 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 09:30:22.146000 7432 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 09:30:22.146000 7432 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +*** +*** +W0818 09:30:22.141000 24210 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 09:30:22.141000 24210 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 09:30:22.141000 24210 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 09:30:22.141000 24210 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 09:30:24.247000 46973 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 09:30:24.247000 46973 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 09:30:24.247000 46973 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 09:30:24.247000 46973 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +[2025-08-18 09:31:41,212] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:31:41,212] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:31:41,229] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:31:41,229] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:31:41,232] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:31:41,234] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:31:41,236] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:31:41,239] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:31:41,244] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:31:41,248] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:31:41,248] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:31:41,250] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:31:41,262] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:31:41,265] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:31:41,263] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:31:41,264] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:31:41,264] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:31:41,266] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:31:41,266] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:31:41,267] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:31:41,269] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:31:41,271] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:31:41,271] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:31:41,271] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:31:41,265] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:31:41,265] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:31:41,265] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:31:41,265] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:31:41,278] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:31:41,271] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:31:41,285] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:31:41,281] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:31:41,279] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:31:41,281] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:31:41,282] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:31:41,282] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:31:41,283] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:31:41,283] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:31:41,283] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:31:41,279] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:31:41,279] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:31:41,294] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:31:41,296] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:31:41,297] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:31:41,297] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:31:41,297] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:31:41,290] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:31:41,291] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:31:41,291] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:31:41,292] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:31:41,294] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:31:41,294] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:31:41,294] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:31:41,294] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:31:58,227] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:31:58,217] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:31:58,224] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:31:58,224] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:31:58,224] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:31:58,224] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:31:58,224] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:31:58,224] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:31:58,221] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:31:58,221] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:31:58,221] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:31:58,221] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:31:58,221] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:31:58,221] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:31:58,222] [INFO] [comm.py:652:init_distributed] cdb=None +ing TorchBackend in DeepSpeed with backend nccl +[2025-08-18 09:31:58,231] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:31:58,235] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:31:58,235] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:31:58,236] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:31:58,236] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:31:58,236] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:31:58,237] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:31:58,238] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:31:58,238] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:31:58,669] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 09:31:58,697] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 09:31:58,710] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 09:31:58,704] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 09:31:58,727] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 09:31:58,728] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 09:31:58,765] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 09:31:58,779] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 09:31:58,781] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 09:31:58,784] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 09:31:58,790] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 09:31:58,789] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 09:31:58,791] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 09:31:58,792] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 09:31:58,792] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 09:31:58,793] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 09:31:58,796] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 09:31:58,821] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 09:31:58,822] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 09:31:58,823] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 09:31:58,828] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 09:31:58,828] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 09:31:58,828] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 09:31:58,829] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 09:31:58,836] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 09:31:58,839] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 09:31:58,839] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 09:31:58,843] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 09:31:58,844] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to mo[2025-08-18 09:31:58,846] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 09:31:58,847] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 09:31:58,848] [INFO] [config.py:733:__init__] Config mesh_devi[2025-08-18 09:31:58,836] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 + on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 09:31:58,848] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 09:31:58,844] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +ce None world_size = 64 +[2025-08-18 09:31:58,846] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 09:31:58,848] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 09:31:58,864] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 09:31:58,864] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 09:31:58,867] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 09:31:58,868] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 09:31:58,869] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 09:31:58,870] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 09:31:59,213] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 09:32:01,080] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 09:32:01,082] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 09:32:01,083] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 09:32:01,083] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 09:32:01,084] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 09:32:01,084] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 09:32:01,085] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. + Loading checkpoint shards: 0%| | 0/2 [00:00 before Client(conf_path) +Rank 0: --> after Client(conf_path) +Rank 0: Loading datasets: /mnt/petrelfs/liuzhaoyang/workspace/GUIAgent/internvl_chat/data/internvl_meta/meta/meta_250816_1.json +Rank 0: Loading guienv +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl with all sampling strategy +Rank 0: Loaded 327972 samples from VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl +Rank 0: Loading omniact +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl with all sampling strategy +Rank 0: Loaded 6720 samples from VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl +Rank 0: Loading ricoig16k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16133 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl +Rank 0: Loading ricosca +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl with all sampling strategy +Rank 0: Loaded 173212 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl +Rank 0: Loading seeclick +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl with all sampling strategy +Rank 0: Loaded 271121 samples from VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl +Rank 0: Loading ui_refexp +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl with all sampling strategy +Rank 0: Loaded 15624 samples from VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl +Rank 0: Loading webui350k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 57389 samples from VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl +Rank 0: Loading widget_captioning +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl with all sampling strategy +Rank 0: Loaded 101426 samples from VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl +Rank 0: Loading aitw-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl +Rank 0: Loading aitw-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl +Rank 0: Loading aitw-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl +Rank 0: Loading amex-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl +Rank 0: Loading amex-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl +Rank 0: Loading amex-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl +Rank 0: Loading android_control +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 149428 samples from VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl +Rank 0: Loading coat +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl with all sampling strategy +Rank 0: Loaded 11833 samples from VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl +Rank 0: Loading guiact-web-multi-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl +Rank 0: Loading guiact-web-multi-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl +Rank 0: Loading guiact-web-multi-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl +Rank 0: Loading guiact-web-single +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl with all sampling strategy +Rank 0: Loaded 67396 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl +Rank 0: Loading guide +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl with all sampling strategy +Rank 0: Loaded 13544 samples from VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl +Rank 0: Loading gui-odyssey-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl +Rank 0: Loading gui-odyssey-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl +Rank 0: Loading gui-odyssey-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl +Rank 0: Loading mind2web-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l1_202507011.jsonl +Rank 0: Loading mind2web-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l2_202507011.jsonl +Rank 0: Loading mind2web-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l3_202507011.jsonl +Rank 0: Loading miniwob-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l1_202507011.jsonl +Rank 0: Loading miniwob-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l2_202507011.jsonl +Rank 0: Loading miniwob-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l3_202507011.jsonl +Rank 0: Loading aguvis_android_control-v2 +Rank 0: Skipping aguvis_android_control-v2 due to repeat_time=0 +Rank 0: Loading aguvis_coat-v2 +Rank 0: Skipping aguvis_coat-v2 due to repeat_time=0 +Rank 0: Loading aguvis_docvqa_grounding +Rank 0: Skipping aguvis_docvqa_grounding due to repeat_time=0 +Rank 0: Loading aguvis_guiact-web-multi +Rank 0: Skipping aguvis_guiact-web-multi due to repeat_time=0 +Rank 0: Loading aguvis_guiact-web-single-v2 +Rank 0: Skipping aguvis_guiact-web-single-v2 due to repeat_time=0 +Rank 0: Loading aguvis_guide_si_10k-v2 +Rank 0: Skipping aguvis_guide_si_10k-v2 due to repeat_time=0 +Rank 0: Loading aguvis_guienv +Rank 0: Skipping aguvis_guienv due to repeat_time=0 +Rank 0: Loading aguvis_mind2web_train_v1.0.1 +Rank 0: Skipping aguvis_mind2web_train_v1.0.1 due to repeat_time=0 +Rank 0: Loading aguvis_omniact +Rank 0: Skipping aguvis_omniact due to repeat_time=0 +Rank 0: Loading aguvis_osatlas_ui_tars_cleaned +Rank 0: Skipping aguvis_osatlas_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading aguvis_ricoig16k +Rank 0: Skipping aguvis_ricoig16k due to repeat_time=0 +Rank 0: Loading aguvis_ricosca +Rank 0: Skipping aguvis_ricosca due to repeat_time=0 +Rank 0: Loading aguvis_seeclick_mi_ui_tars_cleaned +Rank 0: Skipping aguvis_seeclick_mi_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading aguvis_seeclick_ui_tars_cleaned_fixed +Rank 0: Skipping aguvis_seeclick_ui_tars_cleaned_fixed due to repeat_time=0 +Rank 0: Loading aguvis_ui_refexp +Rank 0: Skipping aguvis_ui_refexp due to repeat_time=0 +Rank 0: Loading aguvis_webui350k +Rank 0: Skipping aguvis_webui350k due to repeat_time=0 +Rank 0: Loading aguvis_widget_captioning +Rank 0: Skipping aguvis_widget_captioning due to repeat_time=0 +Rank 0: Loading icon_caption_icon_v0222_description +Rank 0: Skipping icon_caption_icon_v0222_description due to repeat_time=0 +Rank 0: Loading icon_grounding_icon_v0222_grounding +Rank 0: Skipping icon_grounding_icon_v0222_grounding due to repeat_time=0 +Rank 0: Loading refusal_component_final_1.5m +Rank 0: Skipping refusal_component_final_1.5m due to repeat_time=0 +Rank 0: Loading refusal_component_library_snap_icon_data_grounding +Rank 0: Skipping refusal_component_library_snap_icon_data_grounding due to repeat_time=0 +Rank 0: Loading refusal_component_v1_130k +Rank 0: Skipping refusal_component_v1_130k due to repeat_time=0 +Rank 0: Loading refusal_guienv +Rank 0: Skipping refusal_guienv due to repeat_time=0 +Rank 0: Loading refusal_icon_v0222_grounding +Rank 0: Skipping refusal_icon_v0222_grounding due to repeat_time=0 +Rank 0: Loading refusal_osatlas_ui_tars_cleaned +Rank 0: Skipping refusal_osatlas_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading refusal_ricosca +Rank 0: Skipping refusal_ricosca due to repeat_time=0 +Rank 0: Loading refusal_seeclick_mi_ui_tars_cleaned +Rank 0: Skipping refusal_seeclick_mi_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading refusal_seeclick_ui_tars_cleaned_fixed +Rank 0: Skipping refusal_seeclick_ui_tars_cleaned_fixed due to repeat_time=0 +Rank 0: Loading refusal_training_data_icon_grounded_merged +Rank 0: Skipping refusal_training_data_icon_grounded_merged due to repeat_time=0 +Rank 0: Loading component_generated_component_final_1.5m_cleaned_split +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_final_1.5m_cleaned_split_20250713.jsonl with random:10% sampling strategy +Rank 0: Loaded 3987 samples from VC:s3://gui-agent/jedi/annotations_250713/component_final_1.5m_cleaned_split_20250713.jsonl +Rank 0: Loading component_generated_component_library_snap_icon_data_description +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_description_conversations_20250713.jsonl with random:50% sampling strategy +Rank 0: Loaded 11061 samples from VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_description_conversations_20250713.jsonl +Rank 0: Loading component_generated_component_library_snap_icon_data_grounding +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_grounding_conversations_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 4424 samples from VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_grounding_conversations_20250713.jsonl +Rank 0: Loading component_generated_component_v1_130k +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_v1_130k_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 26376 samples from VC:s3://gui-agent/jedi/annotations_250713/component_v1_130k_20250713.jsonl +Rank 0: Loading component_rule-based_doc_data_new +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/doc_data_new_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 3153 samples from VC:s3://gui-agent/jedi/annotations_250713/doc_data_new_20250713.jsonl +Rank 0: Loading component_rule-based_doc_scroll_data_new +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/doc_scroll_data_new_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 603 samples from VC:s3://gui-agent/jedi/annotations_250713/doc_scroll_data_new_20250713.jsonl +Rank 0: Loading component_rule-based_ethercalc_v1 +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/ethercalc_v1_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 2012 samples from VC:s3://gui-agent/jedi/annotations_250713/ethercalc_v1_20250713.jsonl +Rank 0: Loading component_rule-based_slide_v1_17k +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/slide_v1_17k_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 2363 samples from VC:s3://gui-agent/jedi/annotations_250713/slide_v1_17k_20250713.jsonl +Rank 0: Loading icon_caption_ios_app_data +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/ios_app_data_conversations-images_pure_color_background_20250713.jsonl with all sampling strategy +Rank 0: Loaded 49498 samples from VC:s3://gui-agent/jedi/annotations_250713/ios_app_data_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_caption_mac_app_data +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/mac_app_data_conversations-images_pure_color_background_20250713.jsonl with all sampling strategy +Rank 0: Loaded 18083 samples from VC:s3://gui-agent/jedi/annotations_250713/mac_app_data_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_caption_training_data_icon +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_pure_color_background_20250713.jsonl with random:50% sampling strategy +Rank 0: Loaded 75874 samples from VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_grounding_training_data_icon_grounded_merged +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_grounded_merged_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 5466 samples from VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_grounded_merged_20250713.jsonl +Rank 0: Loading layout_layout200k_training_data_qwen25 +Rank 0: Skipping layout_layout200k_training_data_qwen25 due to repeat_time=0 +Rank 0: Loading layout_layout200k_grounding_training_data_qwen25 +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/layout200k_grounding_training_data_qwen25_20250713.jsonl with random:10% sampling strategy +Rank 0: Loaded 158612 samples from VC:s3://gui-agent/jedi/annotations_250713/layout200k_grounding_training_data_qwen25_20250713.jsonl +Rank 0: Loading layout_layout400k_claude_training_data_qwen25_split +Rank 0: Skipping layout_layout400k_claude_training_data_qwen25_split due to repeat_time=0 +Rank 0: Loading layout_layout400k_claude_grounding_training_data_qwen25_split +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/layout400k_claude_grounding_training_data_qwen25_split_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 7540 samples from VC:s3://gui-agent/jedi/annotations_250713/layout400k_claude_grounding_training_data_qwen25_split_20250713.jsonl +Rank 0: Loading layout_os_layout_v1 +Rank 0: Skipping layout_os_layout_v1 due to repeat_time=0 +Rank 0: Loading layout_os_layout_v1_grounding +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/os_layout_v1_grounding_training_data_qwen25_split_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 7857 samples from VC:s3://gui-agent/jedi/annotations_250713/os_layout_v1_grounding_training_data_qwen25_split_20250713.jsonl +Rank 0: Loading mind2web_raw_image +Rank 0: Loading VC:s3://gui-agent/mind2web_train/navigation_20250705.jsonl with all sampling strategy +Rank 0: Loaded 5740 samples from VC:s3://gui-agent/mind2web_train/navigation_20250705.jsonl +Rank 0: Loading ws_android_navigation_20250328 +Rank 0: Skipping ws_android_navigation_20250328 due to repeat_time=0 +Rank 0: Loading ws_android_navigation_20250407 +Rank 0: Skipping ws_android_navigation_20250407 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_w_history_20250328 +Rank 0: Skipping ws_web_navigation_w_history_20250328 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_wo_history_20250328 +Rank 0: Skipping ws_web_navigation_wo_history_20250328 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_20250421 +Rank 0: Skipping ws_web_navigation_20250421 due to repeat_time=0 +Rank 0: Loading ws_ubuntu_navigation_20250328 +Rank 0: Skipping ws_ubuntu_navigation_20250328 due to repeat_time=0 +Rank 0: Loading ws_android_navigation_20250505 +Rank 0: Skipping ws_android_navigation_20250505 due to repeat_time=0 +Rank 0: Loading internal_android_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 48814 samples from VC:s3://gui-agent/data_20250612/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19042 samples from VC:s3://gui-agent/data_20250612/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8363 samples from VC:s3://gui-agent/data_20250612/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 26412 samples from VC:s3://gui-agent/data_20250612/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 57522 samples from VC:s3://gui-agent/data_20250612/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 1342 samples from VC:s3://gui-agent/data_20250624/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 15766 samples from VC:s3://gui-agent/data_20250624/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19280 samples from VC:s3://gui-agent/data_20250630/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 3560 samples from VC:s3://gui-agent/data_20250630/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 9258 samples from VC:s3://gui-agent/data_20250630/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 420 samples from VC:s3://gui-agent/data_20250707/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 8898 samples from VC:s3://gui-agent/data_20250707/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 21026 samples from VC:s3://gui-agent/data_20250707/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 4490 samples from VC:s3://gui-agent/data_20250707/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 22154 samples from VC:s3://gui-agent/data_20250714/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/web/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 11614 samples from VC:s3://gui-agent/data_20250714/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 16767 samples from VC:s3://gui-agent/data_20250714/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 48814 samples from VC:s3://gui-agent/data_20250612/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 11970 samples from VC:s3://gui-agent/data_20250612/android/planning_20250811.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19042 samples from VC:s3://gui-agent/data_20250612/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 4926 samples from VC:s3://gui-agent/data_20250612/mac/planning_20250811.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8363 samples from VC:s3://gui-agent/data_20250612/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3716 samples from VC:s3://gui-agent/data_20250612/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 26412 samples from VC:s3://gui-agent/data_20250612/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 4420 samples from VC:s3://gui-agent/data_20250612/windows/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 115044 samples from VC:s3://gui-agent/data_20250612/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2684 samples from VC:s3://gui-agent/data_20250624/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 15766 samples from VC:s3://gui-agent/data_20250624/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7402 samples from VC:s3://gui-agent/data_20250624/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19280 samples from VC:s3://gui-agent/data_20250630/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3746 samples from VC:s3://gui-agent/data_20250630/android/planning_20250811.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 3560 samples from VC:s3://gui-agent/data_20250630/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 2469 samples from VC:s3://gui-agent/data_20250630/mac/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 18516 samples from VC:s3://gui-agent/data_20250630/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 420 samples from VC:s3://gui-agent/data_20250707/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 243 samples from VC:s3://gui-agent/data_20250707/mac/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 8898 samples from VC:s3://gui-agent/data_20250707/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 1188 samples from VC:s3://gui-agent/data_20250707/windows/planning_20250811.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8979 samples from VC:s3://gui-agent/data_20250707/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 1599 samples from VC:s3://gui-agent/data_20250707/android/planning_20250811.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 21026 samples from VC:s3://gui-agent/data_20250707/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2657 samples from VC:s3://gui-agent/data_20250707/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 22154 samples from VC:s3://gui-agent/data_20250714/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2383 samples from VC:s3://gui-agent/data_20250714/windows/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/web/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 23229 samples from VC:s3://gui-agent/data_20250714/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 16767 samples from VC:s3://gui-agent/data_20250714/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2389 samples from VC:s3://gui-agent/data_20250714/ubuntu/planning_20250811.jsonl +Rank 0: Loading private_aig_share_0815_logo_oral_operation_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_oral_operation_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_oral_operation_d240924_v1.jsonl +Rank 0: Loading private_aig_share_0815_logo_region_caption_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_region_caption_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_region_caption_d240924_v1.jsonl +Rank 0: Loading private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2.jsonl with all sampling strategy +Rank 0: Loaded 20293 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2.jsonl +Rank 0: Loading private_ui_phone_comment_20240606_json_d20241023_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_comment_20240606_json_d20241023_v2.jsonl with all sampling strategy +Rank 0: Loaded 1055 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_comment_20240606_json_d20241023_v2.jsonl +Rank 0: Loading private_ui_internal_aig_json_d241126 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_json_d241126.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 6837 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_json_d241126.jsonl +Rank 0: Loading private_ui_internal_aig_xml_d241126 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_xml_d241126.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 6873 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_xml_d241126.jsonl +Rank 0: Loading OS_Altas_androidworld_grounding_d241120_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/OS_Altas_androidworld_grounding_d241120_v1.jsonl with all sampling strategy +Rank 0: Loaded 89860 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/OS_Altas_androidworld_grounding_d241120_v1.jsonl +Rank 0: Loading private_ui_aig_share_long_caption_20240604_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_long_caption_20240604_v1.jsonl with repeat:4 sampling strategy +Rank 0: Loaded 3156 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_long_caption_20240604_v1.jsonl +Rank 0: Loading aw_1218_grounding +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/grounding_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/grounding_new.jsonl +Rank 0: Loading aw_1218_regioncaption +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/regioncaption_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/regioncaption_new.jsonl +Rank 0: Loading aw_1218_oral_operation +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/oral_operation_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/oral_operation_new.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600.jsonl with all sampling strategy +Rank 0: Loaded 6600 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1.jsonl +Rank 0: Loading private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607.jsonl with all sampling strategy +Rank 0: Loaded 24620 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607.jsonl +Rank 0: Loading private_ui_phone_2403_long_caption_d20240604_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d20240604_v2.jsonl with all sampling strategy +Rank 0: Loaded 17196 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d20240604_v2.jsonl +Rank 0: Loading private_ui_phone_2403_long_caption_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 5998 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d240430_v1.jsonl +Rank 0: Loading private_ui_phone_2403_ocr_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ocr_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 31276 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ocr_d240430_v1.jsonl +Rank 0: Loading screen_qa_with_bbox_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_with_bbox_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 62401 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_with_bbox_d240430_v1.jsonl +Rank 0: Loading screenai_layout_20240604_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screenai_layout_20240604_v1.jsonl with all sampling strategy +Rank 0: Loaded 22076 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screenai_layout_20240604_v1.jsonl +Rank 0: Loading amex_grounding_d240813_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/amex_grounding_d240813_v1.jsonl with all sampling strategy +Rank 0: Loaded 102007 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/amex_grounding_d240813_v1.jsonl +Rank 0: Loading guicourse_guienv_text_grounding_1_d240815_v3 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_1_d240815_v3.jsonl with all sampling strategy +Rank 0: Loaded 63581 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_1_d240815_v3.jsonl +Rank 0: Loading guicourse_guienv_text_grounding_2_d240815_v3 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_2_d240815_v3.jsonl with all sampling strategy +Rank 0: Loaded 6852 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_2_d240815_v3.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1.jsonl +Rank 0: Loading screen_qa_short_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_short_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 27880 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_short_d240430_v1.jsonl +Rank 0: Loading private_aig_share_0815_logo_grounding_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_grounding_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_grounding_d240924_v1.jsonl +Rank 0: Loading private_schedual_extract_20240520_v2_r464_reprompt_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_schedual_extract_20240520_v2_r464_reprompt_d240607.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 928 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_schedual_extract_20240520_v2_r464_reprompt_d240607.jsonl +Rank 0: Loading private_ui2json_app_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_app_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2488 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_app_d20240822_v1.jsonl +Rank 0: Loading private_ui2json_os_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_os_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 1242 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_os_d20240822_v1.jsonl +Rank 0: Loading private_ui2json_web_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_web_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2360 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_web_d20240822_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607.jsonl with all sampling strategy +Rank 0: Loaded 3791 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607.jsonl +Rank 0: Loading private_ui_aig_share_2405_marker_recognition_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_marker_recognition_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5179 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_marker_recognition_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_ocr_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_ocr_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5090 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_ocr_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_operation_oral_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_operation_oral_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5070 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_operation_oral_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5248 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2408_region_caption_d240903_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2408_region_caption_d240903_v1.jsonl with all sampling strategy +Rank 0: Loaded 5854 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2408_region_caption_d240903_v1.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1.jsonl +Rank 0: Loading uground_web_direct_150k_description_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_direct_150k_description_filtered_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 74208 samples from VC:s3://gui/new_annotations/uground/web_direct_150k_description_filtered_202507011.jsonl +Rank 0: Loading uground_web_direct_258k_function_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_direct_258k_function_filtered_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 124132 samples from VC:s3://gui/new_annotations/uground/web_direct_258k_function_filtered_202507011.jsonl +Rank 0: Loading uground_web_hybrid_773k_max_25qa_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_hybrid_773k_max_25qa_filtered_new_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 621928 samples from VC:s3://gui/new_annotations/uground/web_hybrid_773k_max_25qa_filtered_new_202507011.jsonl +Rank 0: Loading altas_windows +Rank 0: Loading VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_windows_splited_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 537672 samples from VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_windows_splited_202507011.jsonl +Rank 0: Loading altas_linux +Rank 0: Loading VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_linux_splited_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 21578 samples from VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_linux_splited_202507011.jsonl +Rank 0: Loading atlas_macos +Rank 0: Loading VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_macos_splited_202507011.jsonl with random:20% sampling strategy +Rank 0: Loaded 3680 samples from VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_macos_splited_202507011.jsonl +Rank 0: Loading android_action_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/android/filter_action_grounding_20250405_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 5621 samples from VC:s3://gui/data_20250328/android/filter_action_grounding_20250405_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250328 +Rank 0: Loading VC:s3://gui-agent/data_20250328/windows/action_grounding_20250409_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 11980 samples from VC:s3://gui-agent/data_20250328/windows/action_grounding_20250409_202507011_20250722.jsonl +Rank 0: Loading web_action_grounding_20250328 +Rank 0: Loading VC:s3://gui-agent/data_20250328/web_25k/action_grounding_20250404_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 9459 samples from VC:s3://gui-agent/data_20250328/web_25k/action_grounding_20250404_202507011.jsonl +Rank 0: Loading ubuntu_action_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/ubuntu/action_grounding_20250407_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 328 samples from VC:s3://gui/data_20250310/ubuntu/action_grounding_20250407_202507011.jsonl +Rank 0: Loading ubuntu_action_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/ubuntu/action_grounding_20250407_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 54 samples from VC:s3://gui/data_20250317/ubuntu/action_grounding_20250407_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/action_grounding_20250421_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 480 samples from VC:s3://gui/data_20250317/windows/action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/crop_action_grounding_20250421_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 240 samples from VC:s3://gui/data_20250317/windows/crop_action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/action_grounding_20250421_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 944 samples from VC:s3://gui/data_20250310/windows/action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/crop_action_grounding_20250421_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 472 samples from VC:s3://gui/data_20250310/windows/crop_action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading mac_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/mac/action_grounding_20250410_202507011.jsonl with all sampling strategy +Rank 0: Loaded 1578 samples from VC:s3://gui-agent/data_20250407/mac/action_grounding_20250410_202507011.jsonl +Rank 0: Loading iphone_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/iphone/white/action_grounding_20250410_202507011.jsonl with all sampling strategy +Rank 0: Loaded 20394 samples from VC:s3://gui-agent/data_20250407/iphone/white/action_grounding_20250410_202507011.jsonl +Rank 0: Loading web_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/web/action_grounding_20250414_202507011.jsonl with random:20% sampling strategy +Rank 0: Loaded 14285 samples from VC:s3://gui-agent/data_20250407/web/action_grounding_20250414_202507011.jsonl +Rank 0: Loading android_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/android/action_grounding_20250410_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 3590 samples from VC:s3://gui-agent/data_20250407/android/action_grounding_20250410_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/action_grounding_20250416_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 21422 samples from VC:s3://gui-agent/data_20250407/windows/action_grounding_20250416_202507011_20250722.jsonl +Rank 0: Loading windows_human_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/human_action_grounding_20250416_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 100 samples from VC:s3://gui-agent/data_20250407/windows/human_action_grounding_20250416_202507011_20250722.jsonl +Rank 0: Loading windows_aug_cropping_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/sub_action_grounding_20250421_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 7675 samples from VC:s3://gui-agent/data_20250407/windows/sub_action_grounding_20250421_202507011.jsonl +Rank 0: Loading iphone_action_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/action_grounding_20250417_202507011.jsonl with all sampling strategy +Rank 0: Loaded 20116 samples from VC:s3://gui-agent/data_20250414/iphone/action_grounding_20250417_202507011.jsonl +Rank 0: Loading iphone_human_action_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/human_action_grounding_20250421_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2520 samples from VC:s3://gui-agent/data_20250414/iphone/human_action_grounding_20250421_202507011.jsonl +Rank 0: Loading mac_human_action_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/mac/human_action_grounding_20250418_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7814 samples from VC:s3://gui-agent/data_20250414/mac/human_action_grounding_20250418_202507011.jsonl +Rank 0: Loading android_action_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/Android/action_grounding_20250429_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 17838 samples from VC:s3://gui-agent/data_20250421/Android/action_grounding_20250429_202507011.jsonl +Rank 0: Loading android_action_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/Android/action_grounding_20250429_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 9008 samples from VC:s3://gui-agent/data_20250428/Android/action_grounding_20250429_202507011.jsonl +Rank 0: Loading web_canvas_action_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/web_canvas/action_grounding_20250429_202507011.jsonl with random:20% sampling strategy +Rank 0: Loaded 624 samples from VC:s3://gui-agent/data_20250428/web_canvas/action_grounding_20250429_202507011.jsonl +Rank 0: Loading web_action_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/web/action_grounding_20250505_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 100652 samples from VC:s3://gui-agent/data_20250421/web/action_grounding_20250505_202507011.jsonl +Rank 0: Loading ubuntu_action_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/ubuntu/action_grounding_20250505_202507011.jsonl with all sampling strategy +Rank 0: Loaded 28346 samples from VC:s3://gui-agent/data_20250428/ubuntu/action_grounding_20250505_202507011.jsonl +Rank 0: Loading android_action_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/android/action_grounding_20250506_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 4907 samples from VC:s3://gui-agent/data_20250505/android/action_grounding_20250506_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/action_grounding_20250508_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 2635 samples from VC:s3://gui-agent/data_20250505/windows/action_grounding_20250508_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/crop_action_grounding_20250508_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 5234 samples from VC:s3://gui-agent/data_20250505/windows/crop_action_grounding_20250508_202507011_20250722.jsonl +Rank 0: Loading ubuntu_action_grounding_20250508 +Rank 0: Loading VC:s3://gui-agent/data_20250508/ubuntu/action_grounding_20250509_202507011.jsonl with all sampling strategy +Rank 0: Loaded 3404 samples from VC:s3://gui-agent/data_20250508/ubuntu/action_grounding_20250509_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250510_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250510_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250510_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250510_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250526_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250526_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250526_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250526_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250526_3 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250527_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250527_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_3 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250527_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250527_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250529_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 17050 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250529_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250529_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 17050 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250529_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_1 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250510_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 2855 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250510_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_2 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250526_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 655 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250526_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_3 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250527_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 833 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250527_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_4 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250529_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 2643 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250529_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_1 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250510_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250510_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_2 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250526_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250526_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_3 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250527_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250527_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_4 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250529_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5286 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250529_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_5 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250510_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250510_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_6 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250526_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250526_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_7 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250527_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250527_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_8 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250529_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5286 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250529_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/action_grounding_20250619_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3130 samples from VC:s3://gui-agent/data_20250623/windows/action_grounding_20250619_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250619_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 3130 samples from VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250619_202507011_20250722.jsonl +Rank 0: Loading windows_hover_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/action_grounding_20250620_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 3333 samples from VC:s3://gui-agent/data_20250623/windows/action_grounding_20250620_202507011_20250722.jsonl +Rank 0: Loading windows_crop_hover_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250620_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 3333 samples from VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250620_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_1 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 833 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_2 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1666 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_3 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1666 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_pure_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_4 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 358 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_5 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 782 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_6 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 782 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_pure_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/action_grounding_20250627_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7946 samples from VC:s3://gui-agent/data_20250630/windows/action_grounding_20250627_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/crop_action_grounding_20250627_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 3973 samples from VC:s3://gui-agent/data_20250630/windows/crop_action_grounding_20250627_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_1 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 993 samples from VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1986 samples from VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1986 samples from VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_pure_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/action_grounding_20250630_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 1990 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/action_grounding_20250630_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_action_grounding_20250630_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1990 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_action_grounding_20250630_202507011_20250722.jsonl +Rank 0: Loading windows_human_action_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/action_grounding_20250703_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 5040 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/action_grounding_20250703_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_action_grounding_20250703_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 5040 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_action_grounding_20250703_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_4 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 630 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_5 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1260 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_6 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1260 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_pure_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_7 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 249 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_8 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 498 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_9 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 498 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_pure_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/action_grounding_20250708_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2538 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/action_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_action_grounding_20250708_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1269 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_action_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250707_1 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_action_grounding_20250708.jsonl with random:25% sampling strategy +Rank 0: Loaded 172 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_action_grounding_20250708.jsonl +Rank 0: Loading windows_aug_action_grounding_20250707_2 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_action_grounding_20250708.jsonl with random:25% sampling strategy +Rank 0: Loaded 634 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_action_grounding_20250708.jsonl +Rank 0: Loading windows_aug_action_grounding_20250707_3 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_action_grounding_20250708.jsonl with random:25% sampling strategy +Rank 0: Loaded 634 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_action_grounding_20250708.jsonl +Rank 0: Loading windows_human_action_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/action_grounding_20250717_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2832 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/action_grounding_20250717_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_action_grounding_20250717_20250722.jsonl with all sampling strategy +Rank 0: Loaded 2832 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_action_grounding_20250717_20250722.jsonl +Rank 0: Loading android_ocr_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/android/text_ocr_20250409.jsonl with all sampling strategy +Rank 0: Loaded 29878 samples from VC:s3://gui/data_20250328/android/text_ocr_20250409.jsonl +Rank 0: Loading mac_orc_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/mac/element_ocr_20250328.jsonl with all sampling strategy +Rank 0: Loaded 4393 samples from VC:s3://gui/data_20250328/mac/element_ocr_20250328.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/ubuntu/internvl_grounding_20250407.jsonl with random:80% sampling strategy +Rank 0: Loaded 254 samples from VC:s3://gui/data_20250310/ubuntu/internvl_grounding_20250407.jsonl +Rank 0: Loading windows_click_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/internvl_grounding_function_20250421.jsonl with random:80% sampling strategy +Rank 0: Loaded 1802 samples from VC:s3://gui/data_20250310/windows/internvl_grounding_function_20250421.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/ubuntu/internvl_grounding_20250407.jsonl with random:80% sampling strategy +Rank 0: Loaded 53 samples from VC:s3://gui/data_20250317/ubuntu/internvl_grounding_20250407.jsonl +Rank 0: Loading windows_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/internvl_grounding_20250421_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 6578 samples from VC:s3://gui/data_20250317/windows/internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/crop_internvl_grounding_20250421_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 3287 samples from VC:s3://gui/data_20250317/windows/crop_internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading windows_click_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/internvl_grounding_function_20250421_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 542 samples from VC:s3://gui/data_20250317/windows/internvl_grounding_function_20250421_20250722.jsonl +Rank 0: Loading windows_crop_click_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/crop_internvl_grounding_function_20250421_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 270 samples from VC:s3://gui/data_20250317/windows/crop_internvl_grounding_function_20250421_20250722.jsonl +Rank 0: Loading windows_internvl_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/internvl_grounding_20250421_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 10908 samples from VC:s3://gui/data_20250310/windows/internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/crop_internvl_grounding_20250421_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 5453 samples from VC:s3://gui/data_20250310/windows/crop_internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading android_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/android/internvl_grounding_20250409.jsonl with random:80% sampling strategy +Rank 0: Loaded 13950 samples from VC:s3://gui/data_20250328/android/internvl_grounding_20250409.jsonl +Rank 0: Loading windows_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui-agent/data_20250328/windows/internvl_grounding_20250425_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 12390 samples from VC:s3://gui-agent/data_20250328/windows/internvl_grounding_20250425_20250722.jsonl +Rank 0: Loading web_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/web_25k/internvl_grounding_20250409.jsonl with random:80% sampling strategy +Rank 0: Loaded 13402 samples from VC:s3://gui/data_20250328/web_25k/internvl_grounding_20250409.jsonl +Rank 0: Loading icon_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/icon_canva/icon_anno_20250328.jsonl with all sampling strategy +Rank 0: Loaded 81303 samples from VC:s3://gui/data_20250328/icon_canva/icon_anno_20250328.jsonl +Rank 0: Loading mac_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/mac/internvl_grounding_20250410.jsonl with all sampling strategy +Rank 0: Loaded 1251 samples from VC:s3://gui-agent/data_20250407/mac/internvl_grounding_20250410.jsonl +Rank 0: Loading iphone_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/iphone/white/internvl_grounding_20250410.jsonl with all sampling strategy +Rank 0: Loaded 27849 samples from VC:s3://gui-agent/data_20250407/iphone/white/internvl_grounding_20250410.jsonl +Rank 0: Loading web_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/web/internvl_grounding_20250414.jsonl with random:80% sampling strategy +Rank 0: Loaded 51607 samples from VC:s3://gui-agent/data_20250407/web/internvl_grounding_20250414.jsonl +Rank 0: Loading android_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/android/internvl_grounding_20250410.jsonl with random:80% sampling strategy +Rank 0: Loaded 6281 samples from VC:s3://gui-agent/data_20250407/android/internvl_grounding_20250410.jsonl +Rank 0: Loading windows_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/internvl_grounding_20250416_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 25961 samples from VC:s3://gui-agent/data_20250407/windows/internvl_grounding_20250416_20250722.jsonl +Rank 0: Loading windows_cropping_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/sub_internvl_grounding_20250421.jsonl with random:40% sampling strategy +Rank 0: Loaded 9763 samples from VC:s3://gui-agent/data_20250407/windows/sub_internvl_grounding_20250421.jsonl +Rank 0: Loading iphone_internvl_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/internvl_grounding_20250417.jsonl with all sampling strategy +Rank 0: Loaded 16896 samples from VC:s3://gui-agent/data_20250414/iphone/internvl_grounding_20250417.jsonl +Rank 0: Loading iphone_human_internvl_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/human_internvl_grounding_20250421.jsonl with all sampling strategy +Rank 0: Loaded 927 samples from VC:s3://gui-agent/data_20250414/iphone/human_internvl_grounding_20250421.jsonl +Rank 0: Loading mac_human_internvl_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/mac/human_internvl_grounding_20250418.jsonl with all sampling strategy +Rank 0: Loaded 3051 samples from VC:s3://gui-agent/data_20250414/mac/human_internvl_grounding_20250418.jsonl +Rank 0: Loading android_internvl_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/Android/internvl_grounding_20250429.jsonl with random:80% sampling strategy +Rank 0: Loaded 25217 samples from VC:s3://gui-agent/data_20250421/Android/internvl_grounding_20250429.jsonl +Rank 0: Loading android_internvl_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/Android/internvl_grounding_20250429.jsonl with random:80% sampling strategy +Rank 0: Loaded 12677 samples from VC:s3://gui-agent/data_20250428/Android/internvl_grounding_20250429.jsonl +Rank 0: Loading web_canvas_internvl_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/web_canvas/internvl_grounding_20250429.jsonl with random:40% sampling strategy +Rank 0: Loaded 1566 samples from VC:s3://gui-agent/data_20250428/web_canvas/internvl_grounding_20250429.jsonl +Rank 0: Loading web_internvl_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/web/internvl_grounding_20250505.jsonl with random:80% sampling strategy +Rank 0: Loaded 174170 samples from VC:s3://gui-agent/data_20250421/web/internvl_grounding_20250505.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/ubuntu/internvl_grounding_20250505.jsonl with random:80% sampling strategy +Rank 0: Loaded 24862 samples from VC:s3://gui-agent/data_20250428/ubuntu/internvl_grounding_20250505.jsonl +Rank 0: Loading android_internvl_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/android/internvl_grounding_20250506.jsonl with random:80% sampling strategy +Rank 0: Loaded 9142 samples from VC:s3://gui-agent/data_20250505/android/internvl_grounding_20250506.jsonl +Rank 0: Loading windows_internvl_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/internvl_grounding_20250508_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 4229 samples from VC:s3://gui-agent/data_20250505/windows/internvl_grounding_20250508_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/crop_internvl_grounding_20250508_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 4200 samples from VC:s3://gui-agent/data_20250505/windows/crop_internvl_grounding_20250508_20250722.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250508 +Rank 0: Loading VC:s3://gui-agent/data_20250508/ubuntu/internvl_grounding_20250509.jsonl with random:80% sampling strategy +Rank 0: Loaded 2868 samples from VC:s3://gui-agent/data_20250508/ubuntu/internvl_grounding_20250509.jsonl +Rank 0: Loading windows_internvl_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250510_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 9494 samples from VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250510_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250510_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 9494 samples from VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250510_20250722.jsonl +Rank 0: Loading windows_internvl_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250526_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 2270 samples from VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250526_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250526_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 2270 samples from VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250526_20250722.jsonl +Rank 0: Loading windows_internvl_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250529_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 28466 samples from VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250529_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250529_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 28466 samples from VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250529_20250722.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250619_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1620 samples from VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250619_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250619_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1620 samples from VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250619_20250722.jsonl +Rank 0: Loading windows_hover_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250620_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1714 samples from VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250620_20250722.jsonl +Rank 0: Loading windows_crop_hover_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250620_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 1714 samples from VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250620_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_1 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 1372 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_2 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2743 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_3 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2743 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_pure_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_4 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 590 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_5 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 1296 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_6 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 1296 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_pure_paste.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/internvl_grounding_20250627_20250722.jsonl with all sampling strategy +Rank 0: Loaded 4230 samples from VC:s3://gui-agent/data_20250630/windows/internvl_grounding_20250627_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/crop_internvl_grounding_20250627_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 4230 samples from VC:s3://gui-agent/data_20250630/windows/crop_internvl_grounding_20250627_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_1 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 1692 samples from VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 3384 samples from VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 3384 samples from VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_pure_paste.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/internvl_grounding_20250630_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1077 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/internvl_grounding_20250630_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_internvl_grounding_20250630_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 862 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_internvl_grounding_20250630_20250722.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/internvl_grounding_20250703_20250722.jsonl with all sampling strategy +Rank 0: Loaded 2676 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/internvl_grounding_20250703_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_internvl_grounding_20250703_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 2676 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_internvl_grounding_20250703_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_4 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 1070 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_5 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2141 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_6 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2141 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_pure_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_7 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 431 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_8 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 862 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_9 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 862 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_pure_paste.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/internvl_grounding_20250708_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1344 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/internvl_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_internvl_grounding_20250708_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1344 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_internvl_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250707_1 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_internvl_grounding_20250708.jsonl with random:40% sampling strategy +Rank 0: Loaded 292 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_internvl_grounding_20250708.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250707_2 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_internvl_grounding_20250708.jsonl with random:40% sampling strategy +Rank 0: Loaded 1075 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_internvl_grounding_20250708.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250707_3 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_internvl_grounding_20250708.jsonl with random:40% sampling strategy +Rank 0: Loaded 1075 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_internvl_grounding_20250708.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/internvl_grounding_20250717_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1509 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/internvl_grounding_20250717_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_internvl_grounding_20250717_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 1207 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_internvl_grounding_20250717_20250722.jsonl +Rank 0: Loading uibert_train_ground_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/uibert_train_ground_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 4646 samples from VC:s3://gui/new_annotations/gui_data_grounding/uibert_train_ground_d240430_v1.jsonl +Rank 0: Loading openapp_taperception_grounding_d240815_v2 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/openapp_taperception_grounding_d240815_v2.jsonl with all sampling strategy +Rank 0: Loaded 2500 samples from VC:s3://gui/new_annotations/gui_data_grounding/openapp_taperception_grounding_d240815_v2.jsonl +Rank 0: Loading openapp_widget_grounding_d240815_v2 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/openapp_widget_grounding_d240815_v2.jsonl with all sampling strategy +Rank 0: Loaded 14878 samples from VC:s3://gui/new_annotations/gui_data_grounding/openapp_widget_grounding_d240815_v2.jsonl +Rank 0: Loading openapp_mug_grounding_d240812 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/openapp_mug_grounding_d240812.jsonl with all sampling strategy +Rank 0: Loaded 26090 samples from VC:s3://gui/new_annotations/gui_data_grounding/openapp_mug_grounding_d240812.jsonl +Rank 0: Loading private_ui_phone_2403_ground_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/private_ui_phone_2403_ground_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 24798 samples from VC:s3://gui/new_annotations/gui_data_grounding/private_ui_phone_2403_ground_d240430_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_ground_d240521_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2405_ground_d240521_v1.jsonl with all sampling strategy +Rank 0: Loaded 5008 samples from VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2405_ground_d240521_v1.jsonl +Rank 0: Loading private_ui_aig_share_2406_ground_d240612_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2406_ground_d240612_v1.jsonl with all sampling strategy +Rank 0: Loaded 7903 samples from VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2406_ground_d240612_v1.jsonl +Rank 0: Loading windows_pc_agent_e_planning_cot +Rank 0: Loading VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 55564 samples from VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data.jsonl +Rank 0: Loading windows_pc_agent_e_navigation +Rank 0: Loading VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data_without_think.jsonl with all sampling strategy +Rank 0: Loaded 27782 samples from VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data_without_think.jsonl +Rank 0: Loading os_genesis_ac_training_data +Rank 0: Skipping os_genesis_ac_training_data due to repeat_time=0 +Rank 0: Loading os_genesis_aw_training_data +Rank 0: Skipping os_genesis_aw_training_data due to repeat_time=0 +Rank 0: Loading os_genesis_web_training +Rank 0: Skipping os_genesis_web_training due to repeat_time=0 +Rank 0: Loading gui_odyssey_plus_1 +Rank 0: Skipping gui_odyssey_plus_1 due to repeat_time=0 +Rank 0: Loading gui_odyssey_plus_2 +Rank 0: Skipping gui_odyssey_plus_2 due to repeat_time=0 +Rank 0: Loading gui_odyssey_plus_custom_3 +Rank 0: Skipping gui_odyssey_plus_custom_3 due to repeat_time=0 +Rank 0: Loading mm_gui_mid +Rank 0: Skipping mm_gui_mid due to repeat_time=0 +Rank 0: Loading text_gui_mid +Rank 0: Skipping text_gui_mid due to repeat_time=0 +Rank 0: Loading gui_mid_trajectory +Rank 0: Skipping gui_mid_trajectory due to repeat_time=0 +Rank 0: Loading ubuntu_rag +Rank 0: Loading VC:s3://gui-agent/cua_text_rag/ubuntu_rag.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7024 samples from VC:s3://gui-agent/cua_text_rag/ubuntu_rag.jsonl +Rank 0: Loading windows_rag +Rank 0: Loading VC:s3://gui-agent/cua_text_rag/windows_rag.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3144 samples from VC:s3://gui-agent/cua_text_rag/windows_rag.jsonl +Rank 0: Total training samples: 6240940 +Rank 0: Formatting inputs...Skip in lazy mode +Detected kernel version 3.10.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher. +Rank 0: Length of multimodal samples: 6230528, pure textual samples: 9984 +Parameter Offload: Total persistent parameters: 755712 in 408 params +W0818 09:38:38.881000 55169 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 09:38:38.881000 55169 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 09:38:38.881000 55169 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 09:38:38.881000 55169 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 09:38:40.561000 64777 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 09:38:40.561000 64777 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 09:38:40.561000 64777 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 09:38:40.561000 64777 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 09:38:43.750000 69412 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 09:38:43.750000 69412 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 09:38:43.750000 69412 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 09:38:43.750000 69412 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 09:38:44.684000 102685 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 09:38:44.684000 102685 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 09:38:44.684000 102685 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 09:38:44.684000 102685 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 09:38:48.390000 47573 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 09:38:48.390000 47573 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 09:38:48.390000 47573 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 09:38:48.390000 47573 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 09:38:50.401000 88318 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 09:38:50.401000 88318 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 09:38:50.401000 88318 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 09:38:50.401000 88318 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 09:38:51.951000 47417 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 09:38:51.951000 47417 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 09:38:51.951000 47417 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 09:38:51.951000 47417 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +[2025-08-18 09:39:11,639] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:39:11,655] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:39:11,655] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:39:11,655] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:39:11,655] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:39:11,655] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:39:11,655] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:39:11,655] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:39:15,334] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:39:15,335] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:39:15,335] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:39:15,336] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:39:15,336] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:39:15,336] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:39:15,336] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:39:15,337] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:39:15,758] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 09:39:15,879] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 09:39:15,879] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 09:39:15,891] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 09:39:15,892] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 09:39:15,892] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 09:39:15,895] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 09:39:15,896] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 09:39:21,105] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:39:21,106] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:39:21,106] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:39:21,141] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:39:21,140] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:39:21,140] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:39:21,147] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:39:21,148] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:39:21,154] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:39:21,157] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:39:21,153] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:39:21,154] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:39:21,154] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:39:21,154] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:39:21,159] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:39:21,159] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:39:21,165] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:39:21,166] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:39:21,166] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:39:21,172] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:39:21,173] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:39:21,173] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:39:21,176] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:39:21,176] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:39:21,178] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:39:21,178] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:39:28,042] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:39:28,048] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:39:28,043] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:39:28,043] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:39:28,043] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:39:28,043] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:39:28,043] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:39:28,043] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:39:28,046] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:39:28,046] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:39:28,046] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:39:28,046] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:39:28,046] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:39:28,047] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:39:28,047] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:39:28,048] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:39:28,495] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 09:39:28,515] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 09:39:28,527] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 09:39:28,533] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 09:39:28,624] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 09:39:28,632] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +ce None world_size = 64 +[2025-08-18 09:39:28,633] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 09:39:28,633] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 09:39:28,629] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 09:39:28,631] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. + on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 09:39:28,634] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 09:39:28,634] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 09:39:28,634] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 09:39:28,653] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 09:39:28,659] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 09:39:28,659] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 09:39:28,664] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 09:39:28,666] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 09:39:28,671] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 09:39:28,668] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 09:39:28,668] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +ce None world_size = 64 +[2025-08-18 09:39:28,675] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 09:39:28,676] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 09:39:28,676] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 09:39:29,811] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:39:29,841] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:39:29,842] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:39:29,842] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:39:29,843] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:39:29,843] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:39:29,843] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:39:29,843] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:39:33,829] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:39:33,861] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:39:33,861] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:39:33,896] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:39:33,896] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:39:33,897] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:39:33,897] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:39:33,897] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 09:39:37,092] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:39:37,092] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:39:37,092] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:39:37,092] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:39:37,093] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:39:37,093] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:39:37,093] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:39:37,094] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:39:37,686] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 09:39:37,821] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 09:39:37,830] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 09:39:37,831] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 09:39:37,831] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 09:39:37,831] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 09:39:37,832] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 09:39:37,832] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 09:39:43,497] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:39:43,497] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:39:43,497] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:39:43,498] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:39:43,498] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:39:43,499] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:39:43,499] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:39:43,499] [INFO] [comm.py:683:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl +[2025-08-18 09:39:43,500] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 09:39:44,491] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 09:39:44,649] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 09:39:44,671] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 09:39:44,684] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 09:39:44,707] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 09:39:44,711] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 09:39:44,719] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 09:39:44,720] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. + Loading checkpoint shards: 0%| | 0/2 [00:00 before Client(conf_path) +Rank 0: --> after Client(conf_path) +Rank 0: Loading datasets: /mnt/petrelfs/liuzhaoyang/workspace/GUIAgent/internvl_chat/data/internvl_meta/meta/meta_250816_1.json +Rank 0: Loading guienv +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl with all sampling strategy +Rank 0: Loaded 327972 samples from VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl +Rank 0: Loading omniact +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl with all sampling strategy +Rank 0: Loaded 6720 samples from VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl +Rank 0: Loading ricoig16k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16133 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl +Rank 0: Loading ricosca +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl with all sampling strategy +Rank 0: Loaded 173212 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl +Rank 0: Loading seeclick +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl with all sampling strategy +Rank 0: Loaded 271121 samples from VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl +Rank 0: Loading ui_refexp +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl with all sampling strategy +Rank 0: Loaded 15624 samples from VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl +Rank 0: Loading webui350k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 57389 samples from VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl +Rank 0: Loading widget_captioning +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl with all sampling strategy +Rank 0: Loaded 101426 samples from VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl +Rank 0: Loading aitw-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl +Rank 0: Loading aitw-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl +Rank 0: Loading aitw-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl +Rank 0: Loading amex-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl +Rank 0: Loading amex-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl +Rank 0: Loading amex-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl +Rank 0: Loading android_control +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 149428 samples from VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl +Rank 0: Loading coat +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl with all sampling strategy +Rank 0: Loaded 11833 samples from VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl +Rank 0: Loading guiact-web-multi-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl +Rank 0: Loading guiact-web-multi-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl +Rank 0: Loading guiact-web-multi-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl +Rank 0: Loading guiact-web-single +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl with all sampling strategy +Rank 0: Loaded 67396 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl +Rank 0: Loading guide +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl with all sampling strategy +Rank 0: Loaded 13544 samples from VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl +Rank 0: Loading gui-odyssey-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl +Rank 0: Loading gui-odyssey-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl +Rank 0: Loading gui-odyssey-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl +Rank 0: Loading mind2web-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l1_202507011.jsonl +Rank 0: Loading mind2web-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l2_202507011.jsonl +Rank 0: Loading mind2web-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l3_202507011.jsonl +Rank 0: Loading miniwob-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l1_202507011.jsonl +Rank 0: Loading miniwob-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l2_202507011.jsonl +Rank 0: Loading miniwob-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l3_202507011.jsonl +Rank 0: Loading aguvis_android_control-v2 +Rank 0: Skipping aguvis_android_control-v2 due to repeat_time=0 +Rank 0: Loading aguvis_coat-v2 +Rank 0: Skipping aguvis_coat-v2 due to repeat_time=0 +Rank 0: Loading aguvis_docvqa_grounding +Rank 0: Skipping aguvis_docvqa_grounding due to repeat_time=0 +Rank 0: Loading aguvis_guiact-web-multi +Rank 0: Skipping aguvis_guiact-web-multi due to repeat_time=0 +Rank 0: Loading aguvis_guiact-web-single-v2 +Rank 0: Skipping aguvis_guiact-web-single-v2 due to repeat_time=0 +Rank 0: Loading aguvis_guide_si_10k-v2 +Rank 0: Skipping aguvis_guide_si_10k-v2 due to repeat_time=0 +Rank 0: Loading aguvis_guienv +Rank 0: Skipping aguvis_guienv due to repeat_time=0 +Rank 0: Loading aguvis_mind2web_train_v1.0.1 +Rank 0: Skipping aguvis_mind2web_train_v1.0.1 due to repeat_time=0 +Rank 0: Loading aguvis_omniact +Rank 0: Skipping aguvis_omniact due to repeat_time=0 +Rank 0: Loading aguvis_osatlas_ui_tars_cleaned +Rank 0: Skipping aguvis_osatlas_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading aguvis_ricoig16k +Rank 0: Skipping aguvis_ricoig16k due to repeat_time=0 +Rank 0: Loading aguvis_ricosca +Rank 0: Skipping aguvis_ricosca due to repeat_time=0 +Rank 0: Loading aguvis_seeclick_mi_ui_tars_cleaned +Rank 0: Skipping aguvis_seeclick_mi_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading aguvis_seeclick_ui_tars_cleaned_fixed +Rank 0: Skipping aguvis_seeclick_ui_tars_cleaned_fixed due to repeat_time=0 +Rank 0: Loading aguvis_ui_refexp +Rank 0: Skipping aguvis_ui_refexp due to repeat_time=0 +Rank 0: Loading aguvis_webui350k +Rank 0: Skipping aguvis_webui350k due to repeat_time=0 +Rank 0: Loading aguvis_widget_captioning +Rank 0: Skipping aguvis_widget_captioning due to repeat_time=0 +Rank 0: Loading icon_caption_icon_v0222_description +Rank 0: Skipping icon_caption_icon_v0222_description due to repeat_time=0 +Rank 0: Loading icon_grounding_icon_v0222_grounding +Rank 0: Skipping icon_grounding_icon_v0222_grounding due to repeat_time=0 +Rank 0: Loading refusal_component_final_1.5m +Rank 0: Skipping refusal_component_final_1.5m due to repeat_time=0 +Rank 0: Loading refusal_component_library_snap_icon_data_grounding +Rank 0: Skipping refusal_component_library_snap_icon_data_grounding due to repeat_time=0 +Rank 0: Loading refusal_component_v1_130k +Rank 0: Skipping refusal_component_v1_130k due to repeat_time=0 +Rank 0: Loading refusal_guienv +Rank 0: Skipping refusal_guienv due to repeat_time=0 +Rank 0: Loading refusal_icon_v0222_grounding +Rank 0: Skipping refusal_icon_v0222_grounding due to repeat_time=0 +Rank 0: Loading refusal_osatlas_ui_tars_cleaned +Rank 0: Skipping refusal_osatlas_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading refusal_ricosca +Rank 0: Skipping refusal_ricosca due to repeat_time=0 +Rank 0: Loading refusal_seeclick_mi_ui_tars_cleaned +Rank 0: Skipping refusal_seeclick_mi_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading refusal_seeclick_ui_tars_cleaned_fixed +Rank 0: Skipping refusal_seeclick_ui_tars_cleaned_fixed due to repeat_time=0 +Rank 0: Loading refusal_training_data_icon_grounded_merged +Rank 0: Skipping refusal_training_data_icon_grounded_merged due to repeat_time=0 +Rank 0: Loading component_generated_component_final_1.5m_cleaned_split +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_final_1.5m_cleaned_split_20250713.jsonl with random:10% sampling strategy +Rank 0: Loaded 3987 samples from VC:s3://gui-agent/jedi/annotations_250713/component_final_1.5m_cleaned_split_20250713.jsonl +Rank 0: Loading component_generated_component_library_snap_icon_data_description +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_description_conversations_20250713.jsonl with random:50% sampling strategy +Rank 0: Loaded 11061 samples from VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_description_conversations_20250713.jsonl +Rank 0: Loading component_generated_component_library_snap_icon_data_grounding +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_grounding_conversations_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 4424 samples from VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_grounding_conversations_20250713.jsonl +Rank 0: Loading component_generated_component_v1_130k +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_v1_130k_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 26376 samples from VC:s3://gui-agent/jedi/annotations_250713/component_v1_130k_20250713.jsonl +Rank 0: Loading component_rule-based_doc_data_new +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/doc_data_new_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 3153 samples from VC:s3://gui-agent/jedi/annotations_250713/doc_data_new_20250713.jsonl +Rank 0: Loading component_rule-based_doc_scroll_data_new +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/doc_scroll_data_new_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 603 samples from VC:s3://gui-agent/jedi/annotations_250713/doc_scroll_data_new_20250713.jsonl +Rank 0: Loading component_rule-based_ethercalc_v1 +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/ethercalc_v1_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 2012 samples from VC:s3://gui-agent/jedi/annotations_250713/ethercalc_v1_20250713.jsonl +Rank 0: Loading component_rule-based_slide_v1_17k +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/slide_v1_17k_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 2363 samples from VC:s3://gui-agent/jedi/annotations_250713/slide_v1_17k_20250713.jsonl +Rank 0: Loading icon_caption_ios_app_data +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/ios_app_data_conversations-images_pure_color_background_20250713.jsonl with all sampling strategy +Rank 0: Loaded 49498 samples from VC:s3://gui-agent/jedi/annotations_250713/ios_app_data_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_caption_mac_app_data +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/mac_app_data_conversations-images_pure_color_background_20250713.jsonl with all sampling strategy +Rank 0: Loaded 18083 samples from VC:s3://gui-agent/jedi/annotations_250713/mac_app_data_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_caption_training_data_icon +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_pure_color_background_20250713.jsonl with random:50% sampling strategy +Rank 0: Loaded 75874 samples from VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_grounding_training_data_icon_grounded_merged +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_grounded_merged_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 5466 samples from VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_grounded_merged_20250713.jsonl +Rank 0: Loading layout_layout200k_training_data_qwen25 +Rank 0: Skipping layout_layout200k_training_data_qwen25 due to repeat_time=0 +Rank 0: Loading layout_layout200k_grounding_training_data_qwen25 +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/layout200k_grounding_training_data_qwen25_20250713.jsonl with random:10% sampling strategy +Rank 0: Loaded 158612 samples from VC:s3://gui-agent/jedi/annotations_250713/layout200k_grounding_training_data_qwen25_20250713.jsonl +Rank 0: Loading layout_layout400k_claude_training_data_qwen25_split +Rank 0: Skipping layout_layout400k_claude_training_data_qwen25_split due to repeat_time=0 +Rank 0: Loading layout_layout400k_claude_grounding_training_data_qwen25_split +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/layout400k_claude_grounding_training_data_qwen25_split_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 7540 samples from VC:s3://gui-agent/jedi/annotations_250713/layout400k_claude_grounding_training_data_qwen25_split_20250713.jsonl +Rank 0: Loading layout_os_layout_v1 +Rank 0: Skipping layout_os_layout_v1 due to repeat_time=0 +Rank 0: Loading layout_os_layout_v1_grounding +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/os_layout_v1_grounding_training_data_qwen25_split_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 7857 samples from VC:s3://gui-agent/jedi/annotations_250713/os_layout_v1_grounding_training_data_qwen25_split_20250713.jsonl +Rank 0: Loading mind2web_raw_image +Rank 0: Loading VC:s3://gui-agent/mind2web_train/navigation_20250705.jsonl with all sampling strategy +Rank 0: Loaded 5740 samples from VC:s3://gui-agent/mind2web_train/navigation_20250705.jsonl +Rank 0: Loading ws_android_navigation_20250328 +Rank 0: Skipping ws_android_navigation_20250328 due to repeat_time=0 +Rank 0: Loading ws_android_navigation_20250407 +Rank 0: Skipping ws_android_navigation_20250407 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_w_history_20250328 +Rank 0: Skipping ws_web_navigation_w_history_20250328 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_wo_history_20250328 +Rank 0: Skipping ws_web_navigation_wo_history_20250328 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_20250421 +Rank 0: Skipping ws_web_navigation_20250421 due to repeat_time=0 +Rank 0: Loading ws_ubuntu_navigation_20250328 +Rank 0: Skipping ws_ubuntu_navigation_20250328 due to repeat_time=0 +Rank 0: Loading ws_android_navigation_20250505 +Rank 0: Skipping ws_android_navigation_20250505 due to repeat_time=0 +Rank 0: Loading internal_android_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 48814 samples from VC:s3://gui-agent/data_20250612/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19042 samples from VC:s3://gui-agent/data_20250612/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8363 samples from VC:s3://gui-agent/data_20250612/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 26412 samples from VC:s3://gui-agent/data_20250612/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 57522 samples from VC:s3://gui-agent/data_20250612/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 1342 samples from VC:s3://gui-agent/data_20250624/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 15766 samples from VC:s3://gui-agent/data_20250624/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19280 samples from VC:s3://gui-agent/data_20250630/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 3560 samples from VC:s3://gui-agent/data_20250630/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 9258 samples from VC:s3://gui-agent/data_20250630/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 420 samples from VC:s3://gui-agent/data_20250707/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 8898 samples from VC:s3://gui-agent/data_20250707/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 21026 samples from VC:s3://gui-agent/data_20250707/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 4490 samples from VC:s3://gui-agent/data_20250707/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 22154 samples from VC:s3://gui-agent/data_20250714/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/web/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 11614 samples from VC:s3://gui-agent/data_20250714/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 16767 samples from VC:s3://gui-agent/data_20250714/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 48814 samples from VC:s3://gui-agent/data_20250612/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 11970 samples from VC:s3://gui-agent/data_20250612/android/planning_20250811.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19042 samples from VC:s3://gui-agent/data_20250612/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 4926 samples from VC:s3://gui-agent/data_20250612/mac/planning_20250811.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8363 samples from VC:s3://gui-agent/data_20250612/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3716 samples from VC:s3://gui-agent/data_20250612/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 26412 samples from VC:s3://gui-agent/data_20250612/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 4420 samples from VC:s3://gui-agent/data_20250612/windows/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 115044 samples from VC:s3://gui-agent/data_20250612/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2684 samples from VC:s3://gui-agent/data_20250624/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 15766 samples from VC:s3://gui-agent/data_20250624/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7402 samples from VC:s3://gui-agent/data_20250624/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19280 samples from VC:s3://gui-agent/data_20250630/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3746 samples from VC:s3://gui-agent/data_20250630/android/planning_20250811.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 3560 samples from VC:s3://gui-agent/data_20250630/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 2469 samples from VC:s3://gui-agent/data_20250630/mac/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 18516 samples from VC:s3://gui-agent/data_20250630/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 420 samples from VC:s3://gui-agent/data_20250707/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 243 samples from VC:s3://gui-agent/data_20250707/mac/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 8898 samples from VC:s3://gui-agent/data_20250707/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 1188 samples from VC:s3://gui-agent/data_20250707/windows/planning_20250811.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8979 samples from VC:s3://gui-agent/data_20250707/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 1599 samples from VC:s3://gui-agent/data_20250707/android/planning_20250811.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 21026 samples from VC:s3://gui-agent/data_20250707/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2657 samples from VC:s3://gui-agent/data_20250707/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 22154 samples from VC:s3://gui-agent/data_20250714/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2383 samples from VC:s3://gui-agent/data_20250714/windows/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/web/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 23229 samples from VC:s3://gui-agent/data_20250714/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 16767 samples from VC:s3://gui-agent/data_20250714/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2389 samples from VC:s3://gui-agent/data_20250714/ubuntu/planning_20250811.jsonl +Rank 0: Loading private_aig_share_0815_logo_oral_operation_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_oral_operation_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_oral_operation_d240924_v1.jsonl +Rank 0: Loading private_aig_share_0815_logo_region_caption_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_region_caption_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_region_caption_d240924_v1.jsonl +Rank 0: Loading private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2.jsonl with all sampling strategy +Rank 0: Loaded 20293 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2.jsonl +Rank 0: Loading private_ui_phone_comment_20240606_json_d20241023_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_comment_20240606_json_d20241023_v2.jsonl with all sampling strategy +Rank 0: Loaded 1055 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_comment_20240606_json_d20241023_v2.jsonl +Rank 0: Loading private_ui_internal_aig_json_d241126 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_json_d241126.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 6837 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_json_d241126.jsonl +Rank 0: Loading private_ui_internal_aig_xml_d241126 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_xml_d241126.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 6873 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_xml_d241126.jsonl +Rank 0: Loading OS_Altas_androidworld_grounding_d241120_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/OS_Altas_androidworld_grounding_d241120_v1.jsonl with all sampling strategy +Rank 0: Loaded 89860 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/OS_Altas_androidworld_grounding_d241120_v1.jsonl +Rank 0: Loading private_ui_aig_share_long_caption_20240604_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_long_caption_20240604_v1.jsonl with repeat:4 sampling strategy +Rank 0: Loaded 3156 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_long_caption_20240604_v1.jsonl +Rank 0: Loading aw_1218_grounding +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/grounding_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/grounding_new.jsonl +Rank 0: Loading aw_1218_regioncaption +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/regioncaption_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/regioncaption_new.jsonl +Rank 0: Loading aw_1218_oral_operation +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/oral_operation_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/oral_operation_new.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600.jsonl with all sampling strategy +Rank 0: Loaded 6600 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1.jsonl +Rank 0: Loading private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607.jsonl with all sampling strategy +Rank 0: Loaded 24620 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607.jsonl +Rank 0: Loading private_ui_phone_2403_long_caption_d20240604_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d20240604_v2.jsonl with all sampling strategy +Rank 0: Loaded 17196 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d20240604_v2.jsonl +Rank 0: Loading private_ui_phone_2403_long_caption_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 5998 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d240430_v1.jsonl +Rank 0: Loading private_ui_phone_2403_ocr_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ocr_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 31276 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ocr_d240430_v1.jsonl +Rank 0: Loading screen_qa_with_bbox_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_with_bbox_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 62401 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_with_bbox_d240430_v1.jsonl +Rank 0: Loading screenai_layout_20240604_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screenai_layout_20240604_v1.jsonl with all sampling strategy +Rank 0: Loaded 22076 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screenai_layout_20240604_v1.jsonl +Rank 0: Loading amex_grounding_d240813_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/amex_grounding_d240813_v1.jsonl with all sampling strategy +Rank 0: Loaded 102007 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/amex_grounding_d240813_v1.jsonl +Rank 0: Loading guicourse_guienv_text_grounding_1_d240815_v3 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_1_d240815_v3.jsonl with all sampling strategy +Rank 0: Loaded 63581 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_1_d240815_v3.jsonl +Rank 0: Loading guicourse_guienv_text_grounding_2_d240815_v3 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_2_d240815_v3.jsonl with all sampling strategy +Rank 0: Loaded 6852 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_2_d240815_v3.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1.jsonl +Rank 0: Loading screen_qa_short_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_short_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 27880 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_short_d240430_v1.jsonl +Rank 0: Loading private_aig_share_0815_logo_grounding_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_grounding_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_grounding_d240924_v1.jsonl +Rank 0: Loading private_schedual_extract_20240520_v2_r464_reprompt_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_schedual_extract_20240520_v2_r464_reprompt_d240607.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 928 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_schedual_extract_20240520_v2_r464_reprompt_d240607.jsonl +Rank 0: Loading private_ui2json_app_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_app_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2488 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_app_d20240822_v1.jsonl +Rank 0: Loading private_ui2json_os_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_os_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 1242 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_os_d20240822_v1.jsonl +Rank 0: Loading private_ui2json_web_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_web_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2360 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_web_d20240822_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607.jsonl with all sampling strategy +Rank 0: Loaded 3791 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607.jsonl +Rank 0: Loading private_ui_aig_share_2405_marker_recognition_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_marker_recognition_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5179 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_marker_recognition_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_ocr_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_ocr_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5090 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_ocr_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_operation_oral_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_operation_oral_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5070 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_operation_oral_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5248 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2408_region_caption_d240903_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2408_region_caption_d240903_v1.jsonl with all sampling strategy +Rank 0: Loaded 5854 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2408_region_caption_d240903_v1.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1.jsonl +Rank 0: Loading uground_web_direct_150k_description_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_direct_150k_description_filtered_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 74208 samples from VC:s3://gui/new_annotations/uground/web_direct_150k_description_filtered_202507011.jsonl +Rank 0: Loading uground_web_direct_258k_function_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_direct_258k_function_filtered_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 124132 samples from VC:s3://gui/new_annotations/uground/web_direct_258k_function_filtered_202507011.jsonl +Rank 0: Loading uground_web_hybrid_773k_max_25qa_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_hybrid_773k_max_25qa_filtered_new_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 621928 samples from VC:s3://gui/new_annotations/uground/web_hybrid_773k_max_25qa_filtered_new_202507011.jsonl +Rank 0: Loading altas_windows +Rank 0: Loading VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_windows_splited_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 537672 samples from VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_windows_splited_202507011.jsonl +Rank 0: Loading altas_linux +Rank 0: Loading VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_linux_splited_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 21578 samples from VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_linux_splited_202507011.jsonl +Rank 0: Loading atlas_macos +Rank 0: Loading VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_macos_splited_202507011.jsonl with random:20% sampling strategy +Rank 0: Loaded 3680 samples from VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_macos_splited_202507011.jsonl +Rank 0: Loading android_action_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/android/filter_action_grounding_20250405_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 5621 samples from VC:s3://gui/data_20250328/android/filter_action_grounding_20250405_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250328 +Rank 0: Loading VC:s3://gui-agent/data_20250328/windows/action_grounding_20250409_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 11980 samples from VC:s3://gui-agent/data_20250328/windows/action_grounding_20250409_202507011_20250722.jsonl +Rank 0: Loading web_action_grounding_20250328 +Rank 0: Loading VC:s3://gui-agent/data_20250328/web_25k/action_grounding_20250404_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 9459 samples from VC:s3://gui-agent/data_20250328/web_25k/action_grounding_20250404_202507011.jsonl +Rank 0: Loading ubuntu_action_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/ubuntu/action_grounding_20250407_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 328 samples from VC:s3://gui/data_20250310/ubuntu/action_grounding_20250407_202507011.jsonl +Rank 0: Loading ubuntu_action_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/ubuntu/action_grounding_20250407_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 54 samples from VC:s3://gui/data_20250317/ubuntu/action_grounding_20250407_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/action_grounding_20250421_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 480 samples from VC:s3://gui/data_20250317/windows/action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/crop_action_grounding_20250421_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 240 samples from VC:s3://gui/data_20250317/windows/crop_action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/action_grounding_20250421_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 944 samples from VC:s3://gui/data_20250310/windows/action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/crop_action_grounding_20250421_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 472 samples from VC:s3://gui/data_20250310/windows/crop_action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading mac_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/mac/action_grounding_20250410_202507011.jsonl with all sampling strategy +Rank 0: Loaded 1578 samples from VC:s3://gui-agent/data_20250407/mac/action_grounding_20250410_202507011.jsonl +Rank 0: Loading iphone_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/iphone/white/action_grounding_20250410_202507011.jsonl with all sampling strategy +Rank 0: Loaded 20394 samples from VC:s3://gui-agent/data_20250407/iphone/white/action_grounding_20250410_202507011.jsonl +Rank 0: Loading web_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/web/action_grounding_20250414_202507011.jsonl with random:20% sampling strategy +Rank 0: Loaded 14285 samples from VC:s3://gui-agent/data_20250407/web/action_grounding_20250414_202507011.jsonl +Rank 0: Loading android_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/android/action_grounding_20250410_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 3590 samples from VC:s3://gui-agent/data_20250407/android/action_grounding_20250410_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/action_grounding_20250416_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 21422 samples from VC:s3://gui-agent/data_20250407/windows/action_grounding_20250416_202507011_20250722.jsonl +Rank 0: Loading windows_human_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/human_action_grounding_20250416_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 100 samples from VC:s3://gui-agent/data_20250407/windows/human_action_grounding_20250416_202507011_20250722.jsonl +Rank 0: Loading windows_aug_cropping_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/sub_action_grounding_20250421_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 7675 samples from VC:s3://gui-agent/data_20250407/windows/sub_action_grounding_20250421_202507011.jsonl +Rank 0: Loading iphone_action_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/action_grounding_20250417_202507011.jsonl with all sampling strategy +Rank 0: Loaded 20116 samples from VC:s3://gui-agent/data_20250414/iphone/action_grounding_20250417_202507011.jsonl +Rank 0: Loading iphone_human_action_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/human_action_grounding_20250421_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2520 samples from VC:s3://gui-agent/data_20250414/iphone/human_action_grounding_20250421_202507011.jsonl +Rank 0: Loading mac_human_action_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/mac/human_action_grounding_20250418_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7814 samples from VC:s3://gui-agent/data_20250414/mac/human_action_grounding_20250418_202507011.jsonl +Rank 0: Loading android_action_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/Android/action_grounding_20250429_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 17838 samples from VC:s3://gui-agent/data_20250421/Android/action_grounding_20250429_202507011.jsonl +Rank 0: Loading android_action_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/Android/action_grounding_20250429_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 9008 samples from VC:s3://gui-agent/data_20250428/Android/action_grounding_20250429_202507011.jsonl +Rank 0: Loading web_canvas_action_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/web_canvas/action_grounding_20250429_202507011.jsonl with random:20% sampling strategy +Rank 0: Loaded 624 samples from VC:s3://gui-agent/data_20250428/web_canvas/action_grounding_20250429_202507011.jsonl +Rank 0: Loading web_action_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/web/action_grounding_20250505_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 100652 samples from VC:s3://gui-agent/data_20250421/web/action_grounding_20250505_202507011.jsonl +Rank 0: Loading ubuntu_action_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/ubuntu/action_grounding_20250505_202507011.jsonl with all sampling strategy +Rank 0: Loaded 28346 samples from VC:s3://gui-agent/data_20250428/ubuntu/action_grounding_20250505_202507011.jsonl +Rank 0: Loading android_action_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/android/action_grounding_20250506_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 4907 samples from VC:s3://gui-agent/data_20250505/android/action_grounding_20250506_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/action_grounding_20250508_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 2635 samples from VC:s3://gui-agent/data_20250505/windows/action_grounding_20250508_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/crop_action_grounding_20250508_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 5234 samples from VC:s3://gui-agent/data_20250505/windows/crop_action_grounding_20250508_202507011_20250722.jsonl +Rank 0: Loading ubuntu_action_grounding_20250508 +Rank 0: Loading VC:s3://gui-agent/data_20250508/ubuntu/action_grounding_20250509_202507011.jsonl with all sampling strategy +Rank 0: Loaded 3404 samples from VC:s3://gui-agent/data_20250508/ubuntu/action_grounding_20250509_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250510_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250510_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250510_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250510_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250526_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250526_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250526_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250526_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250526_3 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250527_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250527_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_3 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250527_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250527_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250529_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 17050 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250529_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250529_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 17050 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250529_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_1 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250510_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 2855 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250510_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_2 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250526_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 655 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250526_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_3 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250527_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 833 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250527_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_4 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250529_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 2643 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250529_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_1 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250510_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250510_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_2 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250526_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250526_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_3 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250527_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250527_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_4 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250529_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5286 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250529_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_5 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250510_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250510_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_6 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250526_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250526_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_7 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250527_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250527_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_8 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250529_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5286 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250529_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/action_grounding_20250619_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3130 samples from VC:s3://gui-agent/data_20250623/windows/action_grounding_20250619_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250619_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 3130 samples from VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250619_202507011_20250722.jsonl +Rank 0: Loading windows_hover_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/action_grounding_20250620_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 3333 samples from VC:s3://gui-agent/data_20250623/windows/action_grounding_20250620_202507011_20250722.jsonl +Rank 0: Loading windows_crop_hover_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250620_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 3333 samples from VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250620_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_1 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 833 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_2 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1666 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_3 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1666 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_pure_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_4 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 358 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_5 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 782 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_6 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 782 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_pure_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/action_grounding_20250627_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7946 samples from VC:s3://gui-agent/data_20250630/windows/action_grounding_20250627_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/crop_action_grounding_20250627_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 3973 samples from VC:s3://gui-agent/data_20250630/windows/crop_action_grounding_20250627_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_1 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 993 samples from VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1986 samples from VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1986 samples from VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_pure_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/action_grounding_20250630_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 1990 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/action_grounding_20250630_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_action_grounding_20250630_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1990 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_action_grounding_20250630_202507011_20250722.jsonl +Rank 0: Loading windows_human_action_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/action_grounding_20250703_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 5040 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/action_grounding_20250703_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_action_grounding_20250703_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 5040 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_action_grounding_20250703_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_4 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 630 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_5 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1260 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_6 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1260 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_pure_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_7 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 249 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_8 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 498 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_9 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 498 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_pure_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/action_grounding_20250708_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2538 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/action_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_action_grounding_20250708_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1269 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_action_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250707_1 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_action_grounding_20250708.jsonl with random:25% sampling strategy +Rank 0: Loaded 172 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_action_grounding_20250708.jsonl +Rank 0: Loading windows_aug_action_grounding_20250707_2 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_action_grounding_20250708.jsonl with random:25% sampling strategy +Rank 0: Loaded 634 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_action_grounding_20250708.jsonl +Rank 0: Loading windows_aug_action_grounding_20250707_3 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_action_grounding_20250708.jsonl with random:25% sampling strategy +Rank 0: Loaded 634 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_action_grounding_20250708.jsonl +Rank 0: Loading windows_human_action_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/action_grounding_20250717_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2832 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/action_grounding_20250717_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_action_grounding_20250717_20250722.jsonl with all sampling strategy +Rank 0: Loaded 2832 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_action_grounding_20250717_20250722.jsonl +Rank 0: Loading android_ocr_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/android/text_ocr_20250409.jsonl with all sampling strategy +Rank 0: Loaded 29878 samples from VC:s3://gui/data_20250328/android/text_ocr_20250409.jsonl +Rank 0: Loading mac_orc_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/mac/element_ocr_20250328.jsonl with all sampling strategy +Rank 0: Loaded 4393 samples from VC:s3://gui/data_20250328/mac/element_ocr_20250328.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/ubuntu/internvl_grounding_20250407.jsonl with random:80% sampling strategy +Rank 0: Loaded 254 samples from VC:s3://gui/data_20250310/ubuntu/internvl_grounding_20250407.jsonl +Rank 0: Loading windows_click_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/internvl_grounding_function_20250421.jsonl with random:80% sampling strategy +Rank 0: Loaded 1802 samples from VC:s3://gui/data_20250310/windows/internvl_grounding_function_20250421.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/ubuntu/internvl_grounding_20250407.jsonl with random:80% sampling strategy +Rank 0: Loaded 53 samples from VC:s3://gui/data_20250317/ubuntu/internvl_grounding_20250407.jsonl +Rank 0: Loading windows_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/internvl_grounding_20250421_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 6578 samples from VC:s3://gui/data_20250317/windows/internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/crop_internvl_grounding_20250421_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 3287 samples from VC:s3://gui/data_20250317/windows/crop_internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading windows_click_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/internvl_grounding_function_20250421_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 542 samples from VC:s3://gui/data_20250317/windows/internvl_grounding_function_20250421_20250722.jsonl +Rank 0: Loading windows_crop_click_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/crop_internvl_grounding_function_20250421_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 270 samples from VC:s3://gui/data_20250317/windows/crop_internvl_grounding_function_20250421_20250722.jsonl +Rank 0: Loading windows_internvl_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/internvl_grounding_20250421_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 10908 samples from VC:s3://gui/data_20250310/windows/internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/crop_internvl_grounding_20250421_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 5453 samples from VC:s3://gui/data_20250310/windows/crop_internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading android_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/android/internvl_grounding_20250409.jsonl with random:80% sampling strategy +Rank 0: Loaded 13950 samples from VC:s3://gui/data_20250328/android/internvl_grounding_20250409.jsonl +Rank 0: Loading windows_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui-agent/data_20250328/windows/internvl_grounding_20250425_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 12390 samples from VC:s3://gui-agent/data_20250328/windows/internvl_grounding_20250425_20250722.jsonl +Rank 0: Loading web_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/web_25k/internvl_grounding_20250409.jsonl with random:80% sampling strategy +Rank 0: Loaded 13402 samples from VC:s3://gui/data_20250328/web_25k/internvl_grounding_20250409.jsonl +Rank 0: Loading icon_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/icon_canva/icon_anno_20250328.jsonl with all sampling strategy +Rank 0: Loaded 81303 samples from VC:s3://gui/data_20250328/icon_canva/icon_anno_20250328.jsonl +Rank 0: Loading mac_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/mac/internvl_grounding_20250410.jsonl with all sampling strategy +Rank 0: Loaded 1251 samples from VC:s3://gui-agent/data_20250407/mac/internvl_grounding_20250410.jsonl +Rank 0: Loading iphone_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/iphone/white/internvl_grounding_20250410.jsonl with all sampling strategy +Rank 0: Loaded 27849 samples from VC:s3://gui-agent/data_20250407/iphone/white/internvl_grounding_20250410.jsonl +Rank 0: Loading web_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/web/internvl_grounding_20250414.jsonl with random:80% sampling strategy +Rank 0: Loaded 51607 samples from VC:s3://gui-agent/data_20250407/web/internvl_grounding_20250414.jsonl +Rank 0: Loading android_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/android/internvl_grounding_20250410.jsonl with random:80% sampling strategy +Rank 0: Loaded 6281 samples from VC:s3://gui-agent/data_20250407/android/internvl_grounding_20250410.jsonl +Rank 0: Loading windows_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/internvl_grounding_20250416_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 25961 samples from VC:s3://gui-agent/data_20250407/windows/internvl_grounding_20250416_20250722.jsonl +Rank 0: Loading windows_cropping_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/sub_internvl_grounding_20250421.jsonl with random:40% sampling strategy +Rank 0: Loaded 9763 samples from VC:s3://gui-agent/data_20250407/windows/sub_internvl_grounding_20250421.jsonl +Rank 0: Loading iphone_internvl_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/internvl_grounding_20250417.jsonl with all sampling strategy +Rank 0: Loaded 16896 samples from VC:s3://gui-agent/data_20250414/iphone/internvl_grounding_20250417.jsonl +Rank 0: Loading iphone_human_internvl_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/human_internvl_grounding_20250421.jsonl with all sampling strategy +Rank 0: Loaded 927 samples from VC:s3://gui-agent/data_20250414/iphone/human_internvl_grounding_20250421.jsonl +Rank 0: Loading mac_human_internvl_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/mac/human_internvl_grounding_20250418.jsonl with all sampling strategy +Rank 0: Loaded 3051 samples from VC:s3://gui-agent/data_20250414/mac/human_internvl_grounding_20250418.jsonl +Rank 0: Loading android_internvl_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/Android/internvl_grounding_20250429.jsonl with random:80% sampling strategy +Rank 0: Loaded 25217 samples from VC:s3://gui-agent/data_20250421/Android/internvl_grounding_20250429.jsonl +Rank 0: Loading android_internvl_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/Android/internvl_grounding_20250429.jsonl with random:80% sampling strategy +Rank 0: Loaded 12677 samples from VC:s3://gui-agent/data_20250428/Android/internvl_grounding_20250429.jsonl +Rank 0: Loading web_canvas_internvl_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/web_canvas/internvl_grounding_20250429.jsonl with random:40% sampling strategy +Rank 0: Loaded 1566 samples from VC:s3://gui-agent/data_20250428/web_canvas/internvl_grounding_20250429.jsonl +Rank 0: Loading web_internvl_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/web/internvl_grounding_20250505.jsonl with random:80% sampling strategy +Rank 0: Loaded 174170 samples from VC:s3://gui-agent/data_20250421/web/internvl_grounding_20250505.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/ubuntu/internvl_grounding_20250505.jsonl with random:80% sampling strategy +Rank 0: Loaded 24862 samples from VC:s3://gui-agent/data_20250428/ubuntu/internvl_grounding_20250505.jsonl +Rank 0: Loading android_internvl_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/android/internvl_grounding_20250506.jsonl with random:80% sampling strategy +Rank 0: Loaded 9142 samples from VC:s3://gui-agent/data_20250505/android/internvl_grounding_20250506.jsonl +Rank 0: Loading windows_internvl_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/internvl_grounding_20250508_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 4229 samples from VC:s3://gui-agent/data_20250505/windows/internvl_grounding_20250508_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/crop_internvl_grounding_20250508_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 4200 samples from VC:s3://gui-agent/data_20250505/windows/crop_internvl_grounding_20250508_20250722.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250508 +Rank 0: Loading VC:s3://gui-agent/data_20250508/ubuntu/internvl_grounding_20250509.jsonl with random:80% sampling strategy +Rank 0: Loaded 2868 samples from VC:s3://gui-agent/data_20250508/ubuntu/internvl_grounding_20250509.jsonl +Rank 0: Loading windows_internvl_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250510_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 9494 samples from VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250510_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250510_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 9494 samples from VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250510_20250722.jsonl +Rank 0: Loading windows_internvl_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250526_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 2270 samples from VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250526_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250526_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 2270 samples from VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250526_20250722.jsonl +Rank 0: Loading windows_internvl_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250529_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 28466 samples from VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250529_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250529_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 28466 samples from VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250529_20250722.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250619_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1620 samples from VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250619_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250619_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1620 samples from VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250619_20250722.jsonl +Rank 0: Loading windows_hover_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250620_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1714 samples from VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250620_20250722.jsonl +Rank 0: Loading windows_crop_hover_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250620_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 1714 samples from VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250620_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_1 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 1372 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_2 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2743 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_3 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2743 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_pure_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_4 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 590 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_5 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 1296 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_6 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 1296 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_pure_paste.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/internvl_grounding_20250627_20250722.jsonl with all sampling strategy +Rank 0: Loaded 4230 samples from VC:s3://gui-agent/data_20250630/windows/internvl_grounding_20250627_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/crop_internvl_grounding_20250627_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 4230 samples from VC:s3://gui-agent/data_20250630/windows/crop_internvl_grounding_20250627_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_1 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 1692 samples from VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 3384 samples from VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 3384 samples from VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_pure_paste.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/internvl_grounding_20250630_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1077 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/internvl_grounding_20250630_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_internvl_grounding_20250630_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 862 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_internvl_grounding_20250630_20250722.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/internvl_grounding_20250703_20250722.jsonl with all sampling strategy +Rank 0: Loaded 2676 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/internvl_grounding_20250703_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_internvl_grounding_20250703_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 2676 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_internvl_grounding_20250703_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_4 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 1070 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_5 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2141 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_6 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2141 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_pure_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_7 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 431 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_8 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 862 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_9 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 862 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_pure_paste.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/internvl_grounding_20250708_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1344 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/internvl_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_internvl_grounding_20250708_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1344 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_internvl_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250707_1 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_internvl_grounding_20250708.jsonl with random:40% sampling strategy +Rank 0: Loaded 292 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_internvl_grounding_20250708.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250707_2 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_internvl_grounding_20250708.jsonl with random:40% sampling strategy +Rank 0: Loaded 1075 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_internvl_grounding_20250708.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250707_3 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_internvl_grounding_20250708.jsonl with random:40% sampling strategy +Rank 0: Loaded 1075 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_internvl_grounding_20250708.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/internvl_grounding_20250717_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1509 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/internvl_grounding_20250717_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_internvl_grounding_20250717_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 1207 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_internvl_grounding_20250717_20250722.jsonl +Rank 0: Loading uibert_train_ground_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/uibert_train_ground_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 4646 samples from VC:s3://gui/new_annotations/gui_data_grounding/uibert_train_ground_d240430_v1.jsonl +Rank 0: Loading openapp_taperception_grounding_d240815_v2 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/openapp_taperception_grounding_d240815_v2.jsonl with all sampling strategy +Rank 0: Loaded 2500 samples from VC:s3://gui/new_annotations/gui_data_grounding/openapp_taperception_grounding_d240815_v2.jsonl +Rank 0: Loading openapp_widget_grounding_d240815_v2 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/openapp_widget_grounding_d240815_v2.jsonl with all sampling strategy +Rank 0: Loaded 14878 samples from VC:s3://gui/new_annotations/gui_data_grounding/openapp_widget_grounding_d240815_v2.jsonl +Rank 0: Loading openapp_mug_grounding_d240812 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/openapp_mug_grounding_d240812.jsonl with all sampling strategy +Rank 0: Loaded 26090 samples from VC:s3://gui/new_annotations/gui_data_grounding/openapp_mug_grounding_d240812.jsonl +Rank 0: Loading private_ui_phone_2403_ground_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/private_ui_phone_2403_ground_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 24798 samples from VC:s3://gui/new_annotations/gui_data_grounding/private_ui_phone_2403_ground_d240430_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_ground_d240521_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2405_ground_d240521_v1.jsonl with all sampling strategy +Rank 0: Loaded 5008 samples from VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2405_ground_d240521_v1.jsonl +Rank 0: Loading private_ui_aig_share_2406_ground_d240612_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2406_ground_d240612_v1.jsonl with all sampling strategy +Rank 0: Loaded 7903 samples from VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2406_ground_d240612_v1.jsonl +Rank 0: Loading windows_pc_agent_e_planning_cot +Rank 0: Loading VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 55564 samples from VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data.jsonl +Rank 0: Loading windows_pc_agent_e_navigation +Rank 0: Loading VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data_without_think.jsonl with all sampling strategy +Rank 0: Loaded 27782 samples from VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data_without_think.jsonl +Rank 0: Loading os_genesis_ac_training_data +Rank 0: Skipping os_genesis_ac_training_data due to repeat_time=0 +Rank 0: Loading os_genesis_aw_training_data +Rank 0: Skipping os_genesis_aw_training_data due to repeat_time=0 +Rank 0: Loading os_genesis_web_training +Rank 0: Skipping os_genesis_web_training due to repeat_time=0 +Rank 0: Loading gui_odyssey_plus_1 +Rank 0: Skipping gui_odyssey_plus_1 due to repeat_time=0 +Rank 0: Loading gui_odyssey_plus_2 +Rank 0: Skipping gui_odyssey_plus_2 due to repeat_time=0 +Rank 0: Loading gui_odyssey_plus_custom_3 +Rank 0: Skipping gui_odyssey_plus_custom_3 due to repeat_time=0 +Rank 0: Loading mm_gui_mid +Rank 0: Skipping mm_gui_mid due to repeat_time=0 +Rank 0: Loading text_gui_mid +Rank 0: Skipping text_gui_mid due to repeat_time=0 +Rank 0: Loading gui_mid_trajectory +Rank 0: Skipping gui_mid_trajectory due to repeat_time=0 +Rank 0: Loading ubuntu_rag +Rank 0: Loading VC:s3://gui-agent/cua_text_rag/ubuntu_rag.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7024 samples from VC:s3://gui-agent/cua_text_rag/ubuntu_rag.jsonl +Rank 0: Loading windows_rag +Rank 0: Loading VC:s3://gui-agent/cua_text_rag/windows_rag.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3144 samples from VC:s3://gui-agent/cua_text_rag/windows_rag.jsonl +Rank 0: Total training samples: 6240940 +Rank 0: Formatting inputs...Skip in lazy mode +Detected kernel version 3.10.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher. +Rank 0: Length of multimodal samples: 6230528, pure textual samples: 9984 +Parameter Offload: Total persistent parameters: 755712 in 408 params + 0%| | 0/12188 [00:00 + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f1e7bc37790> +[Try #0] Failed to fetch sample 4409357 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f1e7bc37790> +Problematic sample: {'image': '20240823_045930_before_screenshot_sub3.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Data Providers'"}, {'from': 'gpt', 'value': '\nclick(x=0.8015, y=0.921)\n'}]} + 82%|████████▏ | 10042/12188 [07:02<4:28:49, 7.52s/it] {'loss': 0.2659, 'grad_norm': 0.6332035457625617, 'learning_rate': 7.919700013163128e-07, 'epoch': 0.82} + 82%|████████▏ | 10042/12188 [07:02<4:28:49, 7.52s/it] 82%|████████▏ | 10043/12188 [07:09<4:28:47, 7.52s/it] {'loss': 0.3086, 'grad_norm': 0.6842527258634092, 'learning_rate': 7.912525264167342e-07, 'epoch': 0.82} + 82%|████████▏ | 10043/12188 [07:09<4:28:47, 7.52s/it] 82%|████████▏ | 10044/12188 [07:18<4:48:08, 8.06s/it] {'loss': 0.304, 'grad_norm': 0.6957439487539033, 'learning_rate': 7.905353487321404e-07, 'epoch': 0.82} + 82%|████████▏ | 10044/12188 [07:18<4:48:08, 8.06s/it] 82%|████████▏ | 10045/12188 [07:27<4:51:11, 8.15s/it] {'loss': 0.3281, 'grad_norm': 0.7338498226193055, 'learning_rate': 7.898184683131782e-07, 'epoch': 0.82} + 82%|████████▏ | 10045/12188 [07:27<4:51:11, 8.15s/it] 82%|████████▏ | 10046/12188 [07:34<4:42:02, 7.90s/it] {'loss': 0.3076, 'grad_norm': 0.7073287883004532, 'learning_rate': 7.891018852104709e-07, 'epoch': 0.82} + 82%|████████▏ | 10046/12188 [07:34<4:42:02, 7.90s/it] 82%|████████▏ | 10047/12188 [07:41<4:28:36, 7.53s/it] {'loss': 0.3055, 'grad_norm': 0.7270156504473465, 'learning_rate': 7.883855994746237e-07, 'epoch': 0.82} + 82%|████████▏ | 10047/12188 [07:41<4:28:36, 7.53s/it] 82%|████████▏ | 10048/12188 [07:48<4:20:43, 7.31s/it] {'loss': 0.2811, 'grad_norm': 0.6385622616913829, 'learning_rate': 7.876696111562182e-07, 'epoch': 0.82} + 82%|████████▏ | 10048/12188 [07:48<4:20:43, 7.31s/it] 82%|████████▏ | 10049/12188 [07:56<4:27:53, 7.51s/it] {'loss': 0.3307, 'grad_norm': 0.7067303390062732, 'learning_rate': 7.869539203058169e-07, 'epoch': 0.82} + 82%|████████▏ | 10049/12188 [07:56<4:27:53, 7.51s/it] 82%|████████▏ | 10050/12188 [08:03<4:21:56, 7.35s/it] {'loss': 0.2742, 'grad_norm': 0.8430468239345277, 'learning_rate': 7.862385269739625e-07, 'epoch': 0.82} + 82%|███���████▏ | 10050/12188 [08:03<4:21:56, 7.35s/it] 82%|████████▏ | 10051/12188 [08:10<4:19:31, 7.29s/it] {'loss': 0.3129, 'grad_norm': 0.6489417188172067, 'learning_rate': 7.855234312111732e-07, 'epoch': 0.82} + 82%|████████▏ | 10051/12188 [08:10<4:19:31, 7.29s/it] 82%|████████▏ | 10052/12188 [08:16<4:13:00, 7.11s/it] {'loss': 0.3053, 'grad_norm': 0.6351289719845153, 'learning_rate': 7.848086330679483e-07, 'epoch': 0.82} + 82%|████████▏ | 10052/12188 [08:16<4:13:00, 7.11s/it] 82%|████████▏ | 10053/12188 [08:24<4:23:57, 7.42s/it] {'loss': 0.3086, 'grad_norm': 0.7125407036545471, 'learning_rate': 7.840941325947637e-07, 'epoch': 0.82} + 82%|████████▏ | 10053/12188 [08:25<4:23:57, 7.42s/it] 82%|████████▏ | 10054/12188 [08:31<4:17:16, 7.23s/it] {'loss': 0.2684, 'grad_norm': 0.7532320687248092, 'learning_rate': 7.833799298420786e-07, 'epoch': 0.82} + 82%|████████▏ | 10054/12188 [08:31<4:17:16, 7.23s/it] 82%|████████▏ | 10055/12188 [08:39<4:19:59, 7.31s/it] {'loss': 0.2825, 'grad_norm': 1.052881573418256, 'learning_rate': 7.826660248603296e-07, 'epoch': 0.82} + 82%|████████▏ | 10055/12188 [08:39<4:19:59, 7.31s/it] 83%|████████▎ | 10056/12188 [08:46<4:20:06, 7.32s/it] {'loss': 0.3118, 'grad_norm': 0.6878342288746125, 'learning_rate': 7.819524176999288e-07, 'epoch': 0.83} + 83%|████████▎ | 10056/12188 [08:46<4:20:06, 7.32s/it] 83%|████████▎ | 10057/12188 [08:54<4:28:37, 7.56s/it] {'loss': 0.3044, 'grad_norm': 0.7388437976687655, 'learning_rate': 7.812391084112731e-07, 'epoch': 0.83} + 83%|████████▎ | 10057/12188 [08:54<4:28:37, 7.56s/it] 83%|████████▎ | 10058/12188 [09:03<4:37:29, 7.82s/it] {'loss': 0.2976, 'grad_norm': 0.7591566863398036, 'learning_rate': 7.80526097044732e-07, 'epoch': 0.83} + 83%|████████▎ | 10058/12188 [09:03<4:37:29, 7.82s/it] 83%|████████▎ | 10059/12188 [09:10<4:36:02, 7.78s/it] {'loss': 0.3024, 'grad_norm': 0.7547625831573729, 'learning_rate': 7.798133836506588e-07, 'epoch': 0.83} + 83%|████████▎ | 10059/12188 [09:10<4:36:02, 7.78s/it] 83%|████████▎ | 10060/12188 [09:18<4:29:32, 7.60s/it] {'loss': 0.337, 'grad_norm': 0.8759024627662536, 'learning_rate': 7.791009682793855e-07, 'epoch': 0.83} + 83%|████████▎ | 10060/12188 [09:18<4:29:32, 7.60s/it] 83%|████████▎ | 10061/12188 [09:24<4:21:45, 7.38s/it] {'loss': 0.3616, 'grad_norm': 0.6929548316055744, 'learning_rate': 7.783888509812193e-07, 'epoch': 0.83} + 83%|████████▎ | 10061/12188 [09:24<4:21:45, 7.38s/it] 83%|████████▎ | 10062/12188 [09:31<4:17:32, 7.27s/it] {'loss': 0.3077, 'grad_norm': 0.6820991423979331, 'learning_rate': 7.776770318064497e-07, 'epoch': 0.83} + 83%|████████▎ | 10062/12188 [09:31<4:17:32, 7.27s/it] 83%|████████▎ | 10063/12188 [09:38<4:14:10, 7.18s/it] {'loss': 0.2943, 'grad_norm': 0.6879279348846127, 'learning_rate': 7.769655108053459e-07, 'epoch': 0.83} + 83%|████████▎ | 10063/12188 [09:38<4:14:10, 7.18s/it] 83%|████████▎ | 10064/12188 [09:45<4:12:00, 7.12s/it] {'loss': 0.2831, 'grad_norm': 0.7122945267483649, 'learning_rate': 7.762542880281526e-07, 'epoch': 0.83} + 83%|████████▎ | 10064/12188 [09:45<4:12:00, 7.12s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7fcddf246c50> +[Try #0] Failed to fetch sample 4385424 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7fcddf246c50> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Wiktionary'"}, {'from': 'gpt', 'value': '\nclick(x=0.904, y=0.465)\n'}]} + 83%|████████▎ | 10065/12188 [09:54<4:25:05, 7.49s/it] {'loss': 0.2737, 'grad_norm': 0.728838674751376, 'learning_rate': 7.755433635250948e-07, 'epoch': 0.83} + 83%|████████▎ | 10065/12188 [09:54<4:25:05, 7.49s/it] 83%|████████▎ | 10066/12188 [10:00<4:15:39, 7.23s/it] {'loss': 0.3043, 'grad_norm': 0.7351531135663574, 'learning_rate': 7.748327373463782e-07, 'epoch': 0.83} + 83%|████████▎ | 10066/12188 [10:00<4:15:39, 7.23s/it] 83%|████████▎ | 10067/12188 [10:07<4:08:27, 7.03s/it] {'loss': 0.2925, 'grad_norm': 0.7120202771069294, 'learning_rate': 7.741224095421845e-07, 'epoch': 0.83} + 83%|████████▎ | 10067/12188 [10:07<4:08:27, 7.03s/it] 83%|████████▎ | 10068/12188 [10:14<4:05:33, 6.95s/it] {'loss': 0.3397, 'grad_norm': 0.732530748820669, 'learning_rate': 7.734123801626781e-07, 'epoch': 0.83} + 83%|████████▎ | 10068/12188 [10:14<4:05:33, 6.95s/it] 83%|████████▎ | 10069/12188 [10:20<4:03:16, 6.89s/it] {'loss': 0.3236, 'grad_norm': 0.6849049966849552, 'learning_rate': 7.727026492579976e-07, 'epoch': 0.83} + 83%|████████▎ | 10069/12188 [10:20<4:03:16, 6.89s/it] 83%|████████▎ | 10070/12188 [10:29<4:25:42, 7.53s/it] {'loss': 0.265, 'grad_norm': 0.7122965239443819, 'learning_rate': 7.719932168782656e-07, 'epoch': 0.83} + 83%|████████▎ | 10070/12188 [10:29<4:25:42, 7.53s/it] 83%|████████▎ | 10071/12188 [10:36<4:17:28, 7.30s/it] {'loss': 0.2959, 'grad_norm': 0.7899164024975511, 'learning_rate': 7.712840830735785e-07, 'epoch': 0.83} + 83%|████████▎ | 10071/12188 [10:36<4:17:28, 7.30s/it] 83%|████████▎ | 10072/12188 [10:44<4:17:44, 7.31s/it] {'loss': 0.3173, 'grad_norm': 0.8726886103910866, 'learning_rate': 7.705752478940154e-07, 'epoch': 0.83} + 83%|████████▎ | 10072/12188 [10:44<4:17:44, 7.31s/it] 83%|████████▎ | 10073/12188 [10:53<4:41:09, 7.98s/it] {'loss': 0.2957, 'grad_norm': 0.6769767659345216, 'learning_rate': 7.698667113896346e-07, 'epoch': 0.83} + 83%|████████▎ | 10073/12188 [10:53<4:41:09, 7.98s/it] 83%|████████▎ | 10074/12188 [11:03<4:59:06, 8.49s/it] {'loss': 0.3382, 'grad_norm': 0.7464222784211877, 'learning_rate': 7.69158473610469e-07, 'epoch': 0.83} + 83%|████████▎ | 10074/12188 [11:03<4:59:06, 8.49s/it] 83%|████████▎ | 10075/12188 [11:11<4:52:46, 8.31s/it] {'loss': 0.3173, 'grad_norm': 0.8360504901162767, 'learning_rate': 7.684505346065363e-07, 'epoch': 0.83} + 83%|████████▎ | 10075/12188 [11:11<4:52:46, 8.31s/it] 83%|████████▎ | 10076/12188 [11:17<4:34:12, 7.79s/it] {'loss': 0.3171, 'grad_norm': 0.6997610347296596, 'learning_rate': 7.677428944278271e-07, 'epoch': 0.83} + 83%|████████▎ | 10076/12188 [11:17<4:34:12, 7.79s/it] 83%|████████▎ | 10077/12188 [11:24<4:23:18, 7.48s/it] {'loss': 0.2708, 'grad_norm': 0.7974111540855674, 'learning_rate': 7.670355531243145e-07, 'epoch': 0.83} + 83%|████████▎ | 10077/12188 [11:24<4:23:18, 7.48s/it] 83%|████████▎ | 10078/12188 [11:34<4:50:10, 8.25s/it] {'loss': 0.2924, 'grad_norm': 0.7317177866094533, 'learning_rate': 7.663285107459517e-07, 'epoch': 0.83} + 83%|████████▎ | 10078/12188 [11:34<4:50:10, 8.25s/it] 83%|████████▎ | 10079/12188 [11:42<4:52:03, 8.31s/it] {'loss': 0.2761, 'grad_norm': 0.7670118342784203, 'learning_rate': 7.65621767342668e-07, 'epoch': 0.83} + 83%|████████▎ | 10079/12188 [11:42<4:52:03, 8.31s/it] 83%|████████▎ | 10080/12188 [11:50<4:41:00, 8.00s/it] {'loss': 0.3121, 'grad_norm': 0.6700800090496124, 'learning_rate': 7.649153229643708e-07, 'epoch': 0.83} + 83%|████████▎ | 10080/12188 [11:50<4:41:00, 8.00s/it] 83%|████████▎ | 10081/12188 [11:57<4:32:43, 7.77s/it] {'loss': 0.3327, 'grad_norm': 0.7053434639499931, 'learning_rate': 7.6420917766095e-07, 'epoch': 0.83} + 83%|████████▎ | 10081/12188 [11:57<4:32:43, 7.77s/it] 83%|████████▎ | 10082/12188 [12:05<4:37:08, 7.90s/it] {'loss': 0.3061, 'grad_norm': 0.9710875474450664, 'learning_rate': 7.63503331482271e-07, 'epoch': 0.83} + 83%|████████▎ | 10082/12188 [12:05<4:37:08, 7.90s/it] 83%|████████▎ | 10083/12188 [12:14<4:51:08, 8.30s/it] {'loss': 0.2966, 'grad_norm': 0.6650459031596843, 'learning_rate': 7.627977844781815e-07, 'epoch': 0.83} + 83%|████████▎ | 10083/12188 [12:14<4:51:08, 8.30s/it] 83%|████████▎ | 10084/12188 [12:21<4:33:35, 7.80s/it] {'loss': 0.3, 'grad_norm': 0.8238757900723065, 'learning_rate': 7.620925366985033e-07, 'epoch': 0.83} + 83%|████████▎ | 10084/12188 [12:21<4:33:35, 7.80s/it] 83%|████████▎ | 10085/12188 [12:30<4:42:51, 8.07s/it] {'loss': 0.2712, 'grad_norm': 0.7108684287220504, 'learning_rate': 7.613875881930416e-07, 'epoch': 0.83} + 83%|████████▎ | 10085/12188 [12:30<4:42:51, 8.07s/it] 83%|████████▎ | 10086/12188 [12:37<4:33:30, 7.81s/it] {'loss': 0.2989, 'grad_norm': 0.6923621901241725, 'learning_rate': 7.606829390115799e-07, 'epoch': 0.83} + 83%|████████▎ | 10086/12188 [12:37<4:33:30, 7.81s/it] 83%|████████▎ | 10087/12188 [12:44<4:22:13, 7.49s/it] {'loss': 0.2798, 'grad_norm': 0.8495108707642233, 'learning_rate': 7.599785892038764e-07, 'epoch': 0.83} + 83%|████████▎ | 10087/12188 [12:44<4:22:13, 7.49s/it] 83%|████████▎ | 10088/12188 [12:54<4:49:05, 8.26s/it] {'loss': 0.2761, 'grad_norm': 0.7482394324231568, 'learning_rate': 7.592745388196748e-07, 'epoch': 0.83} + 83%|████████▎ | 10088/12188 [12:54<4:49:05, 8.26s/it] 83%|████████▎ | 10089/12188 [13:01<4:33:59, 7.83s/it] {'loss': 0.278, 'grad_norm': 0.6690664856358964, 'learning_rate': 7.585707879086901e-07, 'epoch': 0.83} + 83%|████████▎ | 10089/12188 [13:01<4:33:59, 7.83s/it] 83%|████████▎ | 10090/12188 [13:09<4:45:05, 8.15s/it] {'loss': 0.3055, 'grad_norm': 0.6640381756856596, 'learning_rate': 7.578673365206224e-07, 'epoch': 0.83} + 83%|████████▎ | 10090/12188 [13:10<4:45:05, 8.15s/it] 83%|████████▎ | 10091/12188 [13:20<5:04:37, 8.72s/it] {'loss': 0.3094, 'grad_norm': 0.6549153739500952, 'learning_rate': 7.571641847051492e-07, 'epoch': 0.83} + 83%|████████▎ | 10091/12188 [13:20<5:04:37, 8.72s/it] 83%|████████▎ | 10092/12188 [13:29<5:11:58, 8.93s/it] {'loss': 0.3002, 'grad_norm': 0.6589963324135486, 'learning_rate': 7.564613325119241e-07, 'epoch': 0.83} + 83%|████████▎ | 10092/12188 [13:29<5:11:58, 8.93s/it] 83%|████████▎ | 10093/12188 [13:36<4:49:39, 8.30s/it] {'loss': 0.2752, 'grad_norm': 0.8663676780646602, 'learning_rate': 7.557587799905813e-07, 'epoch': 0.83} + 83%|████████▎ | 10093/12188 [13:36<4:49:39, 8.30s/it] 83%|████���███▎ | 10094/12188 [13:47<5:25:14, 9.32s/it] {'loss': 0.2805, 'grad_norm': 0.7707882121394342, 'learning_rate': 7.550565271907357e-07, 'epoch': 0.83} + 83%|████████▎ | 10094/12188 [13:47<5:25:14, 9.32s/it] 83%|████████▎ | 10095/12188 [13:56<5:14:40, 9.02s/it] {'loss': 0.2994, 'grad_norm': 0.7146740132076194, 'learning_rate': 7.543545741619762e-07, 'epoch': 0.83} + 83%|████████▎ | 10095/12188 [13:56<5:14:40, 9.02s/it] 83%|████████▎ | 10096/12188 [14:03<4:53:28, 8.42s/it] {'loss': 0.2604, 'grad_norm': 0.6787832114927901, 'learning_rate': 7.536529209538773e-07, 'epoch': 0.83} + 83%|████████▎ | 10096/12188 [14:03<4:53:28, 8.42s/it] 83%|████████▎ | 10097/12188 [14:10<4:37:18, 7.96s/it] {'loss': 0.2861, 'grad_norm': 0.5699735347874475, 'learning_rate': 7.52951567615986e-07, 'epoch': 0.83} + 83%|████████▎ | 10097/12188 [14:10<4:37:18, 7.96s/it] 83%|████████▎ | 10098/12188 [14:17<4:25:37, 7.63s/it] {'loss': 0.2751, 'grad_norm': 0.7556619388128064, 'learning_rate': 7.522505141978309e-07, 'epoch': 0.83} + 83%|████████▎ | 10098/12188 [14:17<4:25:37, 7.63s/it] 83%|████████▎ | 10099/12188 [14:23<4:15:00, 7.32s/it] {'loss': 0.2818, 'grad_norm': 0.7974343297630987, 'learning_rate': 7.515497607489213e-07, 'epoch': 0.83} + 83%|████████▎ | 10099/12188 [14:23<4:15:00, 7.32s/it] 83%|████████▎ | 10100/12188 [14:33<4:38:33, 8.00s/it] {'loss': 0.3033, 'grad_norm': 0.6455472724507989, 'learning_rate': 7.508493073187411e-07, 'epoch': 0.83} + 83%|████████▎ | 10100/12188 [14:33<4:38:33, 8.00s/it] 83%|████████▎ | 10101/12188 [14:42<4:54:08, 8.46s/it] {'loss': 0.3147, 'grad_norm': 0.8131925653946845, 'learning_rate': 7.501491539567574e-07, 'epoch': 0.83} + 83%|████████▎ | 10101/12188 [14:42<4:54:08, 8.46s/it] 83%|████████▎ | 10102/12188 [14:50<4:47:54, 8.28s/it] {'loss': 0.319, 'grad_norm': 0.720508848978785, 'learning_rate': 7.494493007124109e-07, 'epoch': 0.83} + 83%|████████▎ | 10102/12188 [14:50<4:47:54, 8.28s/it] 83%|████████▎ | 10103/12188 [14:57<4:30:55, 7.80s/it] {'loss': 0.2969, 'grad_norm': 0.7000031469916355, 'learning_rate': 7.487497476351258e-07, 'epoch': 0.83} + 83%|████████▎ | 10103/12188 [14:57<4:30:55, 7.80s/it] 83%|████████▎ | 10104/12188 [15:06<4:44:25, 8.19s/it] {'loss': 0.3554, 'grad_norm': 0.7093287181303876, 'learning_rate': 7.480504947743044e-07, 'epoch': 0.83} + 83%|████████▎ | 10104/12188 [15:06<4:44:25, 8.19s/it] 83%|████████▎ | 10105/12188 [15:12<4:27:38, 7.71s/it] {'loss': 0.3122, 'grad_norm': 0.6287043344383577, 'learning_rate': 7.473515421793248e-07, 'epoch': 0.83} + 83%|████████▎ | 10105/12188 [15:13<4:27:38, 7.71s/it] 83%|████████▎ | 10106/12188 [15:21<4:37:26, 8.00s/it] {'loss': 0.2863, 'grad_norm': 0.7146035653445667, 'learning_rate': 7.466528898995479e-07, 'epoch': 0.83} + 83%|████████▎ | 10106/12188 [15:21<4:37:26, 8.00s/it] 83%|████████▎ | 10107/12188 [15:28<4:26:28, 7.68s/it] {'loss': 0.3072, 'grad_norm': 0.6769234464683759, 'learning_rate': 7.459545379843108e-07, 'epoch': 0.83} + 83%|████████▎ | 10107/12188 [15:28<4:26:28, 7.68s/it] 83%|████████▎ | 10108/12188 [15:35<4:22:22, 7.57s/it] {'loss': 0.2926, 'grad_norm': 0.6784715999359491, 'learning_rate': 7.452564864829281e-07, 'epoch': 0.83} + 83%|████████▎ | 10108/12188 [15:35<4:22:22, 7.57s/it] 83%|████████▎ | 10109/12188 [15:44<4:31:40, 7.84s/it] {'loss': 0.3005, 'grad_norm': 0.6573358723499471, 'learning_rate': 7.445587354446975e-07, 'epoch': 0.83} + 83%|████████▎ | 10109/12188 [15:44<4:31:40, 7.84s/it] 83%|████████▎ | 10110/12188 [15:52<4:32:32, 7.87s/it] {'loss': 0.3453, 'grad_norm': 0.7114773250112104, 'learning_rate': 7.438612849188915e-07, 'epoch': 0.83} + 83%|████████▎ | 10110/12188 [15:52<4:32:32, 7.87s/it] 83%|████████▎ | 10111/12188 [15:59<4:22:37, 7.59s/it] {'loss': 0.3538, 'grad_norm': 0.6660436278959235, 'learning_rate': 7.43164134954763e-07, 'epoch': 0.83} + 83%|████████▎ | 10111/12188 [15:59<4:22:37, 7.59s/it] 83%|████████▎ | 10112/12188 [16:06<4:14:25, 7.35s/it] {'loss': 0.2951, 'grad_norm': 0.6748712661788699, 'learning_rate': 7.424672856015458e-07, 'epoch': 0.83} + 83%|████████▎ | 10112/12188 [16:06<4:14:25, 7.35s/it] 83%|████████▎ | 10113/12188 [16:14<4:27:08, 7.72s/it] {'loss': 0.3139, 'grad_norm': 0.7758118644848734, 'learning_rate': 7.417707369084476e-07, 'epoch': 0.83} + 83%|████████▎ | 10113/12188 [16:14<4:27:08, 7.72s/it] 83%|████████▎ | 10114/12188 [16:21<4:18:37, 7.48s/it] {'loss': 0.3178, 'grad_norm': 0.6469909310952888, 'learning_rate': 7.41074488924659e-07, 'epoch': 0.83} + 83%|████████▎ | 10114/12188 [16:21<4:18:37, 7.48s/it] 83%|████████▎ | 10115/12188 [16:28<4:09:55, 7.23s/it] {'loss': 0.3236, 'grad_norm': 0.650123023972366, 'learning_rate': 7.40378541699347e-07, 'epoch': 0.83} + 83%|████████▎ | 10115/12188 [16:28<4:09:55, 7.23s/it] 83%|████████▎ | 10116/12188 [16:34<4:03:57, 7.06s/it] {'loss': 0.3018, 'grad_norm': 0.6815237072167777, 'learning_rate': 7.396828952816587e-07, 'epoch': 0.83} + 83%|████████▎ | 10116/12188 [16:34<4:03:57, 7.06s/it] 83%|████████▎ | 10117/12188 [16:43<4:20:43, 7.55s/it] {'loss': 0.2678, 'grad_norm': 1.0344537587357356, 'learning_rate': 7.389875497207205e-07, 'epoch': 0.83} + 83%|████████▎ | 10117/12188 [16:43<4:20:43, 7.55s/it] 83%|████████▎ | 10118/12188 [16:50<4:11:44, 7.30s/it] {'loss': 0.2896, 'grad_norm': 0.7536735685094107, 'learning_rate': 7.382925050656348e-07, 'epoch': 0.83} + 83%|████████▎ | 10118/12188 [16:50<4:11:44, 7.30s/it] 83%|████████▎ | 10119/12188 [16:56<4:05:14, 7.11s/it] {'loss': 0.2976, 'grad_norm': 0.7019164569634927, 'learning_rate': 7.375977613654861e-07, 'epoch': 0.83} + 83%|████████▎ | 10119/12188 [16:56<4:05:14, 7.11s/it] 83%|████████▎ | 10120/12188 [17:03<3:57:47, 6.90s/it] {'loss': 0.3078, 'grad_norm': 0.7810886339189932, 'learning_rate': 7.369033186693359e-07, 'epoch': 0.83} + 83%|████████▎ | 10120/12188 [17:03<3:57:47, 6.90s/it] 83%|████████▎ | 10121/12188 [17:10<3:59:25, 6.95s/it] {'loss': 0.3016, 'grad_norm': 0.7581267290923134, 'learning_rate': 7.362091770262231e-07, 'epoch': 0.83} + 83%|████████▎ | 10121/12188 [17:10<3:59:25, 6.95s/it] 83%|████████▎ | 10122/12188 [17:17<3:56:06, 6.86s/it] {'loss': 0.2913, 'grad_norm': 0.7234472714076594, 'learning_rate': 7.355153364851686e-07, 'epoch': 0.83} + 83%|████████▎ | 10122/12188 [17:17<3:56:06, 6.86s/it] 83%|████████▎ | 10123/12188 [17:23<3:55:21, 6.84s/it] {'loss': 0.2565, 'grad_norm': 0.6569488342788854, 'learning_rate': 7.348217970951687e-07, 'epoch': 0.83} + 83%|████████▎ | 10123/12188 [17:23<3:55:21, 6.84s/it] 83%|████████▎ | 10124/12188 [17:30<3:54:32, 6.82s/it] {'loss': 0.3226, 'grad_norm': 0.678792923435257, 'learning_rate': 7.341285589052022e-07, 'epoch': 0.83} + 83%|████████▎ | 10124/12188 [17:30<3:54:32, 6.82s/it] 83%|████████▎ | 10125/12188 [17:37<3:54:30, 6.82s/it] {'loss': 0.32, 'grad_norm': 0.7421216404903279, 'learning_rate': 7.334356219642219e-07, 'epoch': 0.83} + 83%|████████▎ | 10125/12188 [17:37<3:54:30, 6.82s/it] 83%|████████▎ | 10126/12188 [17:44<3:57:06, 6.90s/it] {'loss': 0.3411, 'grad_norm': 0.8464167971781283, 'learning_rate': 7.327429863211633e-07, 'epoch': 0.83} + 83%|████████▎ | 10126/12188 [17:44<3:57:06, 6.90s/it] 83%|████████▎ | 10127/12188 [17:51<3:54:37, 6.83s/it] {'loss': 0.2656, 'grad_norm': 0.7224723351504573, 'learning_rate': 7.320506520249404e-07, 'epoch': 0.83} + 83%|████████▎ | 10127/12188 [17:51<3:54:37, 6.83s/it] 83%|████████▎ | 10128/12188 [17:59<4:09:49, 7.28s/it] {'loss': 0.287, 'grad_norm': 0.6753102542497861, 'learning_rate': 7.313586191244421e-07, 'epoch': 0.83} + 83%|████████▎ | 10128/12188 [17:59<4:09:49, 7.28s/it] 83%|████████▎ | 10129/12188 [18:06<4:03:01, 7.08s/it] {'loss': 0.3044, 'grad_norm': 0.663312436234375, 'learning_rate': 7.306668876685402e-07, 'epoch': 0.83} + 83%|████████▎ | 10129/12188 [18:06<4:03:01, 7.08s/it] 83%|████████▎ | 10130/12188 [18:12<3:55:40, 6.87s/it] {'loss': 0.3169, 'grad_norm': 0.6862723789213109, 'learning_rate': 7.299754577060847e-07, 'epoch': 0.83} + 83%|████████▎ | 10130/12188 [18:12<3:55:40, 6.87s/it] 83%|████████▎ | 10131/12188 [18:19<3:58:57, 6.97s/it] {'loss': 0.2956, 'grad_norm': 0.6540246103125703, 'learning_rate': 7.292843292859009e-07, 'epoch': 0.83} + 83%|████████▎ | 10131/12188 [18:19<3:58:57, 6.97s/it] 83%|████████▎ | 10132/12188 [18:26<4:01:33, 7.05s/it] {'loss': 0.299, 'grad_norm': 0.7556489512199625, 'learning_rate': 7.285935024567975e-07, 'epoch': 0.83} + 83%|████████▎ | 10132/12188 [18:26<4:01:33, 7.05s/it] 83%|████████▎ | 10133/12188 [18:35<4:13:52, 7.41s/it] {'loss': 0.2715, 'grad_norm': 0.6940318390665131, 'learning_rate': 7.279029772675572e-07, 'epoch': 0.83} + 83%|████████▎ | 10133/12188 [18:35<4:13:52, 7.41s/it]Invalidate trace cache @ step 2: expected module 1, but got module 364 + 83%|████████▎ | 10134/12188 [18:41<3:59:29, 7.00s/it] {'loss': 0.636, 'grad_norm': 0.5625114217211917, 'learning_rate': 7.27212753766946e-07, 'epoch': 0.83} + 83%|████████▎ | 10134/12188 [18:41<3:59:29, 7.00s/it] 83%|████████▎ | 10135/12188 [18:48<4:01:39, 7.06s/it] {'loss': 0.2927, 'grad_norm': 0.7117533815038416, 'learning_rate': 7.265228320037054e-07, 'epoch': 0.83} + 83%|████████▎ | 10135/12188 [18:48<4:01:39, 7.06s/it] 83%|████████▎ | 10136/12188 [18:55<4:00:24, 7.03s/it] {'loss': 0.2927, 'grad_norm': 0.6768108788189361, 'learning_rate': 7.258332120265554e-07, 'epoch': 0.83} + 83%|████████▎ | 10136/12188 [18:55<4:00:24, 7.03s/it] 83%|████████▎ | 10137/12188 [19:02<4:00:39, 7.04s/it] {'loss': 0.3141, 'grad_norm': 0.7016006065586842, 'learning_rate': 7.251438938841981e-07, 'epoch': 0.83} + 83%|████████▎ | 10137/12188 [19:02<4:00:39, 7.04s/it] 83%|████████▎ | 10138/12188 [19:09<3:56:11, 6.91s/it] {'loss': 0.3284, 'grad_norm': 0.8529070510915026, 'learning_rate': 7.244548776253102e-07, 'epoch': 0.83} + 83%|████████▎ | 10138/12188 [19:09<3:56:11, 6.91s/it] 83%|████████▎ | 10139/12188 [19:19<4:31:48, 7.96s/it] {'loss': 0.311, 'grad_norm': 0.6974913639456904, 'learning_rate': 7.237661632985493e-07, 'epoch': 0.83} + 83%|████████▎ | 10139/12188 [19:19<4:31:48, 7.96s/it] 83%|████████▎ | 10140/12188 [19:26<4:25:37, 7.78s/it] {'loss': 0.2746, 'grad_norm': 0.697719462794252, 'learning_rate': 7.230777509525527e-07, 'epoch': 0.83} + 83%|████████▎ | 10140/12188 [19:26<4:25:37, 7.78s/it] 83%|████████▎ | 10141/12188 [19:33<4:12:36, 7.40s/it] {'loss': 0.2895, 'grad_norm': 0.7178396922507617, 'learning_rate': 7.223896406359326e-07, 'epoch': 0.83} + 83%|████████▎ | 10141/12188 [19:33<4:12:36, 7.40s/it] 83%|████████▎ | 10142/12188 [19:41<4:21:06, 7.66s/it] {'loss': 0.2513, 'grad_norm': 0.6562315489537376, 'learning_rate': 7.217018323972852e-07, 'epoch': 0.83} + 83%|████████▎ | 10142/12188 [19:41<4:21:06, 7.66s/it] 83%|████████▎ | 10143/12188 [19:51<4:41:51, 8.27s/it] {'loss': 0.3208, 'grad_norm': 0.7474140551655923, 'learning_rate': 7.210143262851793e-07, 'epoch': 0.83} + 83%|████████▎ | 10143/12188 [19:51<4:41:51, 8.27s/it] 83%|████████▎ | 10144/12188 [19:58<4:29:56, 7.92s/it] {'loss': 0.3207, 'grad_norm': 0.7086452702205799, 'learning_rate': 7.203271223481672e-07, 'epoch': 0.83} + 83%|████████▎ | 10144/12188 [19:58<4:29:56, 7.92s/it] 83%|████████▎ | 10145/12188 [20:08<4:51:28, 8.56s/it] {'loss': 0.2922, 'grad_norm': 0.6851161716958446, 'learning_rate': 7.196402206347792e-07, 'epoch': 0.83} + 83%|████████▎ | 10145/12188 [20:08<4:51:28, 8.56s/it] 83%|████████▎ | 10146/12188 [20:15<4:36:00, 8.11s/it] {'loss': 0.2562, 'grad_norm': 0.7096240517765878, 'learning_rate': 7.189536211935205e-07, 'epoch': 0.83} + 83%|████████▎ | 10146/12188 [20:15<4:36:00, 8.11s/it] 83%|████████▎ | 10147/12188 [20:22<4:22:18, 7.71s/it] {'loss': 0.3271, 'grad_norm': 0.6696788370694782, 'learning_rate': 7.182673240728804e-07, 'epoch': 0.83} + 83%|████████▎ | 10147/12188 [20:22<4:22:18, 7.71s/it] 83%|████████▎ | 10148/12188 [20:29<4:18:28, 7.60s/it] {'loss': 0.3304, 'grad_norm': 0.6717480822784182, 'learning_rate': 7.175813293213224e-07, 'epoch': 0.83} + 83%|████████▎ | 10148/12188 [20:29<4:18:28, 7.60s/it] 83%|████████▎ | 10149/12188 [20:37<4:24:28, 7.78s/it] {'loss': 0.3008, 'grad_norm': 0.61180840016838, 'learning_rate': 7.168956369872898e-07, 'epoch': 0.83} + 83%|████████▎ | 10149/12188 [20:37<4:24:28, 7.78s/it] 83%|████████▎ | 10150/12188 [20:45<4:22:03, 7.72s/it] {'loss': 0.292, 'grad_norm': 0.7170746361264121, 'learning_rate': 7.162102471192067e-07, 'epoch': 0.83} + 83%|████████▎ | 10150/12188 [20:45<4:22:03, 7.72s/it] 83%|████████▎ | 10151/12188 [20:56<4:51:28, 8.59s/it] {'loss': 0.2929, 'grad_norm': 0.7731319566907908, 'learning_rate': 7.155251597654727e-07, 'epoch': 0.83} + 83%|████████▎ | 10151/12188 [20:56<4:51:28, 8.59s/it] 83%|████████▎ | 10152/12188 [21:02<4:32:39, 8.04s/it] {'loss': 0.2765, 'grad_norm': 0.7468682803982551, 'learning_rate': 7.148403749744687e-07, 'epoch': 0.83} + 83%|████████▎ | 10152/12188 [21:02<4:32:39, 8.04s/it] 83%|████████▎ | 10153/12188 [21:09<4:21:25, 7.71s/it] {'loss': 0.3134, 'grad_norm': 0.6719825234351622, 'learning_rate': 7.141558927945536e-07, 'epoch': 0.83} + 83%|████████▎ | 10153/12188 [21:09<4:21:25, 7.71s/it] 83%|████████▎ | 10154/12188 [21:18<4:35:54, 8.14s/it] {'loss': 0.2845, 'grad_norm': 0.7368179552228274, 'learning_rate': 7.134717132740626e-07, 'epoch': 0.83} + 83%|████████▎ | 10154/12188 [21:18<4:35:54, 8.14s/it] 83%|████████▎ | 10155/12188 [21:25<4:20:45, 7.70s/it] {'loss': 0.2536, 'grad_norm': 0.7390899918784991, 'learning_rate': 7.127878364613133e-07, 'epoch': 0.83} + 83%|████████▎ | 10155/12188 [21:25<4:20:45, 7.70s/it] 83%|████████▎ | 10156/12188 [21:32<4:09:35, 7.37s/it] {'loss': 0.2657, 'grad_norm': 0.6910206054760313, 'learning_rate': 7.121042624045981e-07, 'epoch': 0.83} + 83%|████████▎ | 10156/12188 [21:32<4:09:35, 7.37s/it] 83%|████████▎ | 10157/12188 [21:40<4:23:11, 7.78s/it] {'loss': 0.2955, 'grad_norm': 0.6764970008724442, 'learning_rate': 7.114209911521907e-07, 'epoch': 0.83} + 83%|████████▎ | 10157/12188 [21:40<4:23:11, 7.78s/it] 83%|████████▎ | 10158/12188 [21:48<4:16:14, 7.57s/it] {'loss': 0.275, 'grad_norm': 0.7089536944482263, 'learning_rate': 7.107380227523442e-07, 'epoch': 0.83} + 83%|████████▎ | 10158/12188 [21:48<4:16:14, 7.57s/it] 83%|████████▎ | 10159/12188 [21:56<4:28:10, 7.93s/it] {'loss': 0.281, 'grad_norm': 0.789688949647037, 'learning_rate': 7.100553572532859e-07, 'epoch': 0.83} + 83%|████████▎ | 10159/12188 [21:56<4:28:10, 7.93s/it] 83%|████████▎ | 10160/12188 [22:03<4:16:47, 7.60s/it] {'loss': 0.2749, 'grad_norm': 0.6607243249981796, 'learning_rate': 7.093729947032274e-07, 'epoch': 0.83} + 83%|████████▎ | 10160/12188 [22:03<4:16:47, 7.60s/it] 83%|████████▎ | 10161/12188 [22:11<4:21:19, 7.74s/it] {'loss': 0.2878, 'grad_norm': 0.8695267715563734, 'learning_rate': 7.086909351503529e-07, 'epoch': 0.83} + 83%|████████▎ | 10161/12188 [22:11<4:21:19, 7.74s/it] 83%|████████▎ | 10162/12188 [22:19<4:17:27, 7.62s/it] {'loss': 0.2835, 'grad_norm': 0.7186468488070171, 'learning_rate': 7.080091786428317e-07, 'epoch': 0.83} + 83%|████████▎ | 10162/12188 [22:19<4:17:27, 7.62s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f193cb2c400> +[Try #0] Failed to fetch sample 4616838 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f193cb2c400> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Class: form-control input-sm'"}, {'from': 'gpt', 'value': '\nclick(x=0.489, y=0.228)\n'}]} + 83%|████████▎ | 10163/12188 [22:28<4:35:17, 8.16s/it] {'loss': 0.3412, 'grad_norm': 0.6848591228716592, 'learning_rate': 7.073277252288063e-07, 'epoch': 0.83} + 83%|████████▎ | 10163/12188 [22:28<4:35:17, 8.16s/it] 83%|████████▎ | 10164/12188 [22:36<4:34:10, 8.13s/it] {'loss': 0.3376, 'grad_norm': 0.7699334814172962, 'learning_rate': 7.066465749563994e-07, 'epoch': 0.83} + 83%|████████▎ | 10164/12188 [22:36<4:34:10, 8.13s/it] 83%|████████▎ | 10165/12188 [22:43<4:18:56, 7.68s/it] {'loss': 0.2668, 'grad_norm': 0.7160295539455507, 'learning_rate': 7.059657278737136e-07, 'epoch': 0.83} + 83%|████████▎ | 10165/12188 [22:43<4:18:56, 7.68s/it] 83%|████████▎ | 10166/12188 [22:50<4:13:43, 7.53s/it] {'loss': 0.2683, 'grad_norm': 0.6518124224670889, 'learning_rate': 7.052851840288299e-07, 'epoch': 0.83} + 83%|████████▎ | 10166/12188 [22:50<4:13:43, 7.53s/it] 83%|████████▎ | 10167/12188 [22:58<4:22:12, 7.78s/it] {'loss': 0.3092, 'grad_norm': 0.6664824373295991, 'learning_rate': 7.04604943469806e-07, 'epoch': 0.83} + 83%|████████▎ | 10167/12188 [22:58<4:22:12, 7.78s/it] 83%|████████▎ | 10168/12188 [23:06<4:20:55, 7.75s/it] {'loss': 0.3116, 'grad_norm': 0.6823444797068368, 'learning_rate': 7.039250062446806e-07, 'epoch': 0.83} + 83%|████████▎ | 10168/12188 [23:06<4:20:55, 7.75s/it] 83%|████████▎ | 10169/12188 [23:12<4:07:58, 7.37s/it] {'loss': 0.2859, 'grad_norm': 0.7309819056796586, 'learning_rate': 7.032453724014681e-07, 'epoch': 0.83} + 83%|████████▎ | 10169/12188 [23:12<4:07:58, 7.37s/it] 83%|████████▎ | 10170/12188 [23:23<4:39:18, 8.30s/it] {'loss': 0.296, 'grad_norm': 0.7013463307032202, 'learning_rate': 7.025660419881641e-07, 'epoch': 0.83} + 83%|████████▎ | 10170/12188 [23:23<4:39:18, 8.30s/it] 83%|████████▎ | 10171/12188 [23:30<4:23:10, 7.83s/it] {'loss': 0.3195, 'grad_norm': 0.675857957083896, 'learning_rate': 7.01887015052743e-07, 'epoch': 0.83} + 83%|████████▎ | 10171/12188 [23:30<4:23:10, 7.83s/it] 83%|████████▎ | 10172/12188 [23:38<4:32:08, 8.10s/it] {'loss': 0.3354, 'grad_norm': 0.6847897948654434, 'learning_rate': 7.012082916431545e-07, 'epoch': 0.83} + 83%|████████▎ | 10172/12188 [23:38<4:32:08, 8.10s/it] 83%|████████▎ | 10173/12188 [23:45<4:21:05, 7.77s/it] {'loss': 0.2901, 'grad_norm': 0.7489503957650289, 'learning_rate': 7.005298718073311e-07, 'epoch': 0.83} + 83%|████████▎ | 10173/12188 [23:45<4:21:05, 7.77s/it] 83%|████████▎ | 10174/12188 [23:52<4:09:07, 7.42s/it] {'loss': 0.3032, 'grad_norm': 1.8515132747986534, 'learning_rate': 6.998517555931788e-07, 'epoch': 0.83} + 83%|████████▎ | 10174/12188 [23:52<4:09:07, 7.42s/it] 83%|████████▎ | 10175/12188 [23:59<4:03:01, 7.24s/it] {'loss': 0.2738, 'grad_norm': 0.7155277393577482, 'learning_rate': 6.991739430485883e-07, 'epoch': 0.83} + 83%|████████▎ | 10175/12188 [23:59<4:03:01, 7.24s/it] 83%|████████▎ | 10176/12188 [24:09<4:30:09, 8.06s/it] {'loss': 0.2939, 'grad_norm': 0.7346922858531726, 'learning_rate': 6.984964342214245e-07, 'epoch': 0.83} + 83%|████████▎ | 10176/12188 [24:09<4:30:09, 8.06s/it] 84%|████████▎ | 10177/12188 [24:16<4:19:22, 7.74s/it] {'loss': 0.3084, 'grad_norm': 0.6659138480300646, 'learning_rate': 6.978192291595304e-07, 'epoch': 0.83} + 84%|████████▎ | 10177/12188 [24:16<4:19:22, 7.74s/it] 84%|████████▎ | 10178/12188 [24:22<4:09:42, 7.45s/it] {'loss': 0.321, 'grad_norm': 0.6980569719259774, 'learning_rate': 6.971423279107309e-07, 'epoch': 0.84} + 84%|████████▎ | 10178/12188 [24:22<4:09:42, 7.45s/it] 84%|████████▎ | 10179/12188 [24:30<4:13:59, 7.59s/it] {'loss': 0.2921, 'grad_norm': 0.6451442116935833, 'learning_rate': 6.964657305228262e-07, 'epoch': 0.84} + 84%|████████▎ | 10179/12188 [24:30<4:13:59, 7.59s/it] 84%|████████▎ | 10180/12188 [24:37<4:05:48, 7.34s/it] {'loss': 0.3021, 'grad_norm': 0.668745643405301, 'learning_rate': 6.95789437043598e-07, 'epoch': 0.84} + 84%|████████▎ | 10180/12188 [24:37<4:05:48, 7.34s/it] 84%|████████▎ | 10181/12188 [24:44<4:00:12, 7.18s/it] {'loss': 0.346, 'grad_norm': 0.7826041603037536, 'learning_rate': 6.951134475208049e-07, 'epoch': 0.84} + 84%|████████▎ | 10181/12188 [24:44<4:00:12, 7.18s/it] 84%|████████▎ | 10182/12188 [24:53<4:16:38, 7.68s/it] {'loss': 0.3097, 'grad_norm': 0.7396136201659498, 'learning_rate': 6.944377620021831e-07, 'epoch': 0.84} + 84%|████████▎ | 10182/12188 [24:53<4:16:38, 7.68s/it] 84%|████████▎ | 10183/12188 [25:00<4:10:46, 7.50s/it] {'loss': 0.258, 'grad_norm': 0.6806691571808945, 'learning_rate': 6.937623805354493e-07, 'epoch': 0.84} + 84%|████████▎ | 10183/12188 [25:00<4:10:46, 7.50s/it] 84%|████████▎ | 10184/12188 [25:07<4:04:13, 7.31s/it] {'loss': 0.3484, 'grad_norm': 0.7233155798809241, 'learning_rate': 6.930873031682983e-07, 'epoch': 0.84} + 84%|████████▎ | 10184/12188 [25:07<4:04:13, 7.31s/it] 84%|████████▎ | 10185/12188 [25:14<3:58:59, 7.16s/it] {'loss': 0.2969, 'grad_norm': 0.7562950805378573, 'learning_rate': 6.924125299484014e-07, 'epoch': 0.84} + 84%|████████▎ | 10185/12188 [25:14<3:58:59, 7.16s/it] 84%|████████▎ | 10186/12188 [25:21<4:00:02, 7.19s/it] {'loss': 0.3471, 'grad_norm': 0.6918112555853377, 'learning_rate': 6.917380609234125e-07, 'epoch': 0.84} + 84%|████████▎ | 10186/12188 [25:21<4:00:02, 7.19s/it] 84%|████████▎ | 10187/12188 [25:30<4:19:32, 7.78s/it] {'loss': 0.3004, 'grad_norm': 0.6592448178504848, 'learning_rate': 6.910638961409583e-07, 'epoch': 0.84} + 84%|████████▎ | 10187/12188 [25:30<4:19:32, 7.78s/it] 84%|████████▎ | 10188/12188 [25:37<4:11:18, 7.54s/it] {'loss': 0.273, 'grad_norm': 0.7682096783461542, 'learning_rate': 6.903900356486504e-07, 'epoch': 0.84} + 84%|████████▎ | 10188/12188 [25:37<4:11:18, 7.54s/it] 84%|████████▎ | 10189/12188 [25:44<4:02:08, 7.27s/it] {'loss': 0.3304, 'grad_norm': 0.7428027949958393, 'learning_rate': 6.89716479494073e-07, 'epoch': 0.84} + 84%|████████▎ | 10189/12188 [25:44<4:02:08, 7.27s/it] 84%|████████▎ | 10190/12188 [25:50<3:55:42, 7.08s/it] {'loss': 0.2835, 'grad_norm': 0.7176656517985653, 'learning_rate': 6.890432277247943e-07, 'epoch': 0.84} + 84%|████████▎ | 10190/12188 [25:50<3:55:42, 7.08s/it] 84%|████████▎ | 10191/12188 [25:59<4:12:36, 7.59s/it] {'loss': 0.3076, 'grad_norm': 0.7864011656326787, 'learning_rate': 6.883702803883563e-07, 'epoch': 0.84} + 84%|████████▎ | 10191/12188 [25:59<4:12:36, 7.59s/it] 84%|████████▎ | 10192/12188 [26:06<4:09:04, 7.49s/it] {'loss': 0.3096, 'grad_norm': 0.7393957950387975, 'learning_rate': 6.876976375322808e-07, 'epoch': 0.84} + 84%|████████▎ | 10192/12188 [26:06<4:09:04, 7.49s/it] 84%|████████▎ | 10193/12188 [26:13<4:06:11, 7.40s/it] {'loss': 0.2951, 'grad_norm': 0.6446807869566313, 'learning_rate': 6.870252992040705e-07, 'epoch': 0.84} + 84%|████████▎ | 10193/12188 [26:13<4:06:11, 7.40s/it] 84%|████████▎ | 10194/12188 [26:20<3:58:12, 7.17s/it] {'loss': 0.2984, 'grad_norm': 0.784932489053381, 'learning_rate': 6.86353265451205e-07, 'epoch': 0.84} + 84%|████████▎ | 10194/12188 [26:20<3:58:12, 7.17s/it] 84%|████████▎ | 10195/12188 [26:27<3:59:45, 7.22s/it] {'loss': 0.2969, 'grad_norm': 0.8997830418802651, 'learning_rate': 6.856815363211399e-07, 'epoch': 0.84} + 84%|████████▎ | 10195/12188 [26:27<3:59:45, 7.22s/it] 84%|████████▎ | 10196/12188 [26:35<3:59:02, 7.20s/it] {'loss': 0.2984, 'grad_norm': 0.6439698497411521, 'learning_rate': 6.85010111861315e-07, 'epoch': 0.84} + 84%|████████▎ | 10196/12188 [26:35<3:59:02, 7.20s/it] 84%|████████▎ | 10197/12188 [26:41<3:55:15, 7.09s/it] {'loss': 0.2707, 'grad_norm': 0.7081285841684639, 'learning_rate': 6.843389921191423e-07, 'epoch': 0.84} + 84%|████████▎ | 10197/12188 [26:41<3:55:15, 7.09s/it] 84%|████████▎ | 10198/12188 [26:48<3:53:06, 7.03s/it] {'loss': 0.2811, 'grad_norm': 0.7118416471077345, 'learning_rate': 6.836681771420162e-07, 'epoch': 0.84} + 84%|████████▎ | 10198/12188 [26:48<3:53:06, 7.03s/it] 84%|████████▎ | 10199/12188 [26:55<3:49:25, 6.92s/it] {'loss': 0.2922, 'grad_norm': 0.7809753666748473, 'learning_rate': 6.829976669773098e-07, 'epoch': 0.84} + 84%|████████▎ | 10199/12188 [26:55<3:49:25, 6.92s/it][2025-08-18 10:12:59,646] [WARNING] [stage3.py:2118:step] 1 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time + 84%|████████▎ | 10200/12188 [27:03<4:05:12, 7.40s/it] {'loss': 0.2971, 'grad_norm': 0.6895543648239966, 'learning_rate': 6.823274616723707e-07, 'epoch': 0.84} + 84%|████████▎ | 10200/12188 [27:03<4:05:12, 7.40s/it] 84%|████████▎ | 10201/12188 [27:10<3:57:23, 7.17s/it] {'loss': 0.2998, 'grad_norm': 0.719160889737388, 'learning_rate': 6.816575612745302e-07, 'epoch': 0.84} + 84%|████████▎ | 10201/12188 [27:10<3:57:23, 7.17s/it] 84%|████████▎ | 10202/12188 [27:19<4:09:58, 7.55s/it] {'loss': 0.3407, 'grad_norm': 0.7802250567004609, 'learning_rate': 6.809879658310953e-07, 'epoch': 0.84} + 84%|████████▎ | 10202/12188 [27:19<4:09:58, 7.55s/it] 84%|████████▎ | 10203/12188 [27:25<4:03:46, 7.37s/it] {'loss': 0.3149, 'grad_norm': 0.7143612103880841, 'learning_rate': 6.803186753893515e-07, 'epoch': 0.84} + 84%|████████▎ | 10203/12188 [27:25<4:03:46, 7.37s/it] 84%|████████▎ | 10204/12188 [27:33<4:06:06, 7.44s/it] {'loss': 0.314, 'grad_norm': 0.7539466271525835, 'learning_rate': 6.796496899965627e-07, 'epoch': 0.84} + 84%|████████▎ | 10204/12188 [27:33<4:06:06, 7.44s/it] 84%|████████▎ | 10205/12188 [27:40<4:02:00, 7.32s/it] {'loss': 0.3158, 'grad_norm': 0.7235863388948043, 'learning_rate': 6.78981009699971e-07, 'epoch': 0.84} + 84%|████████▎ | 10205/12188 [27:40<4:02:00, 7.32s/it] 84%|████████▎ | 10206/12188 [27:47<3:59:59, 7.27s/it] {'loss': 0.3468, 'grad_norm': 0.8403372840859812, 'learning_rate': 6.783126345467978e-07, 'epoch': 0.84} + 84%|████████▎ | 10206/12188 [27:47<3:59:59, 7.27s/it] 84%|████████▎ | 10207/12188 [27:55<4:00:57, 7.30s/it] {'loss': 0.3211, 'grad_norm': 0.694176483656537, 'learning_rate': 6.776445645842439e-07, 'epoch': 0.84} + 84%|████████▎ | 10207/12188 [27:55<4:00:57, 7.30s/it] 84%|████████▍ | 10208/12188 [28:04<4:22:57, 7.97s/it] {'loss': 0.3076, 'grad_norm': 0.7078253420226892, 'learning_rate': 6.769767998594857e-07, 'epoch': 0.84} + 84%|████████▍ | 10208/12188 [28:04<4:22:57, 7.97s/it] 84%|████████▍ | 10209/12188 [28:11<4:11:23, 7.62s/it] {'loss': 0.3097, 'grad_norm': 0.7533481186723354, 'learning_rate': 6.763093404196808e-07, 'epoch': 0.84} + 84%|████████▍ | 10209/12188 [28:11<4:11:23, 7.62s/it] 84%|████████▍ | 10210/12188 [28:18<4:08:25, 7.54s/it] {'loss': 0.3912, 'grad_norm': 0.7328839475299093, 'learning_rate': 6.756421863119634e-07, 'epoch': 0.84} + 84%|████████▍ | 10210/12188 [28:18<4:08:25, 7.54s/it] 84%|████████▍ | 10211/12188 [28:25<4:03:29, 7.39s/it] {'loss': 0.2938, 'grad_norm': 0.6723516812983724, 'learning_rate': 6.749753375834467e-07, 'epoch': 0.84} + 84%|████████▍ | 10211/12188 [28:25<4:03:29, 7.39s/it] 84%|████████▍ | 10212/12188 [28:34<4:19:47, 7.89s/it] {'loss': 0.2913, 'grad_norm': 0.7276265410253977, 'learning_rate': 6.743087942812243e-07, 'epoch': 0.84} + 84%|████████▍ | 10212/12188 [28:34<4:19:47, 7.89s/it] 84%|████████▍ | 10213/12188 [28:43<4:27:00, 8.11s/it] {'loss': 0.3149, 'grad_norm': 0.9252305503578291, 'learning_rate': 6.736425564523641e-07, 'epoch': 0.84} + 84%|████████▍ | 10213/12188 [28:43<4:27:00, 8.11s/it] 84%|████████▍ | 10214/12188 [28:51<4:25:24, 8.07s/it] {'loss': 0.2714, 'grad_norm': 0.6716378579585482, 'learning_rate': 6.729766241439167e-07, 'epoch': 0.84} + 84%|████████▍ | 10214/12188 [28:51<4:25:24, 8.07s/it] 84%|████████▍ | 10215/12188 [28:58<4:16:36, 7.80s/it] {'loss': 0.3058, 'grad_norm': 0.6565361642533456, 'learning_rate': 6.723109974029074e-07, 'epoch': 0.84} + 84%|████████▍ | 10215/12188 [28:58<4:16:36, 7.80s/it] 84%|████████▍ | 10216/12188 [29:05<4:09:42, 7.60s/it] {'loss': 0.2715, 'grad_norm': 0.660336164876307, 'learning_rate': 6.716456762763435e-07, 'epoch': 0.84} + 84%|████████▍ | 10216/12188 [29:05<4:09:42, 7.60s/it] 84%|████████▍ | 10217/12188 [29:13<4:05:53, 7.49s/it] {'loss': 0.2915, 'grad_norm': 0.6726101961582188, 'learning_rate': 6.709806608112068e-07, 'epoch': 0.84} + 84%|████████▍ | 10217/12188 [29:13<4:05:53, 7.49s/it] 84%|████████▍ | 10218/12188 [29:19<3:58:54, 7.28s/it] {'loss': 0.2741, 'grad_norm': 0.8191430770901772, 'learning_rate': 6.703159510544616e-07, 'epoch': 0.84} + 84%|████████▍ | 10218/12188 [29:19<3:58:54, 7.28s/it] 84%|████████▍ | 10219/12188 [29:27<4:07:23, 7.54s/it] {'loss': 0.3337, 'grad_norm': 0.760816981167973, 'learning_rate': 6.696515470530468e-07, 'epoch': 0.84} + 84%|████████▍ | 10219/12188 [29:27<4:07:23, 7.54s/it] 84%|████████▍ | 10220/12188 [29:34<4:01:27, 7.36s/it] {'loss': 0.3033, 'grad_norm': 0.7153630135317914, 'learning_rate': 6.689874488538833e-07, 'epoch': 0.84} + 84%|████████▍ | 10220/12188 [29:34<4:01:27, 7.36s/it] 84%|████████▍ | 10221/12188 [29:42<4:03:45, 7.44s/it] {'loss': 0.294, 'grad_norm': 0.7317047861552876, 'learning_rate': 6.683236565038676e-07, 'epoch': 0.84} + 84%|████████▍ | 10221/12188 [29:42<4:03:45, 7.44s/it] 84%|████████▍ | 10222/12188 [29:49<3:54:25, 7.15s/it] {'loss': 0.3085, 'grad_norm': 0.7082648426372536, 'learning_rate': 6.676601700498764e-07, 'epoch': 0.84} + 84%|████████▍ | 10222/12188 [29:49<3:54:25, 7.15s/it] 84%|████████▍ | 10223/12188 [29:55<3:50:50, 7.05s/it] {'loss': 0.2652, 'grad_norm': 0.8902027397629741, 'learning_rate': 6.669969895387623e-07, 'epoch': 0.84} + 84%|████████▍ | 10223/12188 [29:55<3:50:50, 7.05s/it] 84%|████████▍ | 10224/12188 [30:02<3:49:13, 7.00s/it] {'loss': 0.3309, 'grad_norm': 0.8176100946149006, 'learning_rate': 6.663341150173597e-07, 'epoch': 0.84} + 84%|████████▍ | 10224/12188 [30:02<3:49:13, 7.00s/it] 84%|████████▍ | 10225/12188 [30:09<3:47:25, 6.95s/it] {'loss': 0.3043, 'grad_norm': 0.6728713010519612, 'learning_rate': 6.656715465324803e-07, 'epoch': 0.84} + 84%|████████▍ | 10225/12188 [30:09<3:47:25, 6.95s/it] 84%|████████▍ | 10226/12188 [30:16<3:51:09, 7.07s/it] {'loss': 0.3248, 'grad_norm': 0.7679166754571389, 'learning_rate': 6.650092841309114e-07, 'epoch': 0.84} + 84%|████████▍ | 10226/12188 [30:16<3:51:09, 7.07s/it] 84%|████████▍ | 10227/12188 [30:24<3:52:44, 7.12s/it] {'loss': 0.3063, 'grad_norm': 0.6606405249263796, 'learning_rate': 6.643473278594231e-07, 'epoch': 0.84} + 84%|████████▍ | 10227/12188 [30:24<3:52:44, 7.12s/it] 84%|████████▍ | 10228/12188 [30:31<3:57:03, 7.26s/it] {'loss': 0.2921, 'grad_norm': 0.6603221264887063, 'learning_rate': 6.636856777647599e-07, 'epoch': 0.84} + 84%|████████▍ | 10228/12188 [30:31<3:57:03, 7.26s/it] 84%|████████▍ | 10229/12188 [30:38<3:49:45, 7.04s/it] {'loss': 0.2797, 'grad_norm': 0.7270609287965876, 'learning_rate': 6.630243338936476e-07, 'epoch': 0.84} + 84%|████████▍ | 10229/12188 [30:38<3:49:45, 7.04s/it] 84%|████████▍ | 10230/12188 [30:44<3:46:19, 6.94s/it] {'loss': 0.2938, 'grad_norm': 0.7213696766016465, 'learning_rate': 6.623632962927895e-07, 'epoch': 0.84} + 84%|████████▍ | 10230/12188 [30:44<3:46:19, 6.94s/it] 84%|████████▍ | 10231/12188 [30:51<3:43:34, 6.85s/it] {'loss': 0.3007, 'grad_norm': 0.7764454270212107, 'learning_rate': 6.617025650088671e-07, 'epoch': 0.84} + 84%|████████▍ | 10231/12188 [30:51<3:43:34, 6.85s/it] 84%|████████▍ | 10232/12188 [30:58<3:39:40, 6.74s/it] {'loss': 0.2633, 'grad_norm': 0.7143882416099192, 'learning_rate': 6.610421400885392e-07, 'epoch': 0.84} + 84%|████████▍ | 10232/12188 [30:58<3:39:40, 6.74s/it] 84%|████████▍ | 10233/12188 [31:04<3:39:24, 6.73s/it] {'loss': 0.3229, 'grad_norm': 0.7679735611786526, 'learning_rate': 6.603820215784429e-07, 'epoch': 0.84} + 84%|████████▍ | 10233/12188 [31:04<3:39:24, 6.73s/it] 84%|████████▍ | 10234/12188 [31:14<4:03:36, 7.48s/it] {'loss': 0.2931, 'grad_norm': 0.6623012702094889, 'learning_rate': 6.597222095251965e-07, 'epoch': 0.84} + 84%|████████▍ | 10234/12188 [31:14<4:03:36, 7.48s/it] 84%|████████▍ | 10235/12188 [31:21<4:02:48, 7.46s/it] {'loss': 0.2811, 'grad_norm': 0.6729005067821352, 'learning_rate': 6.590627039753955e-07, 'epoch': 0.84} + 84%|████████▍ | 10235/12188 [31:21<4:02:48, 7.46s/it] 84%|████████▍ | 10236/12188 [31:28<3:57:28, 7.30s/it] {'loss': 0.3371, 'grad_norm': 0.7286928043302029, 'learning_rate': 6.58403504975611e-07, 'epoch': 0.84} + 84%|████████▍ | 10236/12188 [31:28<3:57:28, 7.30s/it] 84%|████████▍ | 10237/12188 [31:35<4:00:04, 7.38s/it] {'loss': 0.2823, 'grad_norm': 0.7171227185528858, 'learning_rate': 6.577446125723958e-07, 'epoch': 0.84} + 84%|████████▍ | 10237/12188 [31:35<4:00:04, 7.38s/it] 84%|████████▍ | 10238/12188 [31:43<3:57:21, 7.30s/it] {'loss': 0.3084, 'grad_norm': 0.7668695415316741, 'learning_rate': 6.570860268122808e-07, 'epoch': 0.84} + 84%|████████▍ | 10238/12188 [31:43<3:57:21, 7.30s/it] 84%|████████▍ | 10239/12188 [31:52<4:21:56, 8.06s/it] {'loss': 0.2653, 'grad_norm': 0.6976709140307319, 'learning_rate': 6.564277477417719e-07, 'epoch': 0.84} + 84%|████████▍ | 10239/12188 [31:52<4:21:56, 8.06s/it] 84%|████████▍ | 10240/12188 [32:00<4:18:12, 7.95s/it] {'loss': 0.2984, 'grad_norm': 0.6764044151220264, 'learning_rate': 6.557697754073577e-07, 'epoch': 0.84} + 84%|████████▍ | 10240/12188 [32:00<4:18:12, 7.95s/it] 84%|████████▍ | 10241/12188 [32:09<4:27:01, 8.23s/it] {'loss': 0.2818, 'grad_norm': 0.6516931858203354, 'learning_rate': 6.551121098555019e-07, 'epoch': 0.84} + 84%|████████▍ | 10241/12188 [32:09<4:27:01, 8.23s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7fcf3bd50d10> +[Try #0] Failed to fetch sample 4617819 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7fcf3bd50d10> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Contact us'"}, {'from': 'gpt', 'value': '\nclick(x=0.8505, y=0.32)\n'}]} +[2025-08-18 10:18:14,093] [WARNING] [stage3.py:2118:step] 1 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time + 84%|████████▍ | 10242/12188 [32:18<4:33:49, 8.44s/it] {'loss': 0.2825, 'grad_norm': 0.6338332644708029, 'learning_rate': 6.544547511326482e-07, 'epoch': 0.84} + 84%|████████▍ | 10242/12188 [32:18<4:33:49, 8.44s/it] 84%|████████▍ | 10243/12188 [32:25<4:20:15, 8.03s/it] {'loss': 0.3526, 'grad_norm': 0.743523478318049, 'learning_rate': 6.537976992852196e-07, 'epoch': 0.84} + 84%|████████▍ | 10243/12188 [32:25<4:20:15, 8.03s/it] 84%|████████▍ | 10244/12188 [32:36<4:50:54, 8.98s/it] {'loss': 0.3004, 'grad_norm': 0.7228133402882518, 'learning_rate': 6.531409543596146e-07, 'epoch': 0.84} + 84%|████████▍ | 10244/12188 [32:36<4:50:54, 8.98s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f9a19f12890> +[Try #0] Failed to fetch sample 4335455 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f9a19f12890> +Problematic sample: {'image': '20240823_021250_before_screenshot_sub0.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Watches'"}, {'from': 'gpt', 'value': '\nclick(x=0.681, y=0.266)\n'}]} + 84%|████████▍ | 10245/12188 [32:43<4:25:51, 8.21s/it] {'loss': 0.2828, 'grad_norm': 0.7696516164719953, 'learning_rate': 6.524845164022104e-07, 'epoch': 0.84} + 84%|████████▍ | 10245/12188 [32:43<4:25:51, 8.21s/it] 84%|████████▍ | 10246/12188 [32:49<4:08:50, 7.69s/it] {'loss': 0.3114, 'grad_norm': 0.6990207627003101, 'learning_rate': 6.51828385459366e-07, 'epoch': 0.84} + 84%|████████▍ | 10246/12188 [32:49<4:08:50, 7.69s/it] 84%|████████▍ | 10247/12188 [32:56<4:04:13, 7.55s/it] {'loss': 0.304, 'grad_norm': 0.6521438964794419, 'learning_rate': 6.511725615774139e-07, 'epoch': 0.84} + 84%|████████▍ | 10247/12188 [32:56<4:04:13, 7.55s/it] 84%|████████▍ | 10248/12188 [33:03<3:56:06, 7.30s/it] {'loss': 0.2698, 'grad_norm': 0.7154579426431605, 'learning_rate': 6.505170448026699e-07, 'epoch': 0.84} + 84%|████████▍ | 10248/12188 [33:03<3:56:06, 7.30s/it] 84%|████████▍ | 10249/12188 [33:11<4:06:37, 7.63s/it] {'loss': 0.3218, 'grad_norm': 0.8743797633537393, 'learning_rate': 6.498618351814229e-07, 'epoch': 0.84} + 84%|████████▍ | 10249/12188 [33:11<4:06:37, 7.63s/it] 84%|████████▍ | 10250/12188 [33:18<4:00:04, 7.43s/it] {'loss': 0.2784, 'grad_norm': 0.6922479597665282, 'learning_rate': 6.492069327599454e-07, 'epoch': 0.84} + 84%|████████▍ | 10250/12188 [33:18<4:00:04, 7.43s/it] 84%|████████▍ | 10251/12188 [33:26<3:59:47, 7.43s/it] {'loss': 0.3092, 'grad_norm': 0.662424490583974, 'learning_rate': 6.485523375844826e-07, 'epoch': 0.84} + 84%|████████▍ | 10251/12188 [33:26<3:59:47, 7.43s/it] 84%|████████▍ | 10252/12188 [33:32<3:49:44, 7.12s/it] {'loss': 0.3297, 'grad_norm': 0.7164978194974566, 'learning_rate': 6.478980497012632e-07, 'epoch': 0.84} + 84%|████████▍ | 10252/12188 [33:32<3:49:44, 7.12s/it] 84%|████████▍ | 10253/12188 [33:39<3:46:47, 7.03s/it] {'loss': 0.3062, 'grad_norm': 0.6807184815960056, 'learning_rate': 6.472440691564924e-07, 'epoch': 0.84} + 84%|████████▍ | 10253/12188 [33:39<3:46:47, 7.03s/it] 84%|████████▍ | 10254/12188 [33:48<4:02:05, 7.51s/it] {'loss': 0.3167, 'grad_norm': 0.6301197964661549, 'learning_rate': 6.46590395996351e-07, 'epoch': 0.84} + 84%|████████▍ | 10254/12188 [33:48<4:02:05, 7.51s/it] 84%|████████▍ | 10255/12188 [33:55<3:56:02, 7.33s/it] {'loss': 0.3074, 'grad_norm': 0.6840860031865749, 'learning_rate': 6.459370302670015e-07, 'epoch': 0.84} + 84%|████████▍ | 10255/12188 [33:55<3:56:02, 7.33s/it] 84%|████████▍ | 10256/12188 [34:02<3:54:37, 7.29s/it] {'loss': 0.2818, 'grad_norm': 0.6493460312366953, 'learning_rate': 6.452839720145848e-07, 'epoch': 0.84} + 84%|████████▍ | 10256/12188 [34:02<3:54:37, 7.29s/it] 84%|████████▍ | 10257/12188 [34:09<3:58:10, 7.40s/it] {'loss': 0.2654, 'grad_norm': 0.6489965528380713, 'learning_rate': 6.446312212852162e-07, 'epoch': 0.84} + 84%|████████▍ | 10257/12188 [34:09<3:58:10, 7.40s/it] 84%|████████▍ | 10258/12188 [34:17<4:00:51, 7.49s/it] {'loss': 0.3026, 'grad_norm': 0.6822941663159183, 'learning_rate': 6.439787781249945e-07, 'epoch': 0.84} + 84%|████████▍ | 10258/12188 [34:17<4:00:51, 7.49s/it] 84%|████████▍ | 10259/12188 [34:24<3:52:43, 7.24s/it] {'loss': 0.3094, 'grad_norm': 0.736898979881187, 'learning_rate': 6.433266425799933e-07, 'epoch': 0.84} + 84%|████████▍ | 10259/12188 [34:24<3:52:43, 7.24s/it] 84%|████████▍ | 10260/12188 [34:33<4:09:02, 7.75s/it] {'loss': 0.2921, 'grad_norm': 0.6421027343487261, 'learning_rate': 6.426748146962635e-07, 'epoch': 0.84} + 84%|████████▍ | 10260/12188 [34:33<4:09:02, 7.75s/it] 84%|████████▍ | 10261/12188 [34:40<4:04:00, 7.60s/it] {'loss': 0.3284, 'grad_norm': 0.6545782108637507, 'learning_rate': 6.420232945198395e-07, 'epoch': 0.84} + 84%|████████▍ | 10261/12188 [34:40<4:04:00, 7.60s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7fbba93c2e80> +[Try #0] Failed to fetch sample 4558880 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7fbba93c2e80> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Settings - Sleeping'"}, {'from': 'gpt', 'value': '\nclick(x=0.566, y=0.012)\n'}]} + 84%|████████▍ | 10262/12188 [34:47<3:59:01, 7.45s/it] {'loss': 0.3055, 'grad_norm': 0.6945856964136395, 'learning_rate': 6.413720820967273e-07, 'epoch': 0.84} + 84%|████████▍ | 10262/12188 [34:47<3:59:01, 7.45s/it] 84%|████████▍ | 10263/12188 [34:55<4:02:09, 7.55s/it] {'loss': 0.2846, 'grad_norm': 0.6454772108869336, 'learning_rate': 6.407211774729171e-07, 'epoch': 0.84} + 84%|████████▍ | 10263/12188 [34:55<4:02:09, 7.55s/it] 84%|████████▍ | 10264/12188 [35:02<3:58:28, 7.44s/it] {'loss': 0.272, 'grad_norm': 0.7072819097804961, 'learning_rate': 6.400705806943724e-07, 'epoch': 0.84} + 84%|████████▍ | 10264/12188 [35:02<3:58:28, 7.44s/it] 84%|████████▍ | 10265/12188 [35:09<3:56:50, 7.39s/it] {'loss': 0.3014, 'grad_norm': 1.5513197934997207, 'learning_rate': 6.394202918070391e-07, 'epoch': 0.84} + 84%|████████▍ | 10265/12188 [35:09<3:56:50, 7.39s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7fd4144cd6c0> +[Try #0] Failed to fetch sample 4698401 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7fd4144cd6c0> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Code of Conduct'"}, {'from': 'gpt', 'value': '\nclick(x=0.917, y=0.558)\n'}]} + 84%|████████▍ | 10266/12188 [35:16<3:54:23, 7.32s/it] {'loss': 0.315, 'grad_norm': 0.6561415839621625, 'learning_rate': 6.387703108568394e-07, 'epoch': 0.84} + 84%|████████▍ | 10266/12188 [35:16<3:54:23, 7.32s/it] 84%|████████▍ | 10267/12188 [35:23<3:49:15, 7.16s/it] {'loss': 0.3074, 'grad_norm': 0.6650063934961576, 'learning_rate': 6.381206378896726e-07, 'epoch': 0.84} + 84%|████████▍ | 10267/12188 [35:23<3:49:15, 7.16s/it] 84%|████████▍ | 10268/12188 [35:30<3:47:36, 7.11s/it] {'loss': 0.276, 'grad_norm': 0.6915995479472611, 'learning_rate': 6.374712729514198e-07, 'epoch': 0.84} + 84%|████████▍ | 10268/12188 [35:30<3:47:36, 7.11s/it] 84%|████████▍ | 10269/12188 [35:37<3:46:20, 7.08s/it] {'loss': 0.2973, 'grad_norm': 1.1656061858939937, 'learning_rate': 6.368222160879356e-07, 'epoch': 0.84} + 84%|████████▍ | 10269/12188 [35:37<3:46:20, 7.08s/it] 84%|████████▍ | 10270/12188 [35:45<3:49:01, 7.16s/it] {'loss': 0.3036, 'grad_norm': 0.7219053160963884, 'learning_rate': 6.361734673450565e-07, 'epoch': 0.84} + 84%|████████▍ | 10270/12188 [35:45<3:49:01, 7.16s/it] 84%|████████▍ | 10271/12188 [35:53<4:00:39, 7.53s/it] {'loss': 0.3125, 'grad_norm': 0.8144907638028093, 'learning_rate': 6.35525026768597e-07, 'epoch': 0.84} + 84%|████████▍ | 10271/12188 [35:53<4:00:39, 7.53s/it] 84%|████████▍ | 10272/12188 [36:00<3:52:08, 7.27s/it] {'loss': 0.2987, 'grad_norm': 0.7161364731890631, 'learning_rate': 6.348768944043482e-07, 'epoch': 0.84} + 84%|████████▍ | 10272/12188 [36:00<3:52:08, 7.27s/it] 84%|████████▍ | 10273/12188 [36:06<3:46:29, 7.10s/it] {'loss': 0.2796, 'grad_norm': 0.6997347789673628, 'learning_rate': 6.342290702980785e-07, 'epoch': 0.84} + 84%|████████▍ | 10273/12188 [36:06<3:46:29, 7.10s/it] 84%|████████▍ | 10274/12188 [36:15<4:04:02, 7.65s/it] {'loss': 0.3071, 'grad_norm': 0.6997384803660959, 'learning_rate': 6.335815544955392e-07, 'epoch': 0.84} + 84%|████████▍ | 10274/12188 [36:15<4:04:02, 7.65s/it] 84%|████████▍ | 10275/12188 [36:22<3:53:08, 7.31s/it] {'loss': 0.2846, 'grad_norm': 0.6588822302756335, 'learning_rate': 6.329343470424542e-07, 'epoch': 0.84} + 84%|████████▍ | 10275/12188 [36:22<3:53:08, 7.31s/it] 84%|████████▍ | 10276/12188 [36:29<3:50:50, 7.24s/it] {'loss': 0.3092, 'grad_norm': 0.6424582185645247, 'learning_rate': 6.322874479845309e-07, 'epoch': 0.84} + 84%|████████▍ | 10276/12188 [36:29<3:50:50, 7.24s/it] 84%|████████▍ | 10277/12188 [36:36<3:45:48, 7.09s/it] {'loss': 0.3178, 'grad_norm': 0.9483850623928193, 'learning_rate': 6.316408573674492e-07, 'epoch': 0.84} + 84%|████████▍ | 10277/12188 [36:36<3:45:48, 7.09s/it] 84%|████████▍ | 10278/12188 [36:42<3:42:17, 6.98s/it] {'loss': 0.2984, 'grad_norm': 0.6724065020723016, 'learning_rate': 6.309945752368718e-07, 'epoch': 0.84} + 84%|████████▍ | 10278/12188 [36:42<3:42:17, 6.98s/it] 84%|████████▍ | 10279/12188 [36:50<3:50:24, 7.24s/it] {'loss': 0.3284, 'grad_norm': 0.7362399781647987, 'learning_rate': 6.303486016384392e-07, 'epoch': 0.84} + 84%|████████▍ | 10279/12188 [36:50<3:50:24, 7.24s/it] 84%|████████▍ | 10280/12188 [36:58<3:57:19, 7.46s/it] {'loss': 0.3117, 'grad_norm': 0.785497657601072, 'learning_rate': 6.29702936617767e-07, 'epoch': 0.84} + 84%|████████▍ | 10280/12188 [36:58<3:57:19, 7.46s/it] 84%|████████▍ | 10281/12188 [37:05<3:53:17, 7.34s/it] {'loss': 0.3211, 'grad_norm': 0.7008768534858637, 'learning_rate': 6.290575802204535e-07, 'epoch': 0.84} + 84%|████████▍ | 10281/12188 [37:05<3:53:17, 7.34s/it] 84%|████████▍ | 10282/12188 [37:12<3:52:27, 7.32s/it] {'loss': 0.32, 'grad_norm': 0.7186603482164727, 'learning_rate': 6.284125324920698e-07, 'epoch': 0.84} + 84%|████████▍ | 10282/12188 [37:12<3:52:27, 7.32s/it] 84%|████████▍ | 10283/12188 [37:20<3:50:00, 7.24s/it] {'loss': 0.3267, 'grad_norm': 0.7201000350097785, 'learning_rate': 6.277677934781695e-07, 'epoch': 0.84} + 84%|████████▍ | 10283/12188 [37:20<3:50:00, 7.24s/it] 84%|████████▍ | 10284/12188 [37:26<3:46:44, 7.15s/it] {'loss': 0.285, 'grad_norm': 0.7071231131064738, 'learning_rate': 6.27123363224284e-07, 'epoch': 0.84} + 84%|████████▍ | 10284/12188 [37:26<3:46:44, 7.15s/it] 84%|████████▍ | 10285/12188 [37:33<3:43:47, 7.06s/it] {'loss': 0.3266, 'grad_norm': 0.8345373011899235, 'learning_rate': 6.264792417759202e-07, 'epoch': 0.84} + 84%|████████▍ | 10285/12188 [37:33<3:43:47, 7.06s/it] 84%|████████▍ | 10286/12188 [37:40<3:45:00, 7.10s/it] {'loss': 0.2612, 'grad_norm': 0.6838897384417124, 'learning_rate': 6.258354291785668e-07, 'epoch': 0.84} + 84%|████████▍ | 10286/12188 [37:41<3:45:00, 7.10s/it] 84%|████████▍ | 10287/12188 [37:47<3:41:23, 6.99s/it] {'loss': 0.2758, 'grad_norm': 0.7194812663450679, 'learning_rate': 6.251919254776878e-07, 'epoch': 0.84} + 84%|████████▍ | 10287/12188 [37:47<3:41:23, 6.99s/it] 84%|████████▍ | 10288/12188 [37:54<3:42:53, 7.04s/it] {'loss': 0.3024, 'grad_norm': 0.6823998071260308, 'learning_rate': 6.245487307187253e-07, 'epoch': 0.84} + 84%|████████▍ | 10288/12188 [37:54<3:42:53, 7.04s/it] 84%|████████▍ | 10289/12188 [38:02<3:46:52, 7.17s/it] {'loss': 0.3326, 'grad_norm': 0.757559577516505, 'learning_rate': 6.239058449471025e-07, 'epoch': 0.84} + 84%|████████▍ | 10289/12188 [38:02<3:46:52, 7.17s/it] 84%|████████▍ | 10290/12188 [38:09<3:47:27, 7.19s/it] {'loss': 0.3, 'grad_norm': 0.7615727924602597, 'learning_rate': 6.232632682082174e-07, 'epoch': 0.84} + 84%|████████▍ | 10290/12188 [38:09<3:47:27, 7.19s/it] 84%|████████▍ | 10291/12188 [38:16<3:40:30, 6.97s/it] {'loss': 0.2929, 'grad_norm': 0.9870609114407362, 'learning_rate': 6.226210005474486e-07, 'epoch': 0.84} + 84%|████████▍ | 10291/12188 [38:16<3:40:30, 6.97s/it] 84%|████████▍ | 10292/12188 [38:22<3:36:59, 6.87s/it] {'loss': 0.3117, 'grad_norm': 0.7491778849914047, 'learning_rate': 6.219790420101529e-07, 'epoch': 0.84} + 84%|████████▍ | 10292/12188 [38:22<3:36:59, 6.87s/it] 84%|████████▍ | 10293/12188 [38:31<3:54:49, 7.43s/it] {'loss': 0.28, 'grad_norm': 0.6663333220081005, 'learning_rate': 6.213373926416627e-07, 'epoch': 0.84} + 84%|████████▍ | 10293/12188 [38:31<3:54:49, 7.43s/it] 84%|████████▍ | 10294/12188 [38:38<3:49:27, 7.27s/it] {'loss': 0.2753, 'grad_norm': 0.9022729664561258, 'learning_rate': 6.206960524872913e-07, 'epoch': 0.84} + 84%|████████▍ | 10294/12188 [38:38<3:49:27, 7.27s/it] 84%|████████▍ | 10295/12188 [38:45<3:44:33, 7.12s/it] {'loss': 0.3034, 'grad_norm': 0.9821555274214242, 'learning_rate': 6.200550215923284e-07, 'epoch': 0.84} + 84%|████████▍ | 10295/12188 [38:45<3:44:33, 7.12s/it] 84%|████████▍ | 10296/12188 [38:52<3:46:59, 7.20s/it] {'loss': 0.3033, 'grad_norm': 0.878113873567418, 'learning_rate': 6.194143000020425e-07, 'epoch': 0.84} + 84%|████████▍ | 10296/12188 [38:52<3:46:59, 7.20s/it] 84%|████████▍ | 10297/12188 [38:59<3:41:05, 7.02s/it] {'loss': 0.2815, 'grad_norm': 0.9671858331805083, 'learning_rate': 6.187738877616822e-07, 'epoch': 0.84} + 84%|████████▍ | 10297/12188 [38:59<3:41:05, 7.02s/it] 84%|████████▍ | 10298/12188 [39:06<3:40:53, 7.01s/it] {'loss': 0.291, 'grad_norm': 0.6743174528221769, 'learning_rate': 6.181337849164699e-07, 'epoch': 0.84} + 84%|████████▍ | 10298/12188 [39:06<3:40:53, 7.01s/it] 85%|████████▍ | 10299/12188 [39:13<3:44:39, 7.14s/it] {'loss': 0.3344, 'grad_norm': 0.6697849036488575, 'learning_rate': 6.174939915116107e-07, 'epoch': 0.84} + 85%|████████▍ | 10299/12188 [39:13<3:44:39, 7.14s/it] 85%|████████▍ | 10300/12188 [39:21<3:51:27, 7.36s/it] {'loss': 0.3123, 'grad_norm': 0.6916137175050089, 'learning_rate': 6.168545075922844e-07, 'epoch': 0.85} + 85%|████████▍ | 10300/12188 [39:21<3:51:27, 7.36s/it] 85%|████████▍ | 10301/12188 [39:28<3:46:04, 7.19s/it] {'loss': 0.2847, 'grad_norm': 0.6752180817411597, 'learning_rate': 6.162153332036503e-07, 'epoch': 0.85} + 85%|████████▍ | 10301/12188 [39:28<3:46:04, 7.19s/it] 85%|████████▍ | 10302/12188 [39:35<3:44:25, 7.14s/it] {'loss': 0.3043, 'grad_norm': 0.6999617664822515, 'learning_rate': 6.155764683908466e-07, 'epoch': 0.85} + 85%|████████▍ | 10302/12188 [39:35<3:44:25, 7.14s/it] 85%|████████▍ | 10303/12188 [39:42<3:42:53, 7.09s/it] {'loss': 0.3141, 'grad_norm': 0.7532328604220341, 'learning_rate': 6.14937913198988e-07, 'epoch': 0.85} + 85%|████████▍ | 10303/12188 [39:42<3:42:53, 7.09s/it] 85%|████████▍ | 10304/12188 [39:49<3:45:10, 7.17s/it] {'loss': 0.2974, 'grad_norm': 0.730486704187805, 'learning_rate': 6.142996676731688e-07, 'epoch': 0.85} + 85%|████████▍ | 10304/12188 [39:49<3:45:10, 7.17s/it] 85%|████████▍ | 10305/12188 [39:56<3:39:11, 6.98s/it] {'loss': 0.3127, 'grad_norm': 0.7170607151996717, 'learning_rate': 6.13661731858462e-07, 'epoch': 0.85} + 85%|████████▍ | 10305/12188 [39:56<3:39:11, 6.98s/it] 85%|████████▍ | 10306/12188 [40:02<3:35:59, 6.89s/it] {'loss': 0.3384, 'grad_norm': 0.8053916558047465, 'learning_rate': 6.130241057999153e-07, 'epoch': 0.85} + 85%|████████▍ | 10306/12188 [40:02<3:35:59, 6.89s/it] 85%|████████▍ | 10307/12188 [40:09<3:35:19, 6.87s/it] {'loss': 0.3134, 'grad_norm': 0.6902325933304905, 'learning_rate': 6.123867895425589e-07, 'epoch': 0.85} + 85%|████████▍ | 10307/12188 [40:09<3:35:19, 6.87s/it] 85%|████████▍ | 10308/12188 [40:17<3:43:18, 7.13s/it] {'loss': 0.2955, 'grad_norm': 0.7994079460647601, 'learning_rate': 6.117497831313973e-07, 'epoch': 0.85} + 85%|████████▍ | 10308/12188 [40:17<3:43:18, 7.13s/it] 85%|████████▍ | 10309/12188 [40:24<3:41:30, 7.07s/it] {'loss': 0.293, 'grad_norm': 0.66059248888879, 'learning_rate': 6.111130866114162e-07, 'epoch': 0.85} + 85%|████████▍ | 10309/12188 [40:24<3:41:30, 7.07s/it] 85%|████████▍ | 10310/12188 [40:30<3:37:05, 6.94s/it] {'loss': 0.3433, 'grad_norm': 0.6715330864064918, 'learning_rate': 6.10476700027578e-07, 'epoch': 0.85} + 85%|████████▍ | 10310/12188 [40:30<3:37:05, 6.94s/it] 85%|████████▍ | 10311/12188 [40:37<3:38:58, 7.00s/it] {'loss': 0.2612, 'grad_norm': 0.7382970361090021, 'learning_rate': 6.098406234248222e-07, 'epoch': 0.85} + 85%|████████▍ | 10311/12188 [40:38<3:38:58, 7.00s/it] 85%|████████▍ | 10312/12188 [40:44<3:37:17, 6.95s/it] {'loss': 0.2923, 'grad_norm': 0.6949599693814503, 'learning_rate': 6.092048568480697e-07, 'epoch': 0.85} + 85%|████████▍ | 10312/12188 [40:44<3:37:17, 6.95s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f8ffaf7aed0> +[Try #0] Failed to fetch sample 4870604 in VC:s3://gui/OS-Atlas/desktop_domain/linux_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f8ffaf7aed0> +Problematic sample: {'image': 'output_20240912_153123_original_screenshot_sub3.png', 'conversations': [{'from': 'human', 'value': "\nClick on '跳至本页面的“常见问题解答”部分'"}, {'from': 'gpt', 'value': '\nclick(x=0.2504, y=0.3285)\n'}]} + 85%|████████▍ | 10313/12188 [40:52<3:45:35, 7.22s/it] {'loss': 0.2887, 'grad_norm': 0.6823283568474532, 'learning_rate': 6.085694003422144e-07, 'epoch': 0.85} + 85%|████████▍ | 10313/12188 [40:52<3:45:35, 7.22s/it] 85%|████████▍ | 10314/12188 [40:59<3:39:37, 7.03s/it] {'loss': 0.2936, 'grad_norm': 0.6971903796703539, 'learning_rate': 6.079342539521338e-07, 'epoch': 0.85} + 85%|████████▍ | 10314/12188 [40:59<3:39:37, 7.03s/it] 85%|████████▍ | 10315/12188 [41:06<3:43:00, 7.14s/it] {'loss': 0.2822, 'grad_norm': 0.8239287650376151, 'learning_rate': 6.072994177226793e-07, 'epoch': 0.85} + 85%|████████▍ | 10315/12188 [41:06<3:43:00, 7.14s/it] 85%|████████▍ | 10316/12188 [41:13<3:43:55, 7.18s/it] {'loss': 0.3209, 'grad_norm': 0.6661056396468413, 'learning_rate': 6.066648916986823e-07, 'epoch': 0.85} + 85%|████████▍ | 10316/12188 [41:13<3:43:55, 7.18s/it] 85%|█���██████▍ | 10317/12188 [41:21<3:45:43, 7.24s/it] {'loss': 0.2914, 'grad_norm': 0.7228805303663913, 'learning_rate': 6.060306759249529e-07, 'epoch': 0.85} + 85%|████████▍ | 10317/12188 [41:21<3:45:43, 7.24s/it] 85%|████████▍ | 10318/12188 [41:28<3:41:46, 7.12s/it] {'loss': 0.2819, 'grad_norm': 0.7111207562607418, 'learning_rate': 6.053967704462766e-07, 'epoch': 0.85} + 85%|████████▍ | 10318/12188 [41:28<3:41:46, 7.12s/it] 85%|████████▍ | 10319/12188 [41:34<3:37:07, 6.97s/it] {'loss': 0.2757, 'grad_norm': 0.6813720174080595, 'learning_rate': 6.047631753074195e-07, 'epoch': 0.85} + 85%|████████▍ | 10319/12188 [41:34<3:37:07, 6.97s/it] 85%|████████▍ | 10320/12188 [41:42<3:42:01, 7.13s/it] {'loss': 0.3268, 'grad_norm': 0.7154007958203259, 'learning_rate': 6.041298905531273e-07, 'epoch': 0.85} + 85%|████████▍ | 10320/12188 [41:42<3:42:01, 7.13s/it] 85%|████████▍ | 10321/12188 [41:49<3:42:50, 7.16s/it] {'loss': 0.3169, 'grad_norm': 1.1524112824110109, 'learning_rate': 6.034969162281184e-07, 'epoch': 0.85} + 85%|████████▍ | 10321/12188 [41:49<3:42:50, 7.16s/it] 85%|████████▍ | 10322/12188 [41:56<3:41:10, 7.11s/it] {'loss': 0.3055, 'grad_norm': 0.6301075862299148, 'learning_rate': 6.028642523770934e-07, 'epoch': 0.85} + 85%|████████▍ | 10322/12188 [41:56<3:41:10, 7.11s/it] 85%|████████▍ | 10323/12188 [42:03<3:42:23, 7.15s/it] {'loss': 0.2794, 'grad_norm': 0.6755510808157721, 'learning_rate': 6.022318990447318e-07, 'epoch': 0.85} + 85%|████████▍ | 10323/12188 [42:03<3:42:23, 7.15s/it] 85%|████████▍ | 10324/12188 [42:11<3:45:22, 7.25s/it] {'loss': 0.294, 'grad_norm': 0.8030783669976366, 'learning_rate': 6.01599856275687e-07, 'epoch': 0.85} + 85%|████████▍ | 10324/12188 [42:11<3:45:22, 7.25s/it] 85%|████████▍ | 10325/12188 [42:21<4:15:39, 8.23s/it] {'loss': 0.3302, 'grad_norm': 0.6822585796309107, 'learning_rate': 6.009681241145943e-07, 'epoch': 0.85} + 85%|████████▍ | 10325/12188 [42:21<4:15:39, 8.23s/it] 85%|████████▍ | 10326/12188 [42:30<4:18:29, 8.33s/it] {'loss': 0.2835, 'grad_norm': 0.6261724608739906, 'learning_rate': 6.003367026060647e-07, 'epoch': 0.85} + 85%|████████▍ | 10326/12188 [42:30<4:18:29, 8.33s/it] 85%|████████▍ | 10327/12188 [42:37<4:03:30, 7.85s/it] {'loss': 0.283, 'grad_norm': 0.7396678417275611, 'learning_rate': 5.997055917946893e-07, 'epoch': 0.85} + 85%|████████▍ | 10327/12188 [42:37<4:03:30, 7.85s/it] 85%|████████▍ | 10328/12188 [42:44<4:00:15, 7.75s/it] {'loss': 0.2936, 'grad_norm': 0.8769166429105894, 'learning_rate': 5.99074791725035e-07, 'epoch': 0.85} + 85%|████████▍ | 10328/12188 [42:44<4:00:15, 7.75s/it] 85%|████████▍ | 10329/12188 [42:53<4:08:46, 8.03s/it] {'loss': 0.2545, 'grad_norm': 0.7077229654935955, 'learning_rate': 5.984443024416476e-07, 'epoch': 0.85} + 85%|████████▍ | 10329/12188 [42:53<4:08:46, 8.03s/it] 85%|████████▍ | 10330/12188 [42:59<3:56:01, 7.62s/it] {'loss': 0.3154, 'grad_norm': 0.7235856546170394, 'learning_rate': 5.978141239890528e-07, 'epoch': 0.85} + 85%|████████▍ | 10330/12188 [42:59<3:56:01, 7.62s/it] 85%|████████▍ | 10331/12188 [43:07<3:54:38, 7.58s/it] {'loss': 0.3302, 'grad_norm': 0.6702635334456145, 'learning_rate': 5.971842564117513e-07, 'epoch': 0.85} + 85%|████████▍ | 10331/12188 [43:07<3:54:38, 7.58s/it] 85%|████████▍ | 10332/12188 [43:14<3:47:39, 7.36s/it] {'loss': 0.275, 'grad_norm': 0.9217165027654823, 'learning_rate': 5.965546997542238e-07, 'epoch': 0.85} + 85%|████████▍ | 10332/12188 [43:14<3:47:39, 7.36s/it] 85%|████████▍ | 10333/12188 [43:22<3:52:20, 7.52s/it] {'loss': 0.3443, 'grad_norm': 0.6777524707639412, 'learning_rate': 5.959254540609294e-07, 'epoch': 0.85} + 85%|████████▍ | 10333/12188 [43:22<3:52:20, 7.52s/it] 85%|████████▍ | 10334/12188 [43:29<3:47:22, 7.36s/it] {'loss': 0.2758, 'grad_norm': 0.6745544619189585, 'learning_rate': 5.952965193763028e-07, 'epoch': 0.85} + 85%|████████▍ | 10334/12188 [43:29<3:47:22, 7.36s/it] 85%|████████▍ | 10335/12188 [43:36<3:51:26, 7.49s/it] {'loss': 0.3228, 'grad_norm': 0.7228110607559504, 'learning_rate': 5.946678957447605e-07, 'epoch': 0.85} + 85%|████████▍ | 10335/12188 [43:36<3:51:26, 7.49s/it] 85%|████████▍ | 10336/12188 [43:44<3:52:11, 7.52s/it] {'loss': 0.3065, 'grad_norm': 0.65870290305815, 'learning_rate': 5.940395832106926e-07, 'epoch': 0.85} + 85%|████████▍ | 10336/12188 [43:44<3:52:11, 7.52s/it] 85%|████████▍ | 10337/12188 [43:51<3:50:48, 7.48s/it] {'loss': 0.333, 'grad_norm': 0.7128342971597688, 'learning_rate': 5.93411581818471e-07, 'epoch': 0.85} + 85%|████████▍ | 10337/12188 [43:51<3:50:48, 7.48s/it] 85%|████████▍ | 10338/12188 [43:58<3:43:18, 7.24s/it] {'loss': 0.3233, 'grad_norm': 0.662942482836264, 'learning_rate': 5.927838916124445e-07, 'epoch': 0.85} + 85%|████████▍ | 10338/12188 [43:58<3:43:18, 7.24s/it] 85%|████████▍ | 10339/12188 [44:05<3:44:22, 7.28s/it] {'loss': 0.314, 'grad_norm': 0.7538975009219002, 'learning_rate': 5.921565126369378e-07, 'epoch': 0.85} + 85%|████████▍ | 10339/12188 [44:05<3:44:22, 7.28s/it] 85%|████████▍ | 10340/12188 [44:12<3:38:02, 7.08s/it] {'loss': 0.3006, 'grad_norm': 0.78043760463771, 'learning_rate': 5.915294449362574e-07, 'epoch': 0.85} + 85%|████████▍ | 10340/12188 [44:12<3:38:02, 7.08s/it] 85%|████████▍ | 10341/12188 [44:22<4:00:42, 7.82s/it] {'loss': 0.3066, 'grad_norm': 0.6882851638555034, 'learning_rate': 5.909026885546837e-07, 'epoch': 0.85} + 85%|████████▍ | 10341/12188 [44:22<4:00:42, 7.82s/it] 85%|████████▍ | 10342/12188 [44:29<3:52:56, 7.57s/it] {'loss': 0.3107, 'grad_norm': 0.6180838256426558, 'learning_rate': 5.902762435364795e-07, 'epoch': 0.85} + 85%|████████▍ | 10342/12188 [44:29<3:52:56, 7.57s/it] 85%|████████▍ | 10343/12188 [44:36<3:46:41, 7.37s/it] {'loss': 0.2893, 'grad_norm': 0.7438863418367802, 'learning_rate': 5.896501099258822e-07, 'epoch': 0.85} + 85%|████████▍ | 10343/12188 [44:36<3:46:41, 7.37s/it] 85%|████████▍ | 10344/12188 [44:42<3:38:39, 7.11s/it] {'loss': 0.2736, 'grad_norm': 0.6706210712810768, 'learning_rate': 5.890242877671077e-07, 'epoch': 0.85} + 85%|████████▍ | 10344/12188 [44:42<3:38:39, 7.11s/it] 85%|████████▍ | 10345/12188 [44:50<3:43:34, 7.28s/it] {'loss': 0.2625, 'grad_norm': 0.6771531911002676, 'learning_rate': 5.883987771043509e-07, 'epoch': 0.85} + 85%|████████▍ | 10345/12188 [44:50<3:43:34, 7.28s/it] 85%|████████▍ | 10346/12188 [44:56<3:38:43, 7.12s/it] {'loss': 0.2878, 'grad_norm': 0.7008649370353063, 'learning_rate': 5.877735779817861e-07, 'epoch': 0.85} + 85%|████████▍ | 10346/12188 [44:56<3:38:43, 7.12s/it] 85%|████████▍ | 10347/12188 [45:04<3:38:52, 7.13s/it] {'loss': 0.3134, 'grad_norm': 0.6927782105560317, 'learning_rate': 5.871486904435608e-07, 'epoch': 0.85} + 85%|████████▍ | 10347/12188 [45:04<3:38:52, 7.13s/it] 85%|████████▍ | 10348/12188 [45:11<3:41:52, 7.24s/it] {'loss': 0.2579, 'grad_norm': 0.624692967788261, 'learning_rate': 5.865241145338063e-07, 'epoch': 0.85} + 85%|████████▍ | 10348/12188 [45:11<3:41:52, 7.24s/it] 85%|████████▍ | 10349/12188 [45:19<3:46:53, 7.40s/it] {'loss': 0.2872, 'grad_norm': 0.7363629318640803, 'learning_rate': 5.858998502966273e-07, 'epoch': 0.85} + 85%|████████▍ | 10349/12188 [45:19<3:46:53, 7.40s/it] 85%|████████▍ | 10350/12188 [45:26<3:42:21, 7.26s/it] {'loss': 0.2889, 'grad_norm': 0.6988748107038013, 'learning_rate': 5.852758977761091e-07, 'epoch': 0.85} + 85%|████████▍ | 10350/12188 [45:26<3:42:21, 7.26s/it] 85%|████████▍ | 10351/12188 [45:33<3:41:50, 7.25s/it] {'loss': 0.3287, 'grad_norm': 0.7985700769991613, 'learning_rate': 5.846522570163155e-07, 'epoch': 0.85} + 85%|████████▍ | 10351/12188 [45:33<3:41:50, 7.25s/it] 85%|████████▍ | 10352/12188 [45:40<3:37:48, 7.12s/it] {'loss': 0.3205, 'grad_norm': 0.7073041716179583, 'learning_rate': 5.840289280612837e-07, 'epoch': 0.85} + 85%|████████▍ | 10352/12188 [45:40<3:37:48, 7.12s/it] 85%|████████▍ | 10353/12188 [45:46<3:32:46, 6.96s/it] {'loss': 0.252, 'grad_norm': 0.6951945689060948, 'learning_rate': 5.83405910955036e-07, 'epoch': 0.85} + 85%|████████▍ | 10353/12188 [45:46<3:32:46, 6.96s/it]Invalidate trace cache @ step 2: expected module 1, but got module 364 + 85%|████████▍ | 10354/12188 [45:53<3:26:19, 6.75s/it] {'loss': 0.6415, 'grad_norm': 0.5757713190659124, 'learning_rate': 5.827832057415661e-07, 'epoch': 0.85} + 85%|████████▍ | 10354/12188 [45:53<3:26:19, 6.75s/it] 85%|████████▍ | 10355/12188 [46:00<3:33:44, 7.00s/it] {'loss': 0.2965, 'grad_norm': 0.6745596583824585, 'learning_rate': 5.821608124648504e-07, 'epoch': 0.85} + 85%|████████▍ | 10355/12188 [46:00<3:33:44, 7.00s/it] 85%|████████▍ | 10356/12188 [46:08<3:40:02, 7.21s/it] {'loss': 0.304, 'grad_norm': 0.6928595323730736, 'learning_rate': 5.815387311688398e-07, 'epoch': 0.85} + 85%|████████▍ | 10356/12188 [46:08<3:40:02, 7.21s/it] 85%|████████▍ | 10357/12188 [46:15<3:34:10, 7.02s/it] {'loss': 0.3053, 'grad_norm': 0.9407474829771827, 'learning_rate': 5.809169618974647e-07, 'epoch': 0.85} + 85%|████████▍ | 10357/12188 [46:15<3:34:10, 7.02s/it] 85%|████████▍ | 10358/12188 [46:22<3:36:39, 7.10s/it] {'loss': 0.2851, 'grad_norm': 0.706766166651488, 'learning_rate': 5.802955046946335e-07, 'epoch': 0.85} + 85%|████████▍ | 10358/12188 [46:22<3:36:39, 7.10s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1348, in _get_item + raise ValueError( +ValueError: Number of image tokens ['data/bottom-navigation/other_screenshot/original/ProductivityBottomNavigation_1739983263.5379376.png'] does not match number of images None +[Try #0] Failed to fetch sample 1868482 in VC:s3://gui-agent/jedi/images/component_v1_130k/component_v1_130k_extracted/. Exception: Number of image tokens ['data/bottom-navigation/other_screenshot/original/ProductivityBottomNavigation_1739983263.5379376.png'] does not match number of images None +Problematic sample: {'image': 'data/bottom-navigation/other_screenshot/original/ProductivityBottomNavigation_1739983263.5379376.png', 'conversations': [], 'image_id': 'data/bottom-navigation/other_screenshot/original/ProductivityBottomNavigation_1739983263.5379376.png'} + 85%|████████▍ | 10359/12188 [46:29<3:33:44, 7.01s/it] {'loss': 0.296, 'grad_norm': 0.7246986547247193, 'learning_rate': 5.796743596042342e-07, 'epoch': 0.85} + 85%|████████▍ | 10359/12188 [46:29<3:33:44, 7.01s/it] 85%|████████▌ | 10360/12188 [46:36<3:34:27, 7.04s/it] {'loss': 0.2428, 'grad_norm': 0.7571739108049534, 'learning_rate': 5.790535266701291e-07, 'epoch': 0.85} + 85%|████████▌ | 10360/12188 [46:36<3:34:27, 7.04s/it] 85%|████████▌ | 10361/12188 [46:43<3:32:09, 6.97s/it] {'loss': 0.3097, 'grad_norm': 0.6934222851863723, 'learning_rate': 5.78433005936162e-07, 'epoch': 0.85} + 85%|████████▌ | 10361/12188 [46:43<3:32:09, 6.97s/it] 85%|████████▌ | 10362/12188 [46:49<3:30:24, 6.91s/it] {'loss': 0.2768, 'grad_norm': 0.7372240282985417, 'learning_rate': 5.778127974461511e-07, 'epoch': 0.85} + 85%|████████▌ | 10362/12188 [46:49<3:30:24, 6.91s/it] 85%|████████▌ | 10363/12188 [46:57<3:36:40, 7.12s/it] {'loss': 0.2753, 'grad_norm': 0.6395711165315371, 'learning_rate': 5.77192901243896e-07, 'epoch': 0.85} + 85%|████████▌ | 10363/12188 [46:57<3:36:40, 7.12s/it] 85%|████████▌ | 10364/12188 [47:04<3:34:01, 7.04s/it] {'loss': 0.3183, 'grad_norm': 0.7005085766316715, 'learning_rate': 5.765733173731731e-07, 'epoch': 0.85} + 85%|████████▌ | 10364/12188 [47:04<3:34:01, 7.04s/it] 85%|████████▌ | 10365/12188 [47:10<3:29:03, 6.88s/it] {'loss': 0.3428, 'grad_norm': 1.1605325747606037, 'learning_rate': 5.759540458777346e-07, 'epoch': 0.85} + 85%|████████▌ | 10365/12188 [47:10<3:29:03, 6.88s/it] 85%|████████▌ | 10366/12188 [47:17<3:29:17, 6.89s/it] {'loss': 0.2683, 'grad_norm': 0.6565887326961953, 'learning_rate': 5.753350868013147e-07, 'epoch': 0.85} + 85%|████████▌ | 10366/12188 [47:17<3:29:17, 6.89s/it] 85%|████████▌ | 10367/12188 [47:24<3:27:30, 6.84s/it] {'loss': 0.3437, 'grad_norm': 0.739666569390188, 'learning_rate': 5.747164401876215e-07, 'epoch': 0.85} + 85%|████████▌ | 10367/12188 [47:24<3:27:30, 6.84s/it] 85%|████████▌ | 10368/12188 [47:31<3:28:31, 6.87s/it] {'loss': 0.2866, 'grad_norm': 0.7883214745849326, 'learning_rate': 5.740981060803441e-07, 'epoch': 0.85} + 85%|████████▌ | 10368/12188 [47:31<3:28:31, 6.87s/it] 85%|████████▌ | 10369/12188 [47:38<3:29:18, 6.90s/it] {'loss': 0.3107, 'grad_norm': 0.7763905788441209, 'learning_rate': 5.73480084523147e-07, 'epoch': 0.85} + 85%|████████▌ | 10369/12188 [47:38<3:29:18, 6.90s/it] 85%|████████▌ | 10370/12188 [47:47<3:51:25, 7.64s/it] {'loss': 0.2913, 'grad_norm': 0.7004659911801006, 'learning_rate': 5.728623755596757e-07, 'epoch': 0.85} + 85%|████████▌ | 10370/12188 [47:47<3:51:25, 7.64s/it] 85%|████████▌ | 10371/12188 [47:55<3:48:25, 7.54s/it] {'loss': 0.3394, 'grad_norm': 0.711835746948398, 'learning_rate': 5.722449792335505e-07, 'epoch': 0.85} + 85%|████████▌ | 10371/12188 [47:55<3:48:25, 7.54s/it] 85%|████████▌ | 10372/12188 [48:03<3:56:46, 7.82s/it] {'loss': 0.2969, 'grad_norm': 0.6692221855516223, 'learning_rate': 5.716278955883703e-07, 'epoch': 0.85} + 85%|████████▌ | 10372/12188 [48:03<3:56:46, 7.82s/it] 85%|████████▌ | 10373/12188 [48:10<3:46:12, 7.48s/it] {'loss': 0.2748, 'grad_norm': 1.34064660461469, 'learning_rate': 5.71011124667713e-07, 'epoch': 0.85} + 85%|████████▌ | 10373/12188 [48:10<3:46:12, 7.48s/it] 85%|████████▌ | 10374/12188 [48:17<3:45:48, 7.47s/it] {'loss': 0.2712, 'grad_norm': 0.720683950735714, 'learning_rate': 5.703946665151356e-07, 'epoch': 0.85} + 85%|████████▌ | 10374/12188 [48:17<3:45:48, 7.47s/it] 85%|████████▌ | 10375/12188 [48:24<3:41:38, 7.33s/it] {'loss': 0.2901, 'grad_norm': 0.9656380522659712, 'learning_rate': 5.697785211741691e-07, 'epoch': 0.85} + 85%|████████▌ | 10375/12188 [48:24<3:41:38, 7.33s/it] 85%|████████▌ | 10376/12188 [48:31<3:35:17, 7.13s/it] {'loss': 0.3096, 'grad_norm': 0.6260697149808212, 'learning_rate': 5.691626886883261e-07, 'epoch': 0.85} + 85%|████████▌ | 10376/12188 [48:31<3:35:17, 7.13s/it] 85%|████████▌ | 10377/12188 [48:41<3:59:58, 7.95s/it] {'loss': 0.2953, 'grad_norm': 0.9220483392457152, 'learning_rate': 5.685471691010958e-07, 'epoch': 0.85} + 85%|████████▌ | 10377/12188 [48:41<3:59:58, 7.95s/it] 85%|████████▌ | 10378/12188 [48:48<3:57:11, 7.86s/it] {'loss': 0.2899, 'grad_norm': 0.6980209022571685, 'learning_rate': 5.679319624559443e-07, 'epoch': 0.85} + 85%|████████▌ | 10378/12188 [48:48<3:57:11, 7.86s/it] 85%|████████▌ | 10379/12188 [48:56<3:53:28, 7.74s/it] {'loss': 0.306, 'grad_norm': 0.7887895113890796, 'learning_rate': 5.673170687963175e-07, 'epoch': 0.85} + 85%|████████▌ | 10379/12188 [48:56<3:53:28, 7.74s/it] 85%|████████▌ | 10380/12188 [49:03<3:47:22, 7.55s/it] {'loss': 0.2799, 'grad_norm': 0.7016016084827583, 'learning_rate': 5.667024881656369e-07, 'epoch': 0.85} + 85%|████████▌ | 10380/12188 [49:03<3:47:22, 7.55s/it] 85%|████████▌ | 10381/12188 [49:10<3:39:08, 7.28s/it] {'loss': 0.2871, 'grad_norm': 0.7712494062037775, 'learning_rate': 5.660882206073037e-07, 'epoch': 0.85} + 85%|████████▌ | 10381/12188 [49:10<3:39:08, 7.28s/it] 85%|████████▌ | 10382/12188 [49:19<4:00:32, 7.99s/it] {'loss': 0.3167, 'grad_norm': 0.7387013907900211, 'learning_rate': 5.654742661646978e-07, 'epoch': 0.85} + 85%|████████▌ | 10382/12188 [49:19<4:00:32, 7.99s/it] 85%|████████▌ | 10383/12188 [49:26<3:49:26, 7.63s/it] {'loss': 0.2938, 'grad_norm': 0.6731013768445532, 'learning_rate': 5.64860624881175e-07, 'epoch': 0.85} + 85%|████████▌ | 10383/12188 [49:26<3:49:26, 7.63s/it] 85%|████████▌ | 10384/12188 [49:34<3:56:01, 7.85s/it] {'loss': 0.2796, 'grad_norm': 0.6791823093434933, 'learning_rate': 5.64247296800069e-07, 'epoch': 0.85} + 85%|████████▌ | 10384/12188 [49:34<3:56:01, 7.85s/it] 85%|████████▌ | 10385/12188 [49:41<3:43:52, 7.45s/it] {'loss': 0.3101, 'grad_norm': 0.7190142140268275, 'learning_rate': 5.636342819646912e-07, 'epoch': 0.85} + 85%|████████▌ | 10385/12188 [49:41<3:43:52, 7.45s/it] 85%|████████▌ | 10386/12188 [49:48<3:42:09, 7.40s/it] {'loss': 0.3055, 'grad_norm': 0.7631097152588587, 'learning_rate': 5.630215804183325e-07, 'epoch': 0.85} + 85%|████████▌ | 10386/12188 [49:48<3:42:09, 7.40s/it] 85%|████████▌ | 10387/12188 [49:55<3:34:58, 7.16s/it] {'loss': 0.3318, 'grad_norm': 0.7149839300208526, 'learning_rate': 5.624091922042629e-07, 'epoch': 0.85} + 85%|████████▌ | 10387/12188 [49:55<3:34:58, 7.16s/it] 85%|████████▌ | 10388/12188 [50:02<3:31:53, 7.06s/it] {'loss': 0.3024, 'grad_norm': 0.6964344749097098, 'learning_rate': 5.617971173657255e-07, 'epoch': 0.85} + 85%|████████▌ | 10388/12188 [50:02<3:31:53, 7.06s/it] 85%|████████▌ | 10389/12188 [50:09<3:30:46, 7.03s/it] {'loss': 0.2831, 'grad_norm': 0.7163269589927844, 'learning_rate': 5.611853559459457e-07, 'epoch': 0.85} + 85%|████████▌ | 10389/12188 [50:09<3:30:46, 7.03s/it] 85%|████████▌ | 10390/12188 [50:16<3:33:11, 7.11s/it] {'loss': 0.28, 'grad_norm': 0.6631699974838849, 'learning_rate': 5.60573907988124e-07, 'epoch': 0.85} + 85%|████████▌ | 10390/12188 [50:16<3:33:11, 7.11s/it] 85%|████████▌ | 10391/12188 [50:23<3:29:31, 7.00s/it] {'loss': 0.3188, 'grad_norm': 0.673494196527081, 'learning_rate': 5.599627735354408e-07, 'epoch': 0.85} + 85%|████████▌ | 10391/12188 [50:23<3:29:31, 7.00s/it] 85%|████████▌ | 10392/12188 [50:30<3:30:08, 7.02s/it] {'loss': 0.2767, 'grad_norm': 0.7102664113715409, 'learning_rate': 5.593519526310532e-07, 'epoch': 0.85} + 85%|████████▌ | 10392/12188 [50:30<3:30:08, 7.02s/it] 85%|████████▌ | 10393/12188 [50:37<3:29:22, 7.00s/it] {'loss': 0.2564, 'grad_norm': 0.6720994985013161, 'learning_rate': 5.587414453180956e-07, 'epoch': 0.85} + 85%|████████▌ | 10393/12188 [50:37<3:29:22, 7.00s/it] 85%|████████▌ | 10394/12188 [50:43<3:25:08, 6.86s/it] {'loss': 0.3056, 'grad_norm': 0.7051833644034473, 'learning_rate': 5.58131251639682e-07, 'epoch': 0.85} + 85%|████████▌ | 10394/12188 [50:43<3:25:08, 6.86s/it] 85%|████████▌ | 10395/12188 [50:50<3:25:16, 6.87s/it] {'loss': 0.2917, 'grad_norm': 0.7352431796906612, 'learning_rate': 5.575213716389039e-07, 'epoch': 0.85} + 85%|████████▌ | 10395/12188 [50:50<3:25:16, 6.87s/it] 85%|████████▌ | 10396/12188 [50:57<3:30:39, 7.05s/it] {'loss': 0.3299, 'grad_norm': 0.7577353659058665, 'learning_rate': 5.569118053588291e-07, 'epoch': 0.85} + 85%|████████▌ | 10396/12188 [50:57<3:30:39, 7.05s/it] 85%|████████▌ | 10397/12188 [51:04<3:28:31, 6.99s/it] {'loss': 0.293, 'grad_norm': 0.6791749335878675, 'learning_rate': 5.563025528425031e-07, 'epoch': 0.85} + 85%|████████▌ | 10397/12188 [51:04<3:28:31, 6.99s/it] 85%|████████▌ | 10398/12188 [51:11<3:24:57, 6.87s/it] {'loss': 0.3207, 'grad_norm': 0.6278398395209941, 'learning_rate': 5.556936141329521e-07, 'epoch': 0.85} + 85%|████████▌ | 10398/12188 [51:11<3:24:57, 6.87s/it] 85%|████████▌ | 10399/12188 [51:18<3:22:48, 6.80s/it] {'loss': 0.2864, 'grad_norm': 0.7779604000750235, 'learning_rate': 5.550849892731774e-07, 'epoch': 0.85} + 85%|████████▌ | 10399/12188 [51:18<3:22:48, 6.80s/it] 85%|████████▌ | 10400/12188 [51:25<3:24:12, 6.85s/it] {'loss': 0.3032, 'grad_norm': 0.7193007094870478, 'learning_rate': 5.5447667830616e-07, 'epoch': 0.85} + 85%|████████▌ | 10400/12188 [51:25<3:24:12, 6.85s/it] 85%|████████▌ | 10401/12188 [51:31<3:24:12, 6.86s/it] {'loss': 0.3002, 'grad_norm': 0.6847338713446537, 'learning_rate': 5.538686812748567e-07, 'epoch': 0.85} + 85%|████████▌ | 10401/12188 [51:31<3:24:12, 6.86s/it] 85%|████████▌ | 10402/12188 [51:39<3:28:13, 7.00s/it] {'loss': 0.2969, 'grad_norm': 0.7112435103013547, 'learning_rate': 5.532609982222048e-07, 'epoch': 0.85} + 85%|████████▌ | 10402/12188 [51:39<3:28:13, 7.00s/it] 85%|████████▌ | 10403/12188 [51:46<3:31:24, 7.11s/it] {'loss': 0.323, 'grad_norm': 0.6830615282781282, 'learning_rate': 5.526536291911161e-07, 'epoch': 0.85} + 85%|████████▌ | 10403/12188 [51:46<3:31:24, 7.11s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1348, in _get_item + raise ValueError( +ValueError: Number of image tokens ['63eeb12752a6426abeb129b8049d5bddstep20.png', '63eeb12752a6426abeb129b8049d5bddstep21.png'] does not match number of images None +[Try #0] Failed to fetch sample 1096386 in VC:s3://gui/aguvis/aguvis-stage2/amex/images. Exception: Number of image tokens ['63eeb12752a6426abeb129b8049d5bddstep20.png', '63eeb12752a6426abeb129b8049d5bddstep21.png'] does not match number of images None +Problematic sample: {'image': ['63eeb12752a6426abeb129b8049d5bddstep20.png', '63eeb12752a6426abeb129b8049d5bddstep21.png'], 'conversations': [{'from': 'human', 'value': "\nPlease generate the next move according to the UI screenshot, the task and previous operations.\n\nTask:\nI want to book a hotel in london, prize should be less than $600, guest rating is 8+, 4 star rating, breakfast included\n\nPrevious operations:\nStep 1: Tap on the Chrome app to start searching for hotels in London.\nStep 2: Type 'Chrome' in the search bar to open the Chrome browser.\nStep 3: Tap on the Hotels.com app icon to begin searching for a hotel.\nStep 4: Wait for the Hotels.com app to finish loading to proceed with the hotel search.\nStep 5: Tap on the 'Going to' field to enter London as the destination.\nStep 6: Tap on 'London' from Recent Searches to select it as the destination for the hotel search.\nStep 7: Tap on 'London, England, United Kingdom' to select it as the destination for the hotel search.\nStep 8: Tap on the 'Search' button to view available hotels in London.\nStep 9: Tap on the filter icon or option to apply filters to the hotel search results.\nStep 10: Swipe up on the screen to reveal more filter options, such as guest rating and hotel stars.\nStep 11: Swipe up to reveal more filter options, specifically guest ratings and hotel stars.\nStep 12: Tap on the 4-star rating box to select it.\nStep 13: Tap 'Done' to confirm the current filter selections and proceed.\nStep 14: Swipe up on the screen to locate and select the 'breakfast included' filter option.\nStep 15: Tap on 'Breakfast included' to select it.\nStep 16: Tap on the 'Done' button to apply the filters and view available hotel options.\nStep 17: Tap on the topmost hotel card to view its details.\nStep 18: Tap on the 'Select a room' button to check available room options and their prices.\nStep 19: Tap on 'Select and customize' to proceed with the room selection and customization options.\nStep 20: Tap on 'Breakfast buffet' to select it, then tap on 'Reserve' to book the room."}, {'from': 'gpt', 'value': "\nGiven the sequence of booking actions, it seems the process is almost complete. The loading sign indicates that the system is processing the final booking or confirmation step.\n\n\nWait for the loading to complete to confirm the booking status.\n\n\nterminate(status='success')\n"}]} + 85%|████████▌ | 10404/12188 [51:54<3:38:07, 7.34s/it] {'loss': 0.3092, 'grad_norm': 0.7071771958459147, 'learning_rate': 5.520465742244824e-07, 'epoch': 0.85} + 85%|████████▌ | 10404/12188 [51:54<3:38:07, 7.34s/it] 85%|████████▌ | 10405/12188 [52:01<3:32:34, 7.15s/it] {'loss': 0.3183, 'grad_norm': 0.687002548043608, 'learning_rate': 5.51439833365175e-07, 'epoch': 0.85} + 85%|████████▌ | 10405/12188 [52:01<3:32:34, 7.15s/it] 85%|████████▌ | 10406/12188 [52:12<4:12:19, 8.50s/it] {'loss': 0.2367, 'grad_norm': 0.6461067635673262, 'learning_rate': 5.508334066560377e-07, 'epoch': 0.85} + 85%|████████▌ | 10406/12188 [52:12<4:12:19, 8.50s/it] 85%|████████▌ | 10407/12188 [52:20<4:02:44, 8.18s/it] {'loss': 0.3065, 'grad_norm': 1.0278511164249828, 'learning_rate': 5.50227294139899e-07, 'epoch': 0.85} + 85%|████████▌ | 10407/12188 [52:20<4:02:44, 8.18s/it] 85%|████████▌ | 10408/12188 [52:27<3:54:57, 7.92s/it] {'loss': 0.2897, 'grad_norm': 0.7484794118680249, 'learning_rate': 5.496214958595581e-07, 'epoch': 0.85} + 85%|████████▌ | 10408/12188 [52:27<3:54:57, 7.92s/it] 85%|████████▌ | 10409/12188 [52:34<3:50:00, 7.76s/it] {'loss': 0.2764, 'grad_norm': 0.6654400655278353, 'learning_rate': 5.490160118577969e-07, 'epoch': 0.85} + 85%|████████▌ | 10409/12188 [52:34<3:50:00, 7.76s/it] 85%|████████▌ | 10410/12188 [52:42<3:44:46, 7.59s/it] {'loss': 0.2732, 'grad_norm': 0.6763146964933987, 'learning_rate': 5.484108421773754e-07, 'epoch': 0.85} + 85%|████████▌ | 10410/12188 [52:42<3:44:46, 7.59s/it] 85%|████████▌ | 10411/12188 [52:50<3:52:47, 7.86s/it] {'loss': 0.3082, 'grad_norm': 0.7289147640068018, 'learning_rate': 5.47805986861028e-07, 'epoch': 0.85} + 85%|████████▌ | 10411/12188 [52:50<3:52:47, 7.86s/it] 85%|████████▌ | 10412/12188 [52:57<3:42:45, 7.53s/it] {'loss': 0.3109, 'grad_norm': 0.6477197583656734, 'learning_rate': 5.47201445951468e-07, 'epoch': 0.85} + 85%|████████▌ | 10412/12188 [52:57<3:42:45, 7.53s/it] 85%|████████▌ | 10413/12188 [53:05<3:45:32, 7.62s/it] {'loss': 0.2913, 'grad_norm': 0.6097593145671578, 'learning_rate': 5.465972194913882e-07, 'epoch': 0.85} + 85%|████████▌ | 10413/12188 [53:05<3:45:32, 7.62s/it] 85%|████████▌ | 10414/12188 [53:12<3:41:03, 7.48s/it] {'loss': 0.2725, 'grad_norm': 0.7483233497735042, 'learning_rate': 5.459933075234575e-07, 'epoch': 0.85} + 85%|████████▌ | 10414/12188 [53:12<3:41:03, 7.48s/it] 85%|████████▌ | 10415/12188 [53:19<3:34:16, 7.25s/it] {'loss': 0.3374, 'grad_norm': 0.705624463080892, 'learning_rate': 5.453897100903244e-07, 'epoch': 0.85} + 85%|████████▌ | 10415/12188 [53:19<3:34:16, 7.25s/it] 85%|████████▌ | 10416/12188 [53:25<3:30:56, 7.14s/it] {'loss': 0.3038, 'grad_norm': 0.7009183125530799, 'learning_rate': 5.447864272346121e-07, 'epoch': 0.85} + 85%|████████▌ | 10416/12188 [53:25<3:30:56, 7.14s/it] 85%|████████▌ | 10417/12188 [53:33<3:30:20, 7.13s/it] {'loss': 0.3335, 'grad_norm': 0.6742919431047569, 'learning_rate': 5.441834589989242e-07, 'epoch': 0.85} + 85%|████████▌ | 10417/12188 [53:33<3:30:20, 7.13s/it] 85%|████████▌ | 10418/12188 [53:39<3:28:00, 7.05s/it] {'loss': 0.2748, 'grad_norm': 0.7150695451459631, 'learning_rate': 5.435808054258429e-07, 'epoch': 0.85} + 85%|████████▌ | 10418/12188 [53:39<3:28:00, 7.05s/it] 85%|████████▌ | 10419/12188 [53:46<3:25:13, 6.96s/it] {'loss': 0.2979, 'grad_norm': 0.6215418935658616, 'learning_rate': 5.42978466557924e-07, 'epoch': 0.85} + 85%|████████▌ | 10419/12188 [53:46<3:25:13, 6.96s/it] 85%|████████▌ | 10420/12188 [53:53<3:24:00, 6.92s/it] {'loss': 0.3074, 'grad_norm': 0.6890161446354959, 'learning_rate': 5.423764424377065e-07, 'epoch': 0.85} + 85%|████████▌ | 10420/12188 [53:53<3:24:00, 6.92s/it] 86%|████████▌ | 10421/12188 [54:01<3:31:25, 7.18s/it] {'loss': 0.2798, 'grad_norm': 0.6186047635968971, 'learning_rate': 5.417747331077017e-07, 'epoch': 0.85} + 86%|████████▌ | 10421/12188 [54:01<3:31:25, 7.18s/it] 86%|████████▌ | 10422/12188 [54:08<3:31:21, 7.18s/it] {'loss': 0.3196, 'grad_norm': 0.6894167856798312, 'learning_rate': 5.411733386104029e-07, 'epoch': 0.86} + 86%|████████▌ | 10422/12188 [54:08<3:31:21, 7.18s/it] 86%|████████▌ | 10423/12188 [54:15<3:28:52, 7.10s/it] {'loss': 0.2626, 'grad_norm': 0.6952357694039761, 'learning_rate': 5.4057225898828e-07, 'epoch': 0.86} + 86%|████████▌ | 10423/12188 [54:15<3:28:52, 7.10s/it] 86%|████████▌ | 10424/12188 [54:25<3:55:03, 7.99s/it] {'loss': 0.3034, 'grad_norm': 0.6647828373110208, 'learning_rate': 5.399714942837792e-07, 'epoch': 0.86} + 86%|████████▌ | 10424/12188 [54:25<3:55:03, 7.99s/it] 86%|████████▌ | 10425/12188 [54:32<3:44:10, 7.63s/it] {'loss': 0.3172, 'grad_norm': 0.6717896488220376, 'learning_rate': 5.393710445393252e-07, 'epoch': 0.86} + 86%|████████▌ | 10425/12188 [54:32<3:44:10, 7.63s/it] 86%|████████▌ | 10426/12188 [54:39<3:38:30, 7.44s/it] {'loss': 0.2796, 'grad_norm': 0.6765584410418546, 'learning_rate': 5.387709097973226e-07, 'epoch': 0.86} + 86%|████████▌ | 10426/12188 [54:39<3:38:30, 7.44s/it] 86%|████████▌ | 10427/12188 [54:46<3:33:14, 7.27s/it] {'loss': 0.3341, 'grad_norm': 0.7541308664883002, 'learning_rate': 5.381710901001497e-07, 'epoch': 0.86} + 86%|████████▌ | 10427/12188 [54:46<3:33:14, 7.27s/it] 86%|████████▌ | 10428/12188 [54:53<3:33:53, 7.29s/it] {'loss': 0.2916, 'grad_norm': 0.7138287785490671, 'learning_rate': 5.37571585490167e-07, 'epoch': 0.86} + 86%|████████▌ | 10428/12188 [54:53<3:33:53, 7.29s/it] 86%|████████▌ | 10429/12188 [55:00<3:29:47, 7.16s/it] {'loss': 0.3145, 'grad_norm': 0.648146490929674, 'learning_rate': 5.369723960097084e-07, 'epoch': 0.86} + 86%|████████▌ | 10429/12188 [55:00<3:29:47, 7.16s/it] 86%|████████▌ | 10430/12188 [55:08<3:35:41, 7.36s/it] {'loss': 0.3241, 'grad_norm': 0.6972389381908891, 'learning_rate': 5.36373521701089e-07, 'epoch': 0.86} + 86%|████████▌ | 10430/12188 [55:08<3:35:41, 7.36s/it] 86%|████████▌ | 10431/12188 [55:15<3:33:45, 7.30s/it] {'loss': 0.2972, 'grad_norm': 0.7153730838959854, 'learning_rate': 5.357749626066011e-07, 'epoch': 0.86} + 86%|████████▌ | 10431/12188 [55:15<3:33:45, 7.30s/it] 86%|████████▌ | 10432/12188 [55:22<3:30:38, 7.20s/it] {'loss': 0.2868, 'grad_norm': 0.689899075894772, 'learning_rate': 5.351767187685114e-07, 'epoch': 0.86} + 86%|████████▌ | 10432/12188 [55:22<3:30:38, 7.20s/it] 86%|████████▌ | 10433/12188 [55:29<3:28:36, 7.13s/it] {'loss': 0.267, 'grad_norm': 0.6772965499031012, 'learning_rate': 5.345787902290706e-07, 'epoch': 0.86} + 86%|████████▌ | 10433/12188 [55:29<3:28:36, 7.13s/it] 86%|████████▌ | 10434/12188 [55:36<3:29:03, 7.15s/it] {'loss': 0.2944, 'grad_norm': 0.7108790373578644, 'learning_rate': 5.339811770304992e-07, 'epoch': 0.86} + 86%|████████▌ | 10434/12188 [55:36<3:29:03, 7.15s/it] 86%|████████▌ | 10435/12188 [55:43<3:28:28, 7.14s/it] {'loss': 0.2699, 'grad_norm': 0.725206330414425, 'learning_rate': 5.333838792150026e-07, 'epoch': 0.86} + 86%|████████▌ | 10435/12188 [55:43<3:28:28, 7.14s/it] 86%|████████▌ | 10436/12188 [55:50<3:25:35, 7.04s/it] {'loss': 0.2825, 'grad_norm': 0.7248770246332965, 'learning_rate': 5.327868968247613e-07, 'epoch': 0.86} + 86%|████████▌ | 10436/12188 [55:50<3:25:35, 7.04s/it] 86%|████████▌ | 10437/12188 [55:57<3:23:44, 6.98s/it] {'loss': 0.2921, 'grad_norm': 0.7219802547739127, 'learning_rate': 5.32190229901931e-07, 'epoch': 0.86} + 86%|████████▌ | 10437/12188 [55:57<3:23:44, 6.98s/it] 86%|████████▌ | 10438/12188 [56:06<3:39:43, 7.53s/it] {'loss': 0.286, 'grad_norm': 0.6749262925122658, 'learning_rate': 5.315938784886499e-07, 'epoch': 0.86} + 86%|████████▌ | 10438/12188 [56:06<3:39:43, 7.53s/it] 86%|████████▌ | 10439/12188 [56:13<3:35:15, 7.38s/it] {'loss': 0.282, 'grad_norm': 0.6923021595338078, 'learning_rate': 5.309978426270296e-07, 'epoch': 0.86} + 86%|████████▌ | 10439/12188 [56:13<3:35:15, 7.38s/it] 86%|████████▌ | 10440/12188 [56:20<3:31:54, 7.27s/it] {'loss': 0.3202, 'grad_norm': 0.6858847133048704, 'learning_rate': 5.30402122359161e-07, 'epoch': 0.86} + 86%|████████▌ | 10440/12188 [56:20<3:31:54, 7.27s/it] 86%|████████▌ | 10441/12188 [56:27<3:33:29, 7.33s/it] {'loss': 0.2665, 'grad_norm': 0.6829361810851038, 'learning_rate': 5.298067177271144e-07, 'epoch': 0.86} + 86%|████████▌ | 10441/12188 [56:27<3:33:29, 7.33s/it] 86%|████████▌ | 10442/12188 [56:34<3:32:54, 7.32s/it] {'loss': 0.2972, 'grad_norm': 0.6691567775453567, 'learning_rate': 5.292116287729348e-07, 'epoch': 0.86} + 86%|████████▌ | 10442/12188 [56:34<3:32:54, 7.32s/it] 86%|████████▌ | 10443/12188 [56:41<3:30:19, 7.23s/it] {'loss': 0.2849, 'grad_norm': 0.6869261394996864, 'learning_rate': 5.286168555386478e-07, 'epoch': 0.86} + 86%|████████▌ | 10443/12188 [56:41<3:30:19, 7.23s/it] 86%|████████▌ | 10444/12188 [56:48<3:28:11, 7.16s/it] {'loss': 0.3112, 'grad_norm': 0.7247893263001779, 'learning_rate': 5.280223980662535e-07, 'epoch': 0.86} + 86%|████████▌ | 10444/12188 [56:48<3:28:11, 7.16s/it] 86%|████████▌ | 10445/12188 [56:56<3:28:23, 7.17s/it] {'loss': 0.3408, 'grad_norm': 0.759201736347826, 'learning_rate': 5.274282563977328e-07, 'epoch': 0.86} + 86%|████████▌ | 10445/12188 [56:56<3:28:23, 7.17s/it] 86%|████████▌ | 10446/12188 [57:03<3:29:02, 7.20s/it] {'loss': 0.2644, 'grad_norm': 0.742771799489983, 'learning_rate': 5.268344305750439e-07, 'epoch': 0.86} + 86%|████████▌ | 10446/12188 [57:03<3:29:02, 7.20s/it] 86%|████████▌ | 10447/12188 [57:10<3:25:34, 7.08s/it] {'loss': 0.2554, 'grad_norm': 0.8359559304628809, 'learning_rate': 5.262409206401198e-07, 'epoch': 0.86} + 86%|████████▌ | 10447/12188 [57:10<3:25:34, 7.08s/it] 86%|████████▌ | 10448/12188 [57:16<3:23:01, 7.00s/it] {'loss': 0.3266, 'grad_norm': 0.7610977194219509, 'learning_rate': 5.256477266348747e-07, 'epoch': 0.86} + 86%|████████▌ | 10448/12188 [57:16<3:23:01, 7.00s/it] 86%|████████▌ | 10449/12188 [57:24<3:29:20, 7.22s/it] {'loss': 0.31, 'grad_norm': 0.6867253169375335, 'learning_rate': 5.250548486011992e-07, 'epoch': 0.86} + 86%|████████▌ | 10449/12188 [57:24<3:29:20, 7.22s/it] 86%|████████▌ | 10450/12188 [57:35<3:56:36, 8.17s/it] {'loss': 0.288, 'grad_norm': 0.6930452762342975, 'learning_rate': 5.244622865809596e-07, 'epoch': 0.86} + 86%|████████▌ | 10450/12188 [57:35<3:56:36, 8.17s/it] 86%|████████▌ | 10451/12188 [57:42<3:47:11, 7.85s/it] {'loss': 0.3506, 'grad_norm': 0.676882146591547, 'learning_rate': 5.238700406160036e-07, 'epoch': 0.86} + 86%|████████▌ | 10451/12188 [57:42<3:47:11, 7.85s/it] 86%|████████▌ | 10452/12188 [57:49<3:39:26, 7.58s/it] {'loss': 0.27, 'grad_norm': 0.6789513957440516, 'learning_rate': 5.232781107481544e-07, 'epoch': 0.86} + 86%|████████▌ | 10452/12188 [57:49<3:39:26, 7.58s/it] 86%|████████▌ | 10453/12188 [57:55<3:31:03, 7.30s/it] {'loss': 0.257, 'grad_norm': 0.6724311551814854, 'learning_rate': 5.226864970192114e-07, 'epoch': 0.86} + 86%|████████▌ | 10453/12188 [57:55<3:31:03, 7.30s/it] 86%|████████▌ | 10454/12188 [58:03<3:37:54, 7.54s/it] {'loss': 0.2766, 'grad_norm': 0.6726300318472148, 'learning_rate': 5.220951994709555e-07, 'epoch': 0.86} + 86%|████████▌ | 10454/12188 [58:03<3:37:54, 7.54s/it] 86%|████████▌ | 10455/12188 [58:10<3:31:44, 7.33s/it] {'loss': 0.2639, 'grad_norm': 0.8151245468033351, 'learning_rate': 5.215042181451418e-07, 'epoch': 0.86} + 86%|████████▌ | 10455/12188 [58:10<3:31:44, 7.33s/it] 86%|████████▌ | 10456/12188 [58:17<3:25:28, 7.12s/it] {'loss': 0.3035, 'grad_norm': 0.7810297212754618, 'learning_rate': 5.209135530835053e-07, 'epoch': 0.86} + 86%|████████▌ | 10456/12188 [58:17<3:25:28, 7.12s/it] 86%|████████▌ | 10457/12188 [58:25<3:30:46, 7.31s/it] {'loss': 0.3284, 'grad_norm': 0.7239322153177342, 'learning_rate': 5.203232043277568e-07, 'epoch': 0.86} + 86%|████████▌ | 10457/12188 [58:25<3:30:46, 7.31s/it] 86%|████████▌ | 10458/12188 [58:34<3:52:21, 8.06s/it] {'loss': 0.3212, 'grad_norm': 0.9925581329555332, 'learning_rate': 5.197331719195864e-07, 'epoch': 0.86} + 86%|████████▌ | 10458/12188 [58:34<3:52:21, 8.06s/it] 86%|████████▌ | 10459/12188 [58:41<3:38:25, 7.58s/it] {'loss': 0.253, 'grad_norm': 0.6907674185805054, 'learning_rate': 5.191434559006625e-07, 'epoch': 0.86} + 86%|████████▌ | 10459/12188 [58:41<3:38:25, 7.58s/it] 86%|████████▌ | 10460/12188 [58:49<3:39:29, 7.62s/it] {'loss': 0.3272, 'grad_norm': 0.7372529613408563, 'learning_rate': 5.185540563126274e-07, 'epoch': 0.86} + 86%|████████▌ | 10460/12188 [58:49<3:39:29, 7.62s/it] 86%|████████▌ | 10461/12188 [58:55<3:32:16, 7.37s/it] {'loss': 0.3384, 'grad_norm': 0.7247059173116777, 'learning_rate': 5.179649731971059e-07, 'epoch': 0.86} + 86%|████████▌ | 10461/12188 [58:55<3:32:16, 7.37s/it] 86%|████████▌ | 10462/12188 [59:02<3:27:01, 7.20s/it] {'loss': 0.2811, 'grad_norm': 0.6682722268916722, 'learning_rate': 5.173762065956967e-07, 'epoch': 0.86} + 86%|████████▌ | 10462/12188 [59:02<3:27:01, 7.20s/it] 86%|████████▌ | 10463/12188 [59:09<3:24:43, 7.12s/it] {'loss': 0.3019, 'grad_norm': 0.6472516372740319, 'learning_rate': 5.167877565499774e-07, 'epoch': 0.86} + 86%|████████▌ | 10463/12188 [59:09<3:24:43, 7.12s/it] 86%|████████▌ | 10464/12188 [59:19<3:48:25, 7.95s/it] {'loss': 0.291, 'grad_norm': 0.7249973917036322, 'learning_rate': 5.161996231015049e-07, 'epoch': 0.86} + 86%|████████▌ | 10464/12188 [59:19<3:48:25, 7.95s/it] 86%|████████▌ | 10465/12188 [59:26<3:38:15, 7.60s/it] {'loss': 0.3064, 'grad_norm': 0.7463643046700458, 'learning_rate': 5.156118062918098e-07, 'epoch': 0.86} + 86%|████████▌ | 10465/12188 [59:26<3:38:15, 7.60s/it] 86%|████████▌ | 10466/12188 [59:33<3:35:54, 7.52s/it] {'loss': 0.3224, 'grad_norm': 0.6568332554130991, 'learning_rate': 5.150243061624055e-07, 'epoch': 0.86} + 86%|████████▌ | 10466/12188 [59:33<3:35:54, 7.52s/it] 86%|████████▌ | 10467/12188 [59:41<3:42:13, 7.75s/it] {'loss': 0.2629, 'grad_norm': 0.6236867891025517, 'learning_rate': 5.144371227547795e-07, 'epoch': 0.86} + 86%|████████▌ | 10467/12188 [59:41<3:42:13, 7.75s/it] 86%|████████▌ | 10468/12188 [59:48<3:36:13, 7.54s/it] {'loss': 0.2839, 'grad_norm': 0.6442095103553219, 'learning_rate': 5.138502561103959e-07, 'epoch': 0.86} + 86%|████████▌ | 10468/12188 [59:48<3:36:13, 7.54s/it] 86%|████████▌ | 10469/12188 [59:56<3:39:59, 7.68s/it] {'loss': 0.2899, 'grad_norm': 0.6640331818057943, 'learning_rate': 5.132637062707008e-07, 'epoch': 0.86} + 86%|████████▌ | 10469/12188 [59:56<3:39:59, 7.68s/it] 86%|████████▌ | 10470/12188 [1:00:04<3:34:46, 7.50s/it] {'loss': 0.2747, 'grad_norm': 0.7437788312572555, 'learning_rate': 5.126774732771129e-07, 'epoch': 0.86} + 86%|████████▌ | 10470/12188 [1:00:04<3:34:46, 7.50s/it] 86%|████████▌ | 10471/12188 [1:00:10<3:29:44, 7.33s/it] {'loss': 0.318, 'grad_norm': 0.7312759464894134, 'learning_rate': 5.120915571710328e-07, 'epoch': 0.86} + 86%|████████▌ | 10471/12188 [1:00:10<3:29:44, 7.33s/it] 86%|████████▌ | 10472/12188 [1:00:17<3:26:59, 7.24s/it] {'loss': 0.2807, 'grad_norm': 0.7178269374195719, 'learning_rate': 5.115059579938375e-07, 'epoch': 0.86} + 86%|████████▌ | 10472/12188 [1:00:17<3:26:59, 7.24s/it] 86%|████████▌ | 10473/12188 [1:00:24<3:24:10, 7.14s/it] {'loss': 0.3048, 'grad_norm': 0.7074782700998214, 'learning_rate': 5.10920675786879e-07, 'epoch': 0.86} + 86%|████████▌ | 10473/12188 [1:00:24<3:24:10, 7.14s/it] 86%|████████▌ | 10474/12188 [1:00:32<3:25:10, 7.18s/it] {'loss': 0.3122, 'grad_norm': 0.6897090010960744, 'learning_rate': 5.103357105914913e-07, 'epoch': 0.86} + 86%|████████▌ | 10474/12188 [1:00:32<3:25:10, 7.18s/it] 86%|████████▌ | 10475/12188 [1:00:39<3:25:28, 7.20s/it] {'loss': 0.3077, 'grad_norm': 1.2068623950336723, 'learning_rate': 5.097510624489816e-07, 'epoch': 0.86} + 86%|████████▌ | 10475/12188 [1:00:39<3:25:28, 7.20s/it] 86%|████████▌ | 10476/12188 [1:00:46<3:24:14, 7.16s/it] {'loss': 0.3147, 'grad_norm': 0.6248754369088179, 'learning_rate': 5.091667314006371e-07, 'epoch': 0.86} + 86%|████████▌ | 10476/12188 [1:00:46<3:24:14, 7.16s/it] 86%|████████▌ | 10477/12188 [1:00:54<3:27:59, 7.29s/it] {'loss': 0.3011, 'grad_norm': 0.724207374371232, 'learning_rate': 5.085827174877245e-07, 'epoch': 0.86} + 86%|████████▌ | 10477/12188 [1:00:54<3:27:59, 7.29s/it] 86%|████████▌ | 10478/12188 [1:01:00<3:23:03, 7.12s/it] {'loss': 0.2809, 'grad_norm': 0.7245366802623554, 'learning_rate': 5.079990207514835e-07, 'epoch': 0.86} + 86%|████████▌ | 10478/12188 [1:01:00<3:23:03, 7.12s/it] 86%|████████▌ | 10479/12188 [1:01:07<3:20:40, 7.05s/it] {'loss': 0.3055, 'grad_norm': 0.6370566911744112, 'learning_rate': 5.074156412331354e-07, 'epoch': 0.86} + 86%|████████▌ | 10479/12188 [1:01:07<3:20:40, 7.05s/it] 86%|████████▌ | 10480/12188 [1:01:14<3:20:14, 7.03s/it] {'loss': 0.312, 'grad_norm': 0.6260265257695733, 'learning_rate': 5.068325789738771e-07, 'epoch': 0.86} + 86%|████████▌ | 10480/12188 [1:01:14<3:20:14, 7.03s/it] 86%|████████▌ | 10481/12188 [1:01:22<3:25:04, 7.21s/it] {'loss': 0.2793, 'grad_norm': 0.657154062298369, 'learning_rate': 5.062498340148819e-07, 'epoch': 0.86} + 86%|████████▌ | 10481/12188 [1:01:22<3:25:04, 7.21s/it] 86%|████████▌ | 10482/12188 [1:01:29<3:21:21, 7.08s/it] {'loss': 0.2692, 'grad_norm': 0.6420476053737014, 'learning_rate': 5.05667406397305e-07, 'epoch': 0.86} + 86%|████████▌ | 10482/12188 [1:01:29<3:21:21, 7.08s/it] 86%|████████▌ | 10483/12188 [1:01:35<3:16:30, 6.92s/it] {'loss': 0.2864, 'grad_norm': 0.6372696302032489, 'learning_rate': 5.050852961622738e-07, 'epoch': 0.86} + 86%|████████▌ | 10483/12188 [1:01:35<3:16:30, 6.92s/it] 86%|███████��▌ | 10484/12188 [1:01:42<3:16:55, 6.93s/it] {'loss': 0.2883, 'grad_norm': 0.6775663239625569, 'learning_rate': 5.045035033508977e-07, 'epoch': 0.86} + 86%|████████▌ | 10484/12188 [1:01:42<3:16:55, 6.93s/it] 86%|████████▌ | 10485/12188 [1:01:49<3:17:03, 6.94s/it] {'loss': 0.3076, 'grad_norm': 0.7650482504608547, 'learning_rate': 5.039220280042623e-07, 'epoch': 0.86} + 86%|████████▌ | 10485/12188 [1:01:49<3:17:03, 6.94s/it] 86%|████████▌ | 10486/12188 [1:01:56<3:15:24, 6.89s/it] {'loss': 0.2918, 'grad_norm': 0.699839154174031, 'learning_rate': 5.03340870163429e-07, 'epoch': 0.86} + 86%|████████▌ | 10486/12188 [1:01:56<3:15:24, 6.89s/it] 86%|████████▌ | 10487/12188 [1:02:06<3:43:02, 7.87s/it] {'loss': 0.3144, 'grad_norm': 0.6941321903471727, 'learning_rate': 5.027600298694397e-07, 'epoch': 0.86} + 86%|████████▌ | 10487/12188 [1:02:06<3:43:02, 7.87s/it] 86%|████████▌ | 10488/12188 [1:02:13<3:37:26, 7.67s/it] {'loss': 0.2829, 'grad_norm': 0.6393178903505073, 'learning_rate': 5.021795071633113e-07, 'epoch': 0.86} + 86%|████████▌ | 10488/12188 [1:02:13<3:37:26, 7.67s/it] 86%|████████▌ | 10489/12188 [1:02:20<3:30:12, 7.42s/it] {'loss': 0.3035, 'grad_norm': 0.7792882008619207, 'learning_rate': 5.015993020860394e-07, 'epoch': 0.86} + 86%|████████▌ | 10489/12188 [1:02:20<3:30:12, 7.42s/it] 86%|████████▌ | 10490/12188 [1:02:27<3:24:30, 7.23s/it] {'loss': 0.2976, 'grad_norm': 0.7192574098725895, 'learning_rate': 5.010194146785985e-07, 'epoch': 0.86} + 86%|████████▌ | 10490/12188 [1:02:27<3:24:30, 7.23s/it] 86%|████████▌ | 10491/12188 [1:02:35<3:30:06, 7.43s/it] {'loss': 0.2764, 'grad_norm': 0.6805122993062842, 'learning_rate': 5.004398449819376e-07, 'epoch': 0.86} + 86%|████████▌ | 10491/12188 [1:02:35<3:30:06, 7.43s/it] 86%|████████▌ | 10492/12188 [1:02:44<3:44:32, 7.94s/it] {'loss': 0.3779, 'grad_norm': 0.8033755145454036, 'learning_rate': 4.998605930369865e-07, 'epoch': 0.86} + 86%|████████▌ | 10492/12188 [1:02:44<3:44:32, 7.94s/it] 86%|████████▌ | 10493/12188 [1:02:51<3:34:14, 7.58s/it] {'loss': 0.3132, 'grad_norm': 0.6865397524943672, 'learning_rate': 4.992816588846495e-07, 'epoch': 0.86} + 86%|████████▌ | 10493/12188 [1:02:51<3:34:14, 7.58s/it] 86%|████████▌ | 10494/12188 [1:02:58<3:35:58, 7.65s/it] {'loss': 0.3146, 'grad_norm': 0.7192866567069123, 'learning_rate': 4.987030425658118e-07, 'epoch': 0.86} + 86%|████████▌ | 10494/12188 [1:02:58<3:35:58, 7.65s/it] 86%|████████▌ | 10495/12188 [1:03:05<3:30:10, 7.45s/it] {'loss': 0.2971, 'grad_norm': 0.6818334683521221, 'learning_rate': 4.981247441213333e-07, 'epoch': 0.86} + 86%|████████▌ | 10495/12188 [1:03:05<3:30:10, 7.45s/it] 86%|████████▌ | 10496/12188 [1:03:12<3:26:51, 7.34s/it] {'loss': 0.3301, 'grad_norm': 0.6412917000791971, 'learning_rate': 4.975467635920517e-07, 'epoch': 0.86} + 86%|████████▌ | 10496/12188 [1:03:12<3:26:51, 7.34s/it] 86%|████████▌ | 10497/12188 [1:03:22<3:43:27, 7.93s/it] {'loss': 0.2897, 'grad_norm': 0.6723274480632611, 'learning_rate': 4.969691010187838e-07, 'epoch': 0.86} + 86%|████████▌ | 10497/12188 [1:03:22<3:43:27, 7.93s/it]W0818 10:49:31.285000 123048 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 10:49:31.285000 123048 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 10:49:31.285000 123048 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 10:49:31.285000 123048 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 10:49:33.823000 75765 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 10:49:33.823000 75765 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 10:49:33.823000 75765 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 10:49:33.823000 75765 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 10:49:43.476000 34083 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 10:49:43.476000 34083 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 10:49:43.476000 34083 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 10:49:43.476000 34083 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +[2025-08-18 10:50:07,065] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 10:50:07,093] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 10:50:07,096] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 10:50:07,088] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 10:50:07,104] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 10:50:07,103] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 10:50:07,106] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 10:50:07,106] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 10:50:07,106] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 10:50:07,111] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 10:50:07,117] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 10:50:07,125] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 10:50:07,125] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 10:50:07,128] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 10:50:07,128] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 10:50:07,129] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 10:50:07,119] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 10:50:07,133] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 10:50:07,130] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 10:50:07,139] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 10:50:07,140] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 10:50:07,143] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 10:50:07,135] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 10:50:07,138] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 10:50:07,139] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 10:50:07,139] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 10:50:07,139] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 10:50:07,153] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 10:50:07,149] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 10:50:07,149] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 10:50:07,149] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 10:50:07,156] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 10:50:07,156] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 10:50:07,156] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 10:50:07,156] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 10:50:11,228] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 10:50:11,231] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 10:50:11,241] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 10:50:11,243] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 10:50:11,252] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 10:50:11,272] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 10:50:11,273] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 10:50:11,274] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 10:50:22,556] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:22,547] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:22,547] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:22,575] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:22,695] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:22,697] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:22,715] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:22,740] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:22,758] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:22,758] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:22,760] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:22,764] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 10:50:22,777] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:22,772] [INFO] [comm.py:652:init_distributed] cdb=None +ice None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 10:50:22,797] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:22,797] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:22,797] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:22,817] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 10:50:22,861] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:22,929] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 10:50:22,929] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 10:50:22,940] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 10:50:23,061] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 10:50:23,056] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:23,056] [INFO] [comm.py:652:init_distributed] cdb=None + with `model.to('cuda')`. +[2025-08-18 10:50:23,056] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:23,056] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:23,057] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:23,057] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:23,060] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:23,058] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 10:50:23,060] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 10:50:23,061] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 10:50:23,104] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 10:50:23,104] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 10:50:23,111] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 10:50:23,112] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 10:50:23,109] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 10:50:23,109] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 10:50:23,662] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 10:50:23,883] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 10:50:23,886] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 10:50:23,889] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 10:50:23,897] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 10:50:23,898] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 10:50:23,901] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 10:50:23,902] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 10:50:34,825] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:34,827] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:34,827] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:34,828] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:34,828] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:34,828] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:34,820] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:34,821] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:34,841] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:34,840] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:34,840] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:34,840] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:34,842] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:34,840] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:34,855] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:34,875] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:34,878] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:34,921] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:34,943] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:34,947] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:34,994] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:35,004] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:35,031] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:35,031] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:35,040] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:35,050] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 10:50:35,056] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:35,058] [INFO] [comm.py:652:init_distributed] cdb=None +d on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 10:50:35,062] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:35,078] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 10:50:35,078] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 10:50:35,179] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 10:50:35,179] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 10:50:35,180] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 10:50:35,221] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 10:50:35,238] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 10:50:35,268] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 10:50:35,263] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 10:50:35,265] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 10:50:35,283] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 10:50:35,288] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 10:50:35,304] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 10:50:35,304] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 10:50:35,304] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 10:50:35,305] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 10:50:35,305] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 10:50:35,305] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 10:50:35,305] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 10:50:35,326] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 10:50:35,339] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 10:50:35,343] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 10:50:35,353] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 10:50:35,364] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 10:50:35,375] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 10:50:35,402] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 10:50:35,403] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 10:50:38,413] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 10:50:38,413] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 10:50:38,465] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 10:50:38,467] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 10:50:38,467] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 10:50:38,468] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 10:50:38,468] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 10:50:38,468] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 10:50:51,044] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:51,045] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:51,046] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:51,046] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:51,047] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:51,047] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:51,047] [INFO] [comm.py:683:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl +[2025-08-18 10:50:51,048] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:51,048] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 10:50:51,719] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 10:50:51,848] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 10:50:51,911] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 10:50:51,913] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 10:50:51,916] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 10:50:51,918] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 10:50:51,924] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 10:50:51,926] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. + Loading checkpoint shards: 0%| | 0/2 [00:00 before Client(conf_path) +Rank 0: --> after Client(conf_path) +Rank 0: Loading datasets: /mnt/petrelfs/liuzhaoyang/workspace/GUIAgent/internvl_chat/data/internvl_meta/meta/meta_250816_1.json +Rank 0: Loading guienv +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl with all sampling strategy +Rank 0: Loaded 327972 samples from VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl +Rank 0: Loading omniact +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl with all sampling strategy +Rank 0: Loaded 6720 samples from VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl +Rank 0: Loading ricoig16k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16133 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl +Rank 0: Loading ricosca +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl with all sampling strategy +Rank 0: Loaded 173212 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl +Rank 0: Loading seeclick +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl with all sampling strategy +Rank 0: Loaded 271121 samples from VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl +Rank 0: Loading ui_refexp +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl with all sampling strategy +Rank 0: Loaded 15624 samples from VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl +Rank 0: Loading webui350k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 57389 samples from VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl +Rank 0: Loading widget_captioning +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl with all sampling strategy +Rank 0: Loaded 101426 samples from VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl +Rank 0: Loading aitw-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl +Rank 0: Loading aitw-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl +Rank 0: Loading aitw-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl +Rank 0: Loading amex-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl +Rank 0: Loading amex-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl +Rank 0: Loading amex-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl +Rank 0: Loading android_control +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 149428 samples from VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl +Rank 0: Loading coat +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl with all sampling strategy +Rank 0: Loaded 11833 samples from VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl +Rank 0: Loading guiact-web-multi-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl +Rank 0: Loading guiact-web-multi-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl +Rank 0: Loading guiact-web-multi-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl +Rank 0: Loading guiact-web-single +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl with all sampling strategy +Rank 0: Loaded 67396 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl +Rank 0: Loading guide +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl with all sampling strategy +Rank 0: Loaded 13544 samples from VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl +Rank 0: Loading gui-odyssey-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl +Rank 0: Loading gui-odyssey-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl +Rank 0: Loading gui-odyssey-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl +Rank 0: Loading mind2web-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l1_202507011.jsonl +Rank 0: Loading mind2web-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l2_202507011.jsonl +Rank 0: Loading mind2web-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l3_202507011.jsonl +Rank 0: Loading miniwob-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l1_202507011.jsonl +Rank 0: Loading miniwob-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l2_202507011.jsonl +Rank 0: Loading miniwob-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l3_202507011.jsonl +Rank 0: Loading aguvis_android_control-v2 +Rank 0: Skipping aguvis_android_control-v2 due to repeat_time=0 +Rank 0: Loading aguvis_coat-v2 +Rank 0: Skipping aguvis_coat-v2 due to repeat_time=0 +Rank 0: Loading aguvis_docvqa_grounding +Rank 0: Skipping aguvis_docvqa_grounding due to repeat_time=0 +Rank 0: Loading aguvis_guiact-web-multi +Rank 0: Skipping aguvis_guiact-web-multi due to repeat_time=0 +Rank 0: Loading aguvis_guiact-web-single-v2 +Rank 0: Skipping aguvis_guiact-web-single-v2 due to repeat_time=0 +Rank 0: Loading aguvis_guide_si_10k-v2 +Rank 0: Skipping aguvis_guide_si_10k-v2 due to repeat_time=0 +Rank 0: Loading aguvis_guienv +Rank 0: Skipping aguvis_guienv due to repeat_time=0 +Rank 0: Loading aguvis_mind2web_train_v1.0.1 +Rank 0: Skipping aguvis_mind2web_train_v1.0.1 due to repeat_time=0 +Rank 0: Loading aguvis_omniact +Rank 0: Skipping aguvis_omniact due to repeat_time=0 +Rank 0: Loading aguvis_osatlas_ui_tars_cleaned +Rank 0: Skipping aguvis_osatlas_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading aguvis_ricoig16k +Rank 0: Skipping aguvis_ricoig16k due to repeat_time=0 +Rank 0: Loading aguvis_ricosca +Rank 0: Skipping aguvis_ricosca due to repeat_time=0 +Rank 0: Loading aguvis_seeclick_mi_ui_tars_cleaned +Rank 0: Skipping aguvis_seeclick_mi_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading aguvis_seeclick_ui_tars_cleaned_fixed +Rank 0: Skipping aguvis_seeclick_ui_tars_cleaned_fixed due to repeat_time=0 +Rank 0: Loading aguvis_ui_refexp +Rank 0: Skipping aguvis_ui_refexp due to repeat_time=0 +Rank 0: Loading aguvis_webui350k +Rank 0: Skipping aguvis_webui350k due to repeat_time=0 +Rank 0: Loading aguvis_widget_captioning +Rank 0: Skipping aguvis_widget_captioning due to repeat_time=0 +Rank 0: Loading icon_caption_icon_v0222_description +Rank 0: Skipping icon_caption_icon_v0222_description due to repeat_time=0 +Rank 0: Loading icon_grounding_icon_v0222_grounding +Rank 0: Skipping icon_grounding_icon_v0222_grounding due to repeat_time=0 +Rank 0: Loading refusal_component_final_1.5m +Rank 0: Skipping refusal_component_final_1.5m due to repeat_time=0 +Rank 0: Loading refusal_component_library_snap_icon_data_grounding +Rank 0: Skipping refusal_component_library_snap_icon_data_grounding due to repeat_time=0 +Rank 0: Loading refusal_component_v1_130k +Rank 0: Skipping refusal_component_v1_130k due to repeat_time=0 +Rank 0: Loading refusal_guienv +Rank 0: Skipping refusal_guienv due to repeat_time=0 +Rank 0: Loading refusal_icon_v0222_grounding +Rank 0: Skipping refusal_icon_v0222_grounding due to repeat_time=0 +Rank 0: Loading refusal_osatlas_ui_tars_cleaned +Rank 0: Skipping refusal_osatlas_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading refusal_ricosca +Rank 0: Skipping refusal_ricosca due to repeat_time=0 +Rank 0: Loading refusal_seeclick_mi_ui_tars_cleaned +Rank 0: Skipping refusal_seeclick_mi_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading refusal_seeclick_ui_tars_cleaned_fixed +Rank 0: Skipping refusal_seeclick_ui_tars_cleaned_fixed due to repeat_time=0 +Rank 0: Loading refusal_training_data_icon_grounded_merged +Rank 0: Skipping refusal_training_data_icon_grounded_merged due to repeat_time=0 +Rank 0: Loading component_generated_component_final_1.5m_cleaned_split +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_final_1.5m_cleaned_split_20250713.jsonl with random:10% sampling strategy +Rank 0: Loaded 3987 samples from VC:s3://gui-agent/jedi/annotations_250713/component_final_1.5m_cleaned_split_20250713.jsonl +Rank 0: Loading component_generated_component_library_snap_icon_data_description +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_description_conversations_20250713.jsonl with random:50% sampling strategy +Rank 0: Loaded 11061 samples from VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_description_conversations_20250713.jsonl +Rank 0: Loading component_generated_component_library_snap_icon_data_grounding +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_grounding_conversations_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 4424 samples from VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_grounding_conversations_20250713.jsonl +Rank 0: Loading component_generated_component_v1_130k +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_v1_130k_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 26376 samples from VC:s3://gui-agent/jedi/annotations_250713/component_v1_130k_20250713.jsonl +Rank 0: Loading component_rule-based_doc_data_new +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/doc_data_new_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 3153 samples from VC:s3://gui-agent/jedi/annotations_250713/doc_data_new_20250713.jsonl +Rank 0: Loading component_rule-based_doc_scroll_data_new +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/doc_scroll_data_new_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 603 samples from VC:s3://gui-agent/jedi/annotations_250713/doc_scroll_data_new_20250713.jsonl +Rank 0: Loading component_rule-based_ethercalc_v1 +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/ethercalc_v1_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 2012 samples from VC:s3://gui-agent/jedi/annotations_250713/ethercalc_v1_20250713.jsonl +Rank 0: Loading component_rule-based_slide_v1_17k +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/slide_v1_17k_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 2363 samples from VC:s3://gui-agent/jedi/annotations_250713/slide_v1_17k_20250713.jsonl +Rank 0: Loading icon_caption_ios_app_data +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/ios_app_data_conversations-images_pure_color_background_20250713.jsonl with all sampling strategy +Rank 0: Loaded 49498 samples from VC:s3://gui-agent/jedi/annotations_250713/ios_app_data_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_caption_mac_app_data +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/mac_app_data_conversations-images_pure_color_background_20250713.jsonl with all sampling strategy +Rank 0: Loaded 18083 samples from VC:s3://gui-agent/jedi/annotations_250713/mac_app_data_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_caption_training_data_icon +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_pure_color_background_20250713.jsonl with random:50% sampling strategy +Rank 0: Loaded 75874 samples from VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_grounding_training_data_icon_grounded_merged +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_grounded_merged_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 5466 samples from VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_grounded_merged_20250713.jsonl +Rank 0: Loading layout_layout200k_training_data_qwen25 +Rank 0: Skipping layout_layout200k_training_data_qwen25 due to repeat_time=0 +Rank 0: Loading layout_layout200k_grounding_training_data_qwen25 +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/layout200k_grounding_training_data_qwen25_20250713.jsonl with random:10% sampling strategy +Rank 0: Loaded 158612 samples from VC:s3://gui-agent/jedi/annotations_250713/layout200k_grounding_training_data_qwen25_20250713.jsonl +Rank 0: Loading layout_layout400k_claude_training_data_qwen25_split +Rank 0: Skipping layout_layout400k_claude_training_data_qwen25_split due to repeat_time=0 +Rank 0: Loading layout_layout400k_claude_grounding_training_data_qwen25_split +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/layout400k_claude_grounding_training_data_qwen25_split_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 7540 samples from VC:s3://gui-agent/jedi/annotations_250713/layout400k_claude_grounding_training_data_qwen25_split_20250713.jsonl +Rank 0: Loading layout_os_layout_v1 +Rank 0: Skipping layout_os_layout_v1 due to repeat_time=0 +Rank 0: Loading layout_os_layout_v1_grounding +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/os_layout_v1_grounding_training_data_qwen25_split_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 7857 samples from VC:s3://gui-agent/jedi/annotations_250713/os_layout_v1_grounding_training_data_qwen25_split_20250713.jsonl +Rank 0: Loading mind2web_raw_image +Rank 0: Loading VC:s3://gui-agent/mind2web_train/navigation_20250705.jsonl with all sampling strategy +Rank 0: Loaded 5740 samples from VC:s3://gui-agent/mind2web_train/navigation_20250705.jsonl +Rank 0: Loading ws_android_navigation_20250328 +Rank 0: Skipping ws_android_navigation_20250328 due to repeat_time=0 +Rank 0: Loading ws_android_navigation_20250407 +Rank 0: Skipping ws_android_navigation_20250407 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_w_history_20250328 +Rank 0: Skipping ws_web_navigation_w_history_20250328 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_wo_history_20250328 +Rank 0: Skipping ws_web_navigation_wo_history_20250328 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_20250421 +Rank 0: Skipping ws_web_navigation_20250421 due to repeat_time=0 +Rank 0: Loading ws_ubuntu_navigation_20250328 +Rank 0: Skipping ws_ubuntu_navigation_20250328 due to repeat_time=0 +Rank 0: Loading ws_android_navigation_20250505 +Rank 0: Skipping ws_android_navigation_20250505 due to repeat_time=0 +Rank 0: Loading internal_android_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 48814 samples from VC:s3://gui-agent/data_20250612/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19042 samples from VC:s3://gui-agent/data_20250612/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8363 samples from VC:s3://gui-agent/data_20250612/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 26412 samples from VC:s3://gui-agent/data_20250612/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 57522 samples from VC:s3://gui-agent/data_20250612/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 1342 samples from VC:s3://gui-agent/data_20250624/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 15766 samples from VC:s3://gui-agent/data_20250624/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19280 samples from VC:s3://gui-agent/data_20250630/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 3560 samples from VC:s3://gui-agent/data_20250630/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 9258 samples from VC:s3://gui-agent/data_20250630/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 420 samples from VC:s3://gui-agent/data_20250707/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 8898 samples from VC:s3://gui-agent/data_20250707/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 21026 samples from VC:s3://gui-agent/data_20250707/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 4490 samples from VC:s3://gui-agent/data_20250707/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 22154 samples from VC:s3://gui-agent/data_20250714/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/web/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 11614 samples from VC:s3://gui-agent/data_20250714/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 16767 samples from VC:s3://gui-agent/data_20250714/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 48814 samples from VC:s3://gui-agent/data_20250612/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 11970 samples from VC:s3://gui-agent/data_20250612/android/planning_20250811.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19042 samples from VC:s3://gui-agent/data_20250612/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 4926 samples from VC:s3://gui-agent/data_20250612/mac/planning_20250811.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8363 samples from VC:s3://gui-agent/data_20250612/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3716 samples from VC:s3://gui-agent/data_20250612/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 26412 samples from VC:s3://gui-agent/data_20250612/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 4420 samples from VC:s3://gui-agent/data_20250612/windows/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 115044 samples from VC:s3://gui-agent/data_20250612/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2684 samples from VC:s3://gui-agent/data_20250624/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 15766 samples from VC:s3://gui-agent/data_20250624/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7402 samples from VC:s3://gui-agent/data_20250624/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19280 samples from VC:s3://gui-agent/data_20250630/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3746 samples from VC:s3://gui-agent/data_20250630/android/planning_20250811.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 3560 samples from VC:s3://gui-agent/data_20250630/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 2469 samples from VC:s3://gui-agent/data_20250630/mac/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 18516 samples from VC:s3://gui-agent/data_20250630/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 420 samples from VC:s3://gui-agent/data_20250707/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 243 samples from VC:s3://gui-agent/data_20250707/mac/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 8898 samples from VC:s3://gui-agent/data_20250707/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 1188 samples from VC:s3://gui-agent/data_20250707/windows/planning_20250811.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8979 samples from VC:s3://gui-agent/data_20250707/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 1599 samples from VC:s3://gui-agent/data_20250707/android/planning_20250811.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 21026 samples from VC:s3://gui-agent/data_20250707/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2657 samples from VC:s3://gui-agent/data_20250707/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 22154 samples from VC:s3://gui-agent/data_20250714/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2383 samples from VC:s3://gui-agent/data_20250714/windows/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/web/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 23229 samples from VC:s3://gui-agent/data_20250714/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 16767 samples from VC:s3://gui-agent/data_20250714/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2389 samples from VC:s3://gui-agent/data_20250714/ubuntu/planning_20250811.jsonl +Rank 0: Loading private_aig_share_0815_logo_oral_operation_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_oral_operation_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_oral_operation_d240924_v1.jsonl +Rank 0: Loading private_aig_share_0815_logo_region_caption_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_region_caption_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_region_caption_d240924_v1.jsonl +Rank 0: Loading private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2.jsonl with all sampling strategy +Rank 0: Loaded 20293 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2.jsonl +Rank 0: Loading private_ui_phone_comment_20240606_json_d20241023_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_comment_20240606_json_d20241023_v2.jsonl with all sampling strategy +Rank 0: Loaded 1055 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_comment_20240606_json_d20241023_v2.jsonl +Rank 0: Loading private_ui_internal_aig_json_d241126 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_json_d241126.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 6837 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_json_d241126.jsonl +Rank 0: Loading private_ui_internal_aig_xml_d241126 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_xml_d241126.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 6873 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_xml_d241126.jsonl +Rank 0: Loading OS_Altas_androidworld_grounding_d241120_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/OS_Altas_androidworld_grounding_d241120_v1.jsonl with all sampling strategy +Rank 0: Loaded 89860 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/OS_Altas_androidworld_grounding_d241120_v1.jsonl +Rank 0: Loading private_ui_aig_share_long_caption_20240604_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_long_caption_20240604_v1.jsonl with repeat:4 sampling strategy +Rank 0: Loaded 3156 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_long_caption_20240604_v1.jsonl +Rank 0: Loading aw_1218_grounding +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/grounding_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/grounding_new.jsonl +Rank 0: Loading aw_1218_regioncaption +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/regioncaption_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/regioncaption_new.jsonl +Rank 0: Loading aw_1218_oral_operation +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/oral_operation_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/oral_operation_new.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600.jsonl with all sampling strategy +Rank 0: Loaded 6600 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1.jsonl +Rank 0: Loading private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607.jsonl with all sampling strategy +Rank 0: Loaded 24620 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607.jsonl +Rank 0: Loading private_ui_phone_2403_long_caption_d20240604_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d20240604_v2.jsonl with all sampling strategy +Rank 0: Loaded 17196 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d20240604_v2.jsonl +Rank 0: Loading private_ui_phone_2403_long_caption_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 5998 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d240430_v1.jsonl +Rank 0: Loading private_ui_phone_2403_ocr_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ocr_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 31276 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ocr_d240430_v1.jsonl +Rank 0: Loading screen_qa_with_bbox_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_with_bbox_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 62401 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_with_bbox_d240430_v1.jsonl +Rank 0: Loading screenai_layout_20240604_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screenai_layout_20240604_v1.jsonl with all sampling strategy +Rank 0: Loaded 22076 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screenai_layout_20240604_v1.jsonl +Rank 0: Loading amex_grounding_d240813_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/amex_grounding_d240813_v1.jsonl with all sampling strategy +Rank 0: Loaded 102007 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/amex_grounding_d240813_v1.jsonl +Rank 0: Loading guicourse_guienv_text_grounding_1_d240815_v3 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_1_d240815_v3.jsonl with all sampling strategy +Rank 0: Loaded 63581 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_1_d240815_v3.jsonl +Rank 0: Loading guicourse_guienv_text_grounding_2_d240815_v3 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_2_d240815_v3.jsonl with all sampling strategy +Rank 0: Loaded 6852 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_2_d240815_v3.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1.jsonl +Rank 0: Loading screen_qa_short_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_short_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 27880 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_short_d240430_v1.jsonl +Rank 0: Loading private_aig_share_0815_logo_grounding_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_grounding_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_grounding_d240924_v1.jsonl +Rank 0: Loading private_schedual_extract_20240520_v2_r464_reprompt_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_schedual_extract_20240520_v2_r464_reprompt_d240607.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 928 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_schedual_extract_20240520_v2_r464_reprompt_d240607.jsonl +Rank 0: Loading private_ui2json_app_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_app_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2488 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_app_d20240822_v1.jsonl +Rank 0: Loading private_ui2json_os_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_os_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 1242 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_os_d20240822_v1.jsonl +Rank 0: Loading private_ui2json_web_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_web_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2360 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_web_d20240822_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607.jsonl with all sampling strategy +Rank 0: Loaded 3791 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607.jsonl +Rank 0: Loading private_ui_aig_share_2405_marker_recognition_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_marker_recognition_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5179 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_marker_recognition_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_ocr_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_ocr_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5090 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_ocr_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_operation_oral_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_operation_oral_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5070 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_operation_oral_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5248 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2408_region_caption_d240903_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2408_region_caption_d240903_v1.jsonl with all sampling strategy +Rank 0: Loaded 5854 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2408_region_caption_d240903_v1.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1.jsonl +Rank 0: Loading uground_web_direct_150k_description_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_direct_150k_description_filtered_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 74208 samples from VC:s3://gui/new_annotations/uground/web_direct_150k_description_filtered_202507011.jsonl +Rank 0: Loading uground_web_direct_258k_function_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_direct_258k_function_filtered_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 124132 samples from VC:s3://gui/new_annotations/uground/web_direct_258k_function_filtered_202507011.jsonl +Rank 0: Loading uground_web_hybrid_773k_max_25qa_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_hybrid_773k_max_25qa_filtered_new_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 621928 samples from VC:s3://gui/new_annotations/uground/web_hybrid_773k_max_25qa_filtered_new_202507011.jsonl +Rank 0: Loading altas_windows +Rank 0: Loading VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_windows_splited_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 537672 samples from VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_windows_splited_202507011.jsonl +Rank 0: Loading altas_linux +Rank 0: Loading VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_linux_splited_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 21578 samples from VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_linux_splited_202507011.jsonl +Rank 0: Loading atlas_macos +Rank 0: Loading VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_macos_splited_202507011.jsonl with random:20% sampling strategy +Rank 0: Loaded 3680 samples from VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_macos_splited_202507011.jsonl +Rank 0: Loading android_action_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/android/filter_action_grounding_20250405_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 5621 samples from VC:s3://gui/data_20250328/android/filter_action_grounding_20250405_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250328 +Rank 0: Loading VC:s3://gui-agent/data_20250328/windows/action_grounding_20250409_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 11980 samples from VC:s3://gui-agent/data_20250328/windows/action_grounding_20250409_202507011_20250722.jsonl +Rank 0: Loading web_action_grounding_20250328 +Rank 0: Loading VC:s3://gui-agent/data_20250328/web_25k/action_grounding_20250404_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 9459 samples from VC:s3://gui-agent/data_20250328/web_25k/action_grounding_20250404_202507011.jsonl +Rank 0: Loading ubuntu_action_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/ubuntu/action_grounding_20250407_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 328 samples from VC:s3://gui/data_20250310/ubuntu/action_grounding_20250407_202507011.jsonl +Rank 0: Loading ubuntu_action_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/ubuntu/action_grounding_20250407_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 54 samples from VC:s3://gui/data_20250317/ubuntu/action_grounding_20250407_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/action_grounding_20250421_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 480 samples from VC:s3://gui/data_20250317/windows/action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/crop_action_grounding_20250421_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 240 samples from VC:s3://gui/data_20250317/windows/crop_action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/action_grounding_20250421_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 944 samples from VC:s3://gui/data_20250310/windows/action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/crop_action_grounding_20250421_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 472 samples from VC:s3://gui/data_20250310/windows/crop_action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading mac_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/mac/action_grounding_20250410_202507011.jsonl with all sampling strategy +Rank 0: Loaded 1578 samples from VC:s3://gui-agent/data_20250407/mac/action_grounding_20250410_202507011.jsonl +Rank 0: Loading iphone_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/iphone/white/action_grounding_20250410_202507011.jsonl with all sampling strategy +Rank 0: Loaded 20394 samples from VC:s3://gui-agent/data_20250407/iphone/white/action_grounding_20250410_202507011.jsonl +Rank 0: Loading web_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/web/action_grounding_20250414_202507011.jsonl with random:20% sampling strategy +Rank 0: Loaded 14285 samples from VC:s3://gui-agent/data_20250407/web/action_grounding_20250414_202507011.jsonl +Rank 0: Loading android_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/android/action_grounding_20250410_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 3590 samples from VC:s3://gui-agent/data_20250407/android/action_grounding_20250410_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/action_grounding_20250416_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 21422 samples from VC:s3://gui-agent/data_20250407/windows/action_grounding_20250416_202507011_20250722.jsonl +Rank 0: Loading windows_human_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/human_action_grounding_20250416_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 100 samples from VC:s3://gui-agent/data_20250407/windows/human_action_grounding_20250416_202507011_20250722.jsonl +Rank 0: Loading windows_aug_cropping_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/sub_action_grounding_20250421_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 7675 samples from VC:s3://gui-agent/data_20250407/windows/sub_action_grounding_20250421_202507011.jsonl +Rank 0: Loading iphone_action_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/action_grounding_20250417_202507011.jsonl with all sampling strategy +Rank 0: Loaded 20116 samples from VC:s3://gui-agent/data_20250414/iphone/action_grounding_20250417_202507011.jsonl +Rank 0: Loading iphone_human_action_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/human_action_grounding_20250421_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2520 samples from VC:s3://gui-agent/data_20250414/iphone/human_action_grounding_20250421_202507011.jsonl +Rank 0: Loading mac_human_action_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/mac/human_action_grounding_20250418_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7814 samples from VC:s3://gui-agent/data_20250414/mac/human_action_grounding_20250418_202507011.jsonl +Rank 0: Loading android_action_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/Android/action_grounding_20250429_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 17838 samples from VC:s3://gui-agent/data_20250421/Android/action_grounding_20250429_202507011.jsonl +Rank 0: Loading android_action_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/Android/action_grounding_20250429_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 9008 samples from VC:s3://gui-agent/data_20250428/Android/action_grounding_20250429_202507011.jsonl +Rank 0: Loading web_canvas_action_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/web_canvas/action_grounding_20250429_202507011.jsonl with random:20% sampling strategy +Rank 0: Loaded 624 samples from VC:s3://gui-agent/data_20250428/web_canvas/action_grounding_20250429_202507011.jsonl +Rank 0: Loading web_action_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/web/action_grounding_20250505_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 100652 samples from VC:s3://gui-agent/data_20250421/web/action_grounding_20250505_202507011.jsonl +Rank 0: Loading ubuntu_action_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/ubuntu/action_grounding_20250505_202507011.jsonl with all sampling strategy +Rank 0: Loaded 28346 samples from VC:s3://gui-agent/data_20250428/ubuntu/action_grounding_20250505_202507011.jsonl +Rank 0: Loading android_action_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/android/action_grounding_20250506_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 4907 samples from VC:s3://gui-agent/data_20250505/android/action_grounding_20250506_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/action_grounding_20250508_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 2635 samples from VC:s3://gui-agent/data_20250505/windows/action_grounding_20250508_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/crop_action_grounding_20250508_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 5234 samples from VC:s3://gui-agent/data_20250505/windows/crop_action_grounding_20250508_202507011_20250722.jsonl +Rank 0: Loading ubuntu_action_grounding_20250508 +Rank 0: Loading VC:s3://gui-agent/data_20250508/ubuntu/action_grounding_20250509_202507011.jsonl with all sampling strategy +Rank 0: Loaded 3404 samples from VC:s3://gui-agent/data_20250508/ubuntu/action_grounding_20250509_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250510_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250510_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250510_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250510_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250526_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250526_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250526_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250526_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250526_3 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250527_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250527_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_3 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250527_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250527_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250529_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 17050 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250529_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250529_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 17050 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250529_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_1 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250510_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 2855 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250510_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_2 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250526_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 655 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250526_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_3 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250527_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 833 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250527_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_4 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250529_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 2643 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250529_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_1 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250510_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250510_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_2 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250526_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250526_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_3 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250527_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250527_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_4 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250529_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5286 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250529_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_5 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250510_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250510_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_6 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250526_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250526_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_7 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250527_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250527_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_8 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250529_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5286 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250529_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/action_grounding_20250619_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3130 samples from VC:s3://gui-agent/data_20250623/windows/action_grounding_20250619_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250619_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 3130 samples from VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250619_202507011_20250722.jsonl +Rank 0: Loading windows_hover_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/action_grounding_20250620_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 3333 samples from VC:s3://gui-agent/data_20250623/windows/action_grounding_20250620_202507011_20250722.jsonl +Rank 0: Loading windows_crop_hover_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250620_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 3333 samples from VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250620_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_1 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 833 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_2 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1666 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_3 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1666 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_pure_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_4 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 358 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_5 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 782 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_6 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 782 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_pure_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/action_grounding_20250627_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7946 samples from VC:s3://gui-agent/data_20250630/windows/action_grounding_20250627_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/crop_action_grounding_20250627_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 3973 samples from VC:s3://gui-agent/data_20250630/windows/crop_action_grounding_20250627_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_1 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 993 samples from VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1986 samples from VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1986 samples from VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_pure_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/action_grounding_20250630_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 1990 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/action_grounding_20250630_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_action_grounding_20250630_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1990 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_action_grounding_20250630_202507011_20250722.jsonl +Rank 0: Loading windows_human_action_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/action_grounding_20250703_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 5040 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/action_grounding_20250703_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_action_grounding_20250703_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 5040 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_action_grounding_20250703_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_4 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 630 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_5 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1260 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_6 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1260 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_pure_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_7 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 249 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_8 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 498 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_9 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 498 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_pure_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/action_grounding_20250708_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2538 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/action_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_action_grounding_20250708_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1269 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_action_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250707_1 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_action_grounding_20250708.jsonl with random:25% sampling strategy +Rank 0: Loaded 172 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_action_grounding_20250708.jsonl +Rank 0: Loading windows_aug_action_grounding_20250707_2 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_action_grounding_20250708.jsonl with random:25% sampling strategy +Rank 0: Loaded 634 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_action_grounding_20250708.jsonl +Rank 0: Loading windows_aug_action_grounding_20250707_3 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_action_grounding_20250708.jsonl with random:25% sampling strategy +Rank 0: Loaded 634 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_action_grounding_20250708.jsonl +Rank 0: Loading windows_human_action_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/action_grounding_20250717_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2832 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/action_grounding_20250717_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_action_grounding_20250717_20250722.jsonl with all sampling strategy +Rank 0: Loaded 2832 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_action_grounding_20250717_20250722.jsonl +Rank 0: Loading android_ocr_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/android/text_ocr_20250409.jsonl with all sampling strategy +Rank 0: Loaded 29878 samples from VC:s3://gui/data_20250328/android/text_ocr_20250409.jsonl +Rank 0: Loading mac_orc_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/mac/element_ocr_20250328.jsonl with all sampling strategy +Rank 0: Loaded 4393 samples from VC:s3://gui/data_20250328/mac/element_ocr_20250328.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/ubuntu/internvl_grounding_20250407.jsonl with random:80% sampling strategy +Rank 0: Loaded 254 samples from VC:s3://gui/data_20250310/ubuntu/internvl_grounding_20250407.jsonl +Rank 0: Loading windows_click_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/internvl_grounding_function_20250421.jsonl with random:80% sampling strategy +Rank 0: Loaded 1802 samples from VC:s3://gui/data_20250310/windows/internvl_grounding_function_20250421.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/ubuntu/internvl_grounding_20250407.jsonl with random:80% sampling strategy +Rank 0: Loaded 53 samples from VC:s3://gui/data_20250317/ubuntu/internvl_grounding_20250407.jsonl +Rank 0: Loading windows_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/internvl_grounding_20250421_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 6578 samples from VC:s3://gui/data_20250317/windows/internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/crop_internvl_grounding_20250421_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 3287 samples from VC:s3://gui/data_20250317/windows/crop_internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading windows_click_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/internvl_grounding_function_20250421_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 542 samples from VC:s3://gui/data_20250317/windows/internvl_grounding_function_20250421_20250722.jsonl +Rank 0: Loading windows_crop_click_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/crop_internvl_grounding_function_20250421_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 270 samples from VC:s3://gui/data_20250317/windows/crop_internvl_grounding_function_20250421_20250722.jsonl +Rank 0: Loading windows_internvl_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/internvl_grounding_20250421_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 10908 samples from VC:s3://gui/data_20250310/windows/internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/crop_internvl_grounding_20250421_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 5453 samples from VC:s3://gui/data_20250310/windows/crop_internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading android_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/android/internvl_grounding_20250409.jsonl with random:80% sampling strategy +Rank 0: Loaded 13950 samples from VC:s3://gui/data_20250328/android/internvl_grounding_20250409.jsonl +Rank 0: Loading windows_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui-agent/data_20250328/windows/internvl_grounding_20250425_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 12390 samples from VC:s3://gui-agent/data_20250328/windows/internvl_grounding_20250425_20250722.jsonl +Rank 0: Loading web_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/web_25k/internvl_grounding_20250409.jsonl with random:80% sampling strategy +Rank 0: Loaded 13402 samples from VC:s3://gui/data_20250328/web_25k/internvl_grounding_20250409.jsonl +Rank 0: Loading icon_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/icon_canva/icon_anno_20250328.jsonl with all sampling strategy +Rank 0: Loaded 81303 samples from VC:s3://gui/data_20250328/icon_canva/icon_anno_20250328.jsonl +Rank 0: Loading mac_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/mac/internvl_grounding_20250410.jsonl with all sampling strategy +Rank 0: Loaded 1251 samples from VC:s3://gui-agent/data_20250407/mac/internvl_grounding_20250410.jsonl +Rank 0: Loading iphone_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/iphone/white/internvl_grounding_20250410.jsonl with all sampling strategy +Rank 0: Loaded 27849 samples from VC:s3://gui-agent/data_20250407/iphone/white/internvl_grounding_20250410.jsonl +Rank 0: Loading web_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/web/internvl_grounding_20250414.jsonl with random:80% sampling strategy +Rank 0: Loaded 51607 samples from VC:s3://gui-agent/data_20250407/web/internvl_grounding_20250414.jsonl +Rank 0: Loading android_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/android/internvl_grounding_20250410.jsonl with random:80% sampling strategy +Rank 0: Loaded 6281 samples from VC:s3://gui-agent/data_20250407/android/internvl_grounding_20250410.jsonl +Rank 0: Loading windows_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/internvl_grounding_20250416_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 25961 samples from VC:s3://gui-agent/data_20250407/windows/internvl_grounding_20250416_20250722.jsonl +Rank 0: Loading windows_cropping_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/sub_internvl_grounding_20250421.jsonl with random:40% sampling strategy +Rank 0: Loaded 9763 samples from VC:s3://gui-agent/data_20250407/windows/sub_internvl_grounding_20250421.jsonl +Rank 0: Loading iphone_internvl_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/internvl_grounding_20250417.jsonl with all sampling strategy +Rank 0: Loaded 16896 samples from VC:s3://gui-agent/data_20250414/iphone/internvl_grounding_20250417.jsonl +Rank 0: Loading iphone_human_internvl_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/human_internvl_grounding_20250421.jsonl with all sampling strategy +Rank 0: Loaded 927 samples from VC:s3://gui-agent/data_20250414/iphone/human_internvl_grounding_20250421.jsonl +Rank 0: Loading mac_human_internvl_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/mac/human_internvl_grounding_20250418.jsonl with all sampling strategy +Rank 0: Loaded 3051 samples from VC:s3://gui-agent/data_20250414/mac/human_internvl_grounding_20250418.jsonl +Rank 0: Loading android_internvl_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/Android/internvl_grounding_20250429.jsonl with random:80% sampling strategy +Rank 0: Loaded 25217 samples from VC:s3://gui-agent/data_20250421/Android/internvl_grounding_20250429.jsonl +Rank 0: Loading android_internvl_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/Android/internvl_grounding_20250429.jsonl with random:80% sampling strategy +Rank 0: Loaded 12677 samples from VC:s3://gui-agent/data_20250428/Android/internvl_grounding_20250429.jsonl +Rank 0: Loading web_canvas_internvl_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/web_canvas/internvl_grounding_20250429.jsonl with random:40% sampling strategy +Rank 0: Loaded 1566 samples from VC:s3://gui-agent/data_20250428/web_canvas/internvl_grounding_20250429.jsonl +Rank 0: Loading web_internvl_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/web/internvl_grounding_20250505.jsonl with random:80% sampling strategy +Rank 0: Loaded 174170 samples from VC:s3://gui-agent/data_20250421/web/internvl_grounding_20250505.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/ubuntu/internvl_grounding_20250505.jsonl with random:80% sampling strategy +Rank 0: Loaded 24862 samples from VC:s3://gui-agent/data_20250428/ubuntu/internvl_grounding_20250505.jsonl +Rank 0: Loading android_internvl_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/android/internvl_grounding_20250506.jsonl with random:80% sampling strategy +Rank 0: Loaded 9142 samples from VC:s3://gui-agent/data_20250505/android/internvl_grounding_20250506.jsonl +Rank 0: Loading windows_internvl_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/internvl_grounding_20250508_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 4229 samples from VC:s3://gui-agent/data_20250505/windows/internvl_grounding_20250508_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/crop_internvl_grounding_20250508_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 4200 samples from VC:s3://gui-agent/data_20250505/windows/crop_internvl_grounding_20250508_20250722.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250508 +Rank 0: Loading VC:s3://gui-agent/data_20250508/ubuntu/internvl_grounding_20250509.jsonl with random:80% sampling strategy +Rank 0: Loaded 2868 samples from VC:s3://gui-agent/data_20250508/ubuntu/internvl_grounding_20250509.jsonl +Rank 0: Loading windows_internvl_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250510_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 9494 samples from VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250510_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250510_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 9494 samples from VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250510_20250722.jsonl +Rank 0: Loading windows_internvl_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250526_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 2270 samples from VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250526_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250526_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 2270 samples from VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250526_20250722.jsonl +Rank 0: Loading windows_internvl_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250529_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 28466 samples from VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250529_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250529_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 28466 samples from VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250529_20250722.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250619_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1620 samples from VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250619_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250619_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1620 samples from VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250619_20250722.jsonl +Rank 0: Loading windows_hover_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250620_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1714 samples from VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250620_20250722.jsonl +Rank 0: Loading windows_crop_hover_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250620_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 1714 samples from VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250620_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_1 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 1372 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_2 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2743 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_3 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2743 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_pure_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_4 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 590 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_5 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 1296 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_6 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 1296 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_pure_paste.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/internvl_grounding_20250627_20250722.jsonl with all sampling strategy +Rank 0: Loaded 4230 samples from VC:s3://gui-agent/data_20250630/windows/internvl_grounding_20250627_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/crop_internvl_grounding_20250627_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 4230 samples from VC:s3://gui-agent/data_20250630/windows/crop_internvl_grounding_20250627_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_1 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 1692 samples from VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 3384 samples from VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 3384 samples from VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_pure_paste.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/internvl_grounding_20250630_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1077 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/internvl_grounding_20250630_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_internvl_grounding_20250630_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 862 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_internvl_grounding_20250630_20250722.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/internvl_grounding_20250703_20250722.jsonl with all sampling strategy +Rank 0: Loaded 2676 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/internvl_grounding_20250703_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_internvl_grounding_20250703_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 2676 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_internvl_grounding_20250703_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_4 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 1070 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_5 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2141 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_6 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2141 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_pure_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_7 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 431 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_8 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 862 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_9 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 862 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_pure_paste.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/internvl_grounding_20250708_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1344 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/internvl_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_internvl_grounding_20250708_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1344 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_internvl_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250707_1 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_internvl_grounding_20250708.jsonl with random:40% sampling strategy +Rank 0: Loaded 292 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_internvl_grounding_20250708.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250707_2 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_internvl_grounding_20250708.jsonl with random:40% sampling strategy +Rank 0: Loaded 1075 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_internvl_grounding_20250708.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250707_3 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_internvl_grounding_20250708.jsonl with random:40% sampling strategy +Rank 0: Loaded 1075 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_internvl_grounding_20250708.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/internvl_grounding_20250717_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1509 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/internvl_grounding_20250717_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_internvl_grounding_20250717_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 1207 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_internvl_grounding_20250717_20250722.jsonl +Rank 0: Loading uibert_train_ground_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/uibert_train_ground_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 4646 samples from VC:s3://gui/new_annotations/gui_data_grounding/uibert_train_ground_d240430_v1.jsonl +Rank 0: Loading openapp_taperception_grounding_d240815_v2 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/openapp_taperception_grounding_d240815_v2.jsonl with all sampling strategy +Rank 0: Loaded 2500 samples from VC:s3://gui/new_annotations/gui_data_grounding/openapp_taperception_grounding_d240815_v2.jsonl +Rank 0: Loading openapp_widget_grounding_d240815_v2 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/openapp_widget_grounding_d240815_v2.jsonl with all sampling strategy +Rank 0: Loaded 14878 samples from VC:s3://gui/new_annotations/gui_data_grounding/openapp_widget_grounding_d240815_v2.jsonl +Rank 0: Loading openapp_mug_grounding_d240812 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/openapp_mug_grounding_d240812.jsonl with all sampling strategy +Rank 0: Loaded 26090 samples from VC:s3://gui/new_annotations/gui_data_grounding/openapp_mug_grounding_d240812.jsonl +Rank 0: Loading private_ui_phone_2403_ground_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/private_ui_phone_2403_ground_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 24798 samples from VC:s3://gui/new_annotations/gui_data_grounding/private_ui_phone_2403_ground_d240430_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_ground_d240521_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2405_ground_d240521_v1.jsonl with all sampling strategy +Rank 0: Loaded 5008 samples from VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2405_ground_d240521_v1.jsonl +Rank 0: Loading private_ui_aig_share_2406_ground_d240612_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2406_ground_d240612_v1.jsonl with all sampling strategy +Rank 0: Loaded 7903 samples from VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2406_ground_d240612_v1.jsonl +Rank 0: Loading windows_pc_agent_e_planning_cot +Rank 0: Loading VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 55564 samples from VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data.jsonl +Rank 0: Loading windows_pc_agent_e_navigation +Rank 0: Loading VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data_without_think.jsonl with all sampling strategy +Rank 0: Loaded 27782 samples from VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data_without_think.jsonl +Rank 0: Loading os_genesis_ac_training_data +Rank 0: Skipping os_genesis_ac_training_data due to repeat_time=0 +Rank 0: Loading os_genesis_aw_training_data +Rank 0: Skipping os_genesis_aw_training_data due to repeat_time=0 +Rank 0: Loading os_genesis_web_training +Rank 0: Skipping os_genesis_web_training due to repeat_time=0 +Rank 0: Loading gui_odyssey_plus_1 +Rank 0: Skipping gui_odyssey_plus_1 due to repeat_time=0 +Rank 0: Loading gui_odyssey_plus_2 +Rank 0: Skipping gui_odyssey_plus_2 due to repeat_time=0 +Rank 0: Loading gui_odyssey_plus_custom_3 +Rank 0: Skipping gui_odyssey_plus_custom_3 due to repeat_time=0 +Rank 0: Loading mm_gui_mid +Rank 0: Skipping mm_gui_mid due to repeat_time=0 +Rank 0: Loading text_gui_mid +Rank 0: Skipping text_gui_mid due to repeat_time=0 +Rank 0: Loading gui_mid_trajectory +Rank 0: Skipping gui_mid_trajectory due to repeat_time=0 +Rank 0: Loading ubuntu_rag +Rank 0: Loading VC:s3://gui-agent/cua_text_rag/ubuntu_rag.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7024 samples from VC:s3://gui-agent/cua_text_rag/ubuntu_rag.jsonl +Rank 0: Loading windows_rag +Rank 0: Loading VC:s3://gui-agent/cua_text_rag/windows_rag.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3144 samples from VC:s3://gui-agent/cua_text_rag/windows_rag.jsonl +Rank 0: Total training samples: 6240940 +Rank 0: Formatting inputs...Skip in lazy mode +Detected kernel version 3.10.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher. +Rank 0: Length of multimodal samples: 6230528, pure textual samples: 9984 +Parameter Offload: Total persistent parameters: 755712 in 408 params + 0%| | 0/12188 [00:00 + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7fd02d3456c0> +[Try #0] Failed to fetch sample 4409357 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7fd02d3456c0> +Problematic sample: {'image': '20240823_045930_before_screenshot_sub3.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Data Providers'"}, {'from': 'gpt', 'value': '\nclick(x=0.8015, y=0.921)\n'}]} + 82%|████████▏ | 10042/12188 [07:03<4:29:55, 7.55s/it] {'loss': 0.2659, 'grad_norm': 0.6694067836607709, 'learning_rate': 7.919700013163128e-07, 'epoch': 0.82} + 82%|████████▏ | 10042/12188 [07:03<4:29:55, 7.55s/it] 82%|████████▏ | 10043/12188 [07:10<4:28:54, 7.52s/it] {'loss': 0.3081, 'grad_norm': 0.6568838029651366, 'learning_rate': 7.912525264167342e-07, 'epoch': 0.82} + 82%|████████▏ | 10043/12188 [07:10<4:28:54, 7.52s/it] 82%|████████▏ | 10044/12188 [07:20<4:47:47, 8.05s/it] {'loss': 0.304, 'grad_norm': 0.7027557550168749, 'learning_rate': 7.905353487321404e-07, 'epoch': 0.82} + 82%|████████▏ | 10044/12188 [07:20<4:47:47, 8.05s/it] 82%|████████▏ | 10045/12188 [07:28<4:51:06, 8.15s/it] {'loss': 0.3283, 'grad_norm': 0.7014344359743985, 'learning_rate': 7.898184683131782e-07, 'epoch': 0.82} + 82%|████████▏ | 10045/12188 [07:28<4:51:06, 8.15s/it] 82%|████████▏ | 10046/12188 [07:35<4:41:52, 7.90s/it] {'loss': 0.3075, 'grad_norm': 0.6989999338091103, 'learning_rate': 7.891018852104709e-07, 'epoch': 0.82} + 82%|████████▏ | 10046/12188 [07:35<4:41:52, 7.90s/it] 82%|████████▏ | 10047/12188 [07:42<4:27:59, 7.51s/it] {'loss': 0.3055, 'grad_norm': 0.7039863882998292, 'learning_rate': 7.883855994746237e-07, 'epoch': 0.82} + 82%|████████▏ | 10047/12188 [07:42<4:27:59, 7.51s/it] 82%|████████▏ | 10048/12188 [07:49<4:20:11, 7.29s/it] {'loss': 0.281, 'grad_norm': 0.6379750470291076, 'learning_rate': 7.876696111562182e-07, 'epoch': 0.82} + 82%|████████▏ | 10048/12188 [07:49<4:20:11, 7.29s/it] 82%|████████▏ | 10049/12188 [07:57<4:27:27, 7.50s/it] {'loss': 0.3306, 'grad_norm': 0.7108308013156872, 'learning_rate': 7.869539203058169e-07, 'epoch': 0.82} + 82%|████████▏ | 10049/12188 [07:57<4:27:27, 7.50s/it] 82%|████████▏ | 10050/12188 [08:04<4:21:46, 7.35s/it] {'loss': 0.2741, 'grad_norm': 0.6493965404672631, 'learning_rate': 7.862385269739625e-07, 'epoch': 0.82} + 82%|████████▏ | 10050/12188 [08:04<4:21:46, 7.35s/it] 82%|████████▏ | 10051/12188 [08:11<4:19:02, 7.27s/it] {'loss': 0.3131, 'grad_norm': 0.652930706621255, 'learning_rate': 7.855234312111732e-07, 'epoch': 0.82} + 82%|████████▏ | 10051/12188 [08:11<4:19:02, 7.27s/it] 82%|████████▏ | 10052/12188 [08:18<4:12:25, 7.09s/it] {'loss': 0.3051, 'grad_norm': 0.6301630730606906, 'learning_rate': 7.848086330679483e-07, 'epoch': 0.82} + 82%|████████▏ | 10052/12188 [08:18<4:12:25, 7.09s/it] 82%|████████▏ | 10053/12188 [08:26<4:23:47, 7.41s/it] {'loss': 0.3087, 'grad_norm': 0.7162958636734652, 'learning_rate': 7.840941325947637e-07, 'epoch': 0.82} + 82%|████████▏ | 10053/12188 [08:26<4:23:47, 7.41s/it] 82%|████████▏ | 10054/12188 [08:32<4:16:26, 7.21s/it] {'loss': 0.2685, 'grad_norm': 0.7232080751071915, 'learning_rate': 7.833799298420786e-07, 'epoch': 0.82} + 82%|████████▏ | 10054/12188 [08:32<4:16:26, 7.21s/it] 82%|████████▏ | 10055/12188 [08:40<4:18:11, 7.26s/it] {'loss': 0.2823, 'grad_norm': 0.6881345156765095, 'learning_rate': 7.826660248603296e-07, 'epoch': 0.82} + 82%|████████▏ | 10055/12188 [08:40<4:18:11, 7.26s/it] 83%|████████▎ | 10056/12188 [08:47<4:19:35, 7.31s/it] {'loss': 0.3118, 'grad_norm': 0.6818179070096816, 'learning_rate': 7.819524176999288e-07, 'epoch': 0.83} + 83%|████████▎ | 10056/12188 [08:47<4:19:35, 7.31s/it] 83%|████████▎ | 10057/12188 [08:55<4:28:43, 7.57s/it] {'loss': 0.3044, 'grad_norm': 0.7040230251822188, 'learning_rate': 7.812391084112731e-07, 'epoch': 0.83} + 83%|████████▎ | 10057/12188 [08:55<4:28:43, 7.57s/it] 83%|████████▎ | 10058/12188 [09:04<4:37:44, 7.82s/it] {'loss': 0.2977, 'grad_norm': 0.6719861774037399, 'learning_rate': 7.80526097044732e-07, 'epoch': 0.83} + 83%|████████▎ | 10058/12188 [09:04<4:37:44, 7.82s/it] 83%|████████▎ | 10059/12188 [09:12<4:36:13, 7.78s/it] {'loss': 0.3021, 'grad_norm': 0.8154609876319086, 'learning_rate': 7.798133836506588e-07, 'epoch': 0.83} + 83%|████████▎ | 10059/12188 [09:12<4:36:13, 7.78s/it] 83%|████████▎ | 10060/12188 [09:19<4:29:35, 7.60s/it] {'loss': 0.3367, 'grad_norm': 0.848608694969285, 'learning_rate': 7.791009682793855e-07, 'epoch': 0.83} + 83%|████████▎ | 10060/12188 [09:19<4:29:35, 7.60s/it] 83%|████████▎ | 10061/12188 [09:26<4:21:16, 7.37s/it] {'loss': 0.3616, 'grad_norm': 0.694968435894115, 'learning_rate': 7.783888509812193e-07, 'epoch': 0.83} + 83%|████████▎ | 10061/12188 [09:26<4:21:16, 7.37s/it] 83%|████████▎ | 10062/12188 [09:32<4:16:21, 7.23s/it] {'loss': 0.3076, 'grad_norm': 0.6289032230201995, 'learning_rate': 7.776770318064497e-07, 'epoch': 0.83} + 83%|████████▎ | 10062/12188 [09:32<4:16:21, 7.23s/it] 83%|████████▎ | 10063/12188 [09:39<4:11:44, 7.11s/it] {'loss': 0.2942, 'grad_norm': 0.6826286933538148, 'learning_rate': 7.769655108053459e-07, 'epoch': 0.83} + 83%|████████▎ | 10063/12188 [09:39<4:11:44, 7.11s/it] 83%|████████▎ | 10064/12188 [09:46<4:10:48, 7.08s/it] {'loss': 0.2828, 'grad_norm': 0.6840664119753669, 'learning_rate': 7.762542880281526e-07, 'epoch': 0.83} + 83%|████████▎ | 10064/12188 [09:46<4:10:48, 7.08s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f172e040cc0> +[Try #0] Failed to fetch sample 4385424 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f172e040cc0> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Wiktionary'"}, {'from': 'gpt', 'value': '\nclick(x=0.904, y=0.465)\n'}]} + 83%|████████▎ | 10065/12188 [09:55<4:25:01, 7.49s/it] {'loss': 0.2736, 'grad_norm': 0.7495870532947759, 'learning_rate': 7.755433635250948e-07, 'epoch': 0.83} + 83%|████████▎ | 10065/12188 [09:55<4:25:01, 7.49s/it] 83%|████████▎ | 10066/12188 [10:01<4:15:24, 7.22s/it] {'loss': 0.3046, 'grad_norm': 0.7602460278865553, 'learning_rate': 7.748327373463782e-07, 'epoch': 0.83} + 83%|████████▎ | 10066/12188 [10:01<4:15:24, 7.22s/it] 83%|████████▎ | 10067/12188 [10:08<4:09:37, 7.06s/it] {'loss': 0.2927, 'grad_norm': 0.6808044905587926, 'learning_rate': 7.741224095421845e-07, 'epoch': 0.83} + 83%|████████▎ | 10067/12188 [10:08<4:09:37, 7.06s/it] 83%|████████▎ | 10068/12188 [10:15<4:06:45, 6.98s/it] {'loss': 0.3394, 'grad_norm': 0.7652004776886638, 'learning_rate': 7.734123801626781e-07, 'epoch': 0.83} + 83%|████████▎ | 10068/12188 [10:15<4:06:45, 6.98s/it] 83%|████████▎ | 10069/12188 [10:22<4:04:04, 6.91s/it] {'loss': 0.3235, 'grad_norm': 0.6639111288147781, 'learning_rate': 7.727026492579976e-07, 'epoch': 0.83} + 83%|████████▎ | 10069/12188 [10:22<4:04:04, 6.91s/it] 83%|████████▎ | 10070/12188 [10:31<4:25:56, 7.53s/it] {'loss': 0.265, 'grad_norm': 0.7489950562831769, 'learning_rate': 7.719932168782656e-07, 'epoch': 0.83} + 83%|████████▎ | 10070/12188 [10:31<4:25:56, 7.53s/it] 83%|████████▎ | 10071/12188 [10:37<4:17:40, 7.30s/it] {'loss': 0.2962, 'grad_norm': 0.7723629101589876, 'learning_rate': 7.712840830735785e-07, 'epoch': 0.83} + 83%|████████▎ | 10071/12188 [10:37<4:17:40, 7.30s/it] 83%|████████▎ | 10072/12188 [10:45<4:18:08, 7.32s/it] {'loss': 0.3172, 'grad_norm': 0.768541027722128, 'learning_rate': 7.705752478940154e-07, 'epoch': 0.83} + 83%|████████▎ | 10072/12188 [10:45<4:18:08, 7.32s/it] 83%|████████▎ | 10073/12188 [10:54<4:37:37, 7.88s/it] {'loss': 0.2957, 'grad_norm': 0.7522553418699148, 'learning_rate': 7.698667113896346e-07, 'epoch': 0.83} + 83%|████████▎ | 10073/12188 [10:54<4:37:37, 7.88s/it] 83%|████████▎ | 10074/12188 [11:04<4:56:18, 8.41s/it] {'loss': 0.3384, 'grad_norm': 0.7673432749302894, 'learning_rate': 7.69158473610469e-07, 'epoch': 0.83} + 83%|████████▎ | 10074/12188 [11:04<4:56:18, 8.41s/it] 83%|████████▎ | 10075/12188 [11:12<4:51:38, 8.28s/it] {'loss': 0.3171, 'grad_norm': 0.8018378664664306, 'learning_rate': 7.684505346065363e-07, 'epoch': 0.83} + 83%|████████▎ | 10075/12188 [11:12<4:51:38, 8.28s/it] 83%|████████▎ | 10076/12188 [11:18<4:33:01, 7.76s/it] {'loss': 0.3173, 'grad_norm': 0.7109265133681996, 'learning_rate': 7.677428944278271e-07, 'epoch': 0.83} + 83%|████████▎ | 10076/12188 [11:18<4:33:01, 7.76s/it] 83%|████████▎ | 10077/12188 [11:25<4:21:23, 7.43s/it] {'loss': 0.2705, 'grad_norm': 0.6898189432998911, 'learning_rate': 7.670355531243145e-07, 'epoch': 0.83} + 83%|████████▎ | 10077/12188 [11:25<4:21:23, 7.43s/it] 83%|████████▎ | 10078/12188 [11:35<4:47:52, 8.19s/it] {'loss': 0.2925, 'grad_norm': 0.7093954286130688, 'learning_rate': 7.663285107459517e-07, 'epoch': 0.83} + 83%|████████▎ | 10078/12188 [11:35<4:47:52, 8.19s/it] 83%|████████▎ | 10079/12188 [11:43<4:49:52, 8.25s/it] {'loss': 0.2763, 'grad_norm': 0.7574675657732958, 'learning_rate': 7.65621767342668e-07, 'epoch': 0.83} + 83%|████████▎ | 10079/12188 [11:43<4:49:52, 8.25s/it] 83%|████████▎ | 10080/12188 [11:50<4:39:15, 7.95s/it] {'loss': 0.312, 'grad_norm': 0.7124400894932973, 'learning_rate': 7.649153229643708e-07, 'epoch': 0.83} + 83%|████████▎ | 10080/12188 [11:50<4:39:15, 7.95s/it] 83%|████████▎ | 10081/12188 [11:58<4:32:01, 7.75s/it] {'loss': 0.3325, 'grad_norm': 1.0847845415445831, 'learning_rate': 7.6420917766095e-07, 'epoch': 0.83} + 83%|████████▎ | 10081/12188 [11:58<4:32:01, 7.75s/it] 83%|████████▎ | 10082/12188 [12:06<4:36:32, 7.88s/it] {'loss': 0.306, 'grad_norm': 0.7129825287262971, 'learning_rate': 7.63503331482271e-07, 'epoch': 0.83} + 83%|████████▎ | 10082/12188 [12:06<4:36:32, 7.88s/it] 83%|���███████▎ | 10083/12188 [12:15<4:50:01, 8.27s/it] {'loss': 0.2968, 'grad_norm': 0.7040769495675181, 'learning_rate': 7.627977844781815e-07, 'epoch': 0.83} + 83%|████████▎ | 10083/12188 [12:15<4:50:01, 8.27s/it] 83%|████████▎ | 10084/12188 [12:22<4:32:46, 7.78s/it] {'loss': 0.3002, 'grad_norm': 0.8182252910162848, 'learning_rate': 7.620925366985033e-07, 'epoch': 0.83} + 83%|████████▎ | 10084/12188 [12:22<4:32:46, 7.78s/it] 83%|████████▎ | 10085/12188 [12:30<4:37:48, 7.93s/it] {'loss': 0.271, 'grad_norm': 0.7862939754966901, 'learning_rate': 7.613875881930416e-07, 'epoch': 0.83} + 83%|████████▎ | 10085/12188 [12:30<4:37:48, 7.93s/it] 83%|████████▎ | 10086/12188 [12:37<4:29:51, 7.70s/it] {'loss': 0.2987, 'grad_norm': 0.7124290871620932, 'learning_rate': 7.606829390115799e-07, 'epoch': 0.83} + 83%|████████▎ | 10086/12188 [12:37<4:29:51, 7.70s/it] 83%|████████▎ | 10087/12188 [12:44<4:19:38, 7.41s/it] {'loss': 0.28, 'grad_norm': 0.9886649651976749, 'learning_rate': 7.599785892038764e-07, 'epoch': 0.83} + 83%|████████▎ | 10087/12188 [12:44<4:19:38, 7.41s/it] 83%|████████▎ | 10088/12188 [12:54<4:44:47, 8.14s/it] {'loss': 0.2761, 'grad_norm': 0.6634874606142072, 'learning_rate': 7.592745388196748e-07, 'epoch': 0.83} + 83%|████████▎ | 10088/12188 [12:54<4:44:47, 8.14s/it] 83%|████████▎ | 10089/12188 [13:00<4:31:20, 7.76s/it] {'loss': 0.2782, 'grad_norm': 0.6629123360003163, 'learning_rate': 7.585707879086901e-07, 'epoch': 0.83} + 83%|████████▎ | 10089/12188 [13:00<4:31:20, 7.76s/it] 83%|████████▎ | 10090/12188 [13:09<4:43:02, 8.09s/it] {'loss': 0.3056, 'grad_norm': 0.6812177497979843, 'learning_rate': 7.578673365206224e-07, 'epoch': 0.83} + 83%|████████▎ | 10090/12188 [13:09<4:43:02, 8.09s/it] 83%|████████▎ | 10091/12188 [13:19<5:03:26, 8.68s/it] {'loss': 0.3095, 'grad_norm': 0.669282223082241, 'learning_rate': 7.571641847051492e-07, 'epoch': 0.83} + 83%|████████▎ | 10091/12188 [13:19<5:03:26, 8.68s/it] 83%|████████▎ | 10092/12188 [13:29<5:14:20, 9.00s/it] {'loss': 0.3004, 'grad_norm': 0.6524228563022577, 'learning_rate': 7.564613325119241e-07, 'epoch': 0.83} + 83%|████████▎ | 10092/12188 [13:29<5:14:20, 9.00s/it] 83%|████████▎ | 10093/12188 [13:36<4:51:51, 8.36s/it] {'loss': 0.2753, 'grad_norm': 0.7139426120716742, 'learning_rate': 7.557587799905813e-07, 'epoch': 0.83} + 83%|████████▎ | 10093/12188 [13:36<4:51:51, 8.36s/it] 83%|████████▎ | 10094/12188 [13:48<5:26:02, 9.34s/it] {'loss': 0.2803, 'grad_norm': 0.6932378947427449, 'learning_rate': 7.550565271907357e-07, 'epoch': 0.83} + 83%|████████▎ | 10094/12188 [13:48<5:26:02, 9.34s/it] 83%|████████▎ | 10095/12188 [13:56<5:15:04, 9.03s/it] {'loss': 0.2995, 'grad_norm': 0.7388890800971697, 'learning_rate': 7.543545741619762e-07, 'epoch': 0.83} + 83%|████████▎ | 10095/12188 [13:56<5:15:04, 9.03s/it] 83%|████████▎ | 10096/12188 [14:03<4:53:48, 8.43s/it] {'loss': 0.2605, 'grad_norm': 0.6703445300258937, 'learning_rate': 7.536529209538773e-07, 'epoch': 0.83} + 83%|████████▎ | 10096/12188 [14:03<4:53:48, 8.43s/it] 83%|████████▎ | 10097/12188 [14:10<4:37:29, 7.96s/it] {'loss': 0.2862, 'grad_norm': 0.5759045610828644, 'learning_rate': 7.52951567615986e-07, 'epoch': 0.83} + 83%|████████▎ | 10097/12188 [14:10<4:37:29, 7.96s/it] 83%|████████▎ | 10098/12188 [14:17<4:25:17, 7.62s/it] {'loss': 0.2747, 'grad_norm': 0.7425209229759971, 'learning_rate': 7.522505141978309e-07, 'epoch': 0.83} + 83%|████████▎ | 10098/12188 [14:17<4:25:17, 7.62s/it] 83%|████████▎ | 10099/12188 [14:23<4:14:22, 7.31s/it] {'loss': 0.282, 'grad_norm': 0.7247908946256503, 'learning_rate': 7.515497607489213e-07, 'epoch': 0.83} + 83%|████████▎ | 10099/12188 [14:23<4:14:22, 7.31s/it] 83%|████████▎ | 10100/12188 [14:33<4:38:38, 8.01s/it] {'loss': 0.3032, 'grad_norm': 0.6360200951867712, 'learning_rate': 7.508493073187411e-07, 'epoch': 0.83} + 83%|████████▎ | 10100/12188 [14:33<4:38:38, 8.01s/it] 83%|████████▎ | 10101/12188 [14:42<4:53:34, 8.44s/it] {'loss': 0.3148, 'grad_norm': 0.7779863330978688, 'learning_rate': 7.501491539567574e-07, 'epoch': 0.83} + 83%|████████▎ | 10101/12188 [14:42<4:53:34, 8.44s/it] 83%|████████▎ | 10102/12188 [14:50<4:45:52, 8.22s/it] {'loss': 0.319, 'grad_norm': 0.7275033725327077, 'learning_rate': 7.494493007124109e-07, 'epoch': 0.83} + 83%|████████▎ | 10102/12188 [14:50<4:45:52, 8.22s/it] 83%|████████▎ | 10103/12188 [14:57<4:29:15, 7.75s/it] {'loss': 0.2968, 'grad_norm': 0.6948678017211177, 'learning_rate': 7.487497476351258e-07, 'epoch': 0.83} + 83%|████████▎ | 10103/12188 [14:57<4:29:15, 7.75s/it] 83%|████████▎ | 10104/12188 [15:06<4:47:16, 8.27s/it] {'loss': 0.3555, 'grad_norm': 0.7281302609553751, 'learning_rate': 7.480504947743044e-07, 'epoch': 0.83} + 83%|████████▎ | 10104/12188 [15:06<4:47:16, 8.27s/it] 83%|████████▎ | 10105/12188 [15:13<4:29:39, 7.77s/it] {'loss': 0.3123, 'grad_norm': 0.6426411694290217, 'learning_rate': 7.473515421793248e-07, 'epoch': 0.83} + 83%|████████▎ | 10105/12188 [15:13<4:29:39, 7.77s/it] 83%|████████▎ | 10106/12188 [15:21<4:39:27, 8.05s/it] {'loss': 0.2862, 'grad_norm': 0.6975479428697758, 'learning_rate': 7.466528898995479e-07, 'epoch': 0.83} + 83%|████████▎ | 10106/12188 [15:21<4:39:27, 8.05s/it] 83%|████████▎ | 10107/12188 [15:28<4:28:04, 7.73s/it] {'loss': 0.3074, 'grad_norm': 0.7622877372846111, 'learning_rate': 7.459545379843108e-07, 'epoch': 0.83} + 83%|████████▎ | 10107/12188 [15:28<4:28:04, 7.73s/it] 83%|████████▎ | 10108/12188 [15:36<4:24:33, 7.63s/it] {'loss': 0.2927, 'grad_norm': 0.6771001742578777, 'learning_rate': 7.452564864829281e-07, 'epoch': 0.83} + 83%|████████▎ | 10108/12188 [15:36<4:24:33, 7.63s/it] 83%|████████▎ | 10109/12188 [15:44<4:33:12, 7.88s/it] {'loss': 0.3006, 'grad_norm': 0.653229240738029, 'learning_rate': 7.445587354446975e-07, 'epoch': 0.83} + 83%|████████▎ | 10109/12188 [15:44<4:33:12, 7.88s/it] 83%|████████▎ | 10110/12188 [15:52<4:33:17, 7.89s/it] {'loss': 0.3452, 'grad_norm': 0.7326996366078978, 'learning_rate': 7.438612849188915e-07, 'epoch': 0.83} + 83%|████████▎ | 10110/12188 [15:52<4:33:17, 7.89s/it] 83%|████████▎ | 10111/12188 [15:59<4:23:51, 7.62s/it] {'loss': 0.3539, 'grad_norm': 0.6628722309791439, 'learning_rate': 7.43164134954763e-07, 'epoch': 0.83} + 83%|████████▎ | 10111/12188 [15:59<4:23:51, 7.62s/it] 83%|████████▎ | 10112/12188 [16:06<4:15:09, 7.37s/it] {'loss': 0.2953, 'grad_norm': 0.6728660195973715, 'learning_rate': 7.424672856015458e-07, 'epoch': 0.83} + 83%|████████▎ | 10112/12188 [16:06<4:15:09, 7.37s/it] 83%|████████▎ | 10113/12188 [16:15<4:26:43, 7.71s/it] {'loss': 0.3142, 'grad_norm': 0.6936255560562267, 'learning_rate': 7.417707369084476e-07, 'epoch': 0.83} + 83%|████████▎ | 10113/12188 [16:15<4:26:43, 7.71s/it] 83%|████████▎ | 10114/12188 [16:21<4:17:57, 7.46s/it] {'loss': 0.3179, 'grad_norm': 0.7684826138767574, 'learning_rate': 7.41074488924659e-07, 'epoch': 0.83} + 83%|████████▎ | 10114/12188 [16:21<4:17:57, 7.46s/it] 83%|████████▎ | 10115/12188 [16:28<4:09:31, 7.22s/it] {'loss': 0.3237, 'grad_norm': 0.703692531396334, 'learning_rate': 7.40378541699347e-07, 'epoch': 0.83} + 83%|████████▎ | 10115/12188 [16:28<4:09:31, 7.22s/it] 83%|████████▎ | 10116/12188 [16:35<4:03:42, 7.06s/it] {'loss': 0.3019, 'grad_norm': 0.6643505674678118, 'learning_rate': 7.396828952816587e-07, 'epoch': 0.83} + 83%|████████▎ | 10116/12188 [16:35<4:03:42, 7.06s/it] 83%|████████▎ | 10117/12188 [16:44<4:21:09, 7.57s/it] {'loss': 0.2678, 'grad_norm': 0.6803382851713176, 'learning_rate': 7.389875497207205e-07, 'epoch': 0.83} + 83%|████████▎ | 10117/12188 [16:44<4:21:09, 7.57s/it] 83%|████████▎ | 10118/12188 [16:50<4:12:58, 7.33s/it] {'loss': 0.2899, 'grad_norm': 0.7918378310505687, 'learning_rate': 7.382925050656348e-07, 'epoch': 0.83} + 83%|████████▎ | 10118/12188 [16:50<4:12:58, 7.33s/it] 83%|████████▎ | 10119/12188 [16:57<4:05:38, 7.12s/it] {'loss': 0.2977, 'grad_norm': 0.7038283593298607, 'learning_rate': 7.375977613654861e-07, 'epoch': 0.83} + 83%|████████▎ | 10119/12188 [16:57<4:05:38, 7.12s/it] 83%|████████▎ | 10120/12188 [17:03<3:57:40, 6.90s/it] {'loss': 0.3079, 'grad_norm': 0.7707435404365823, 'learning_rate': 7.369033186693359e-07, 'epoch': 0.83} + 83%|████████▎ | 10120/12188 [17:03<3:57:40, 6.90s/it] 83%|████████▎ | 10121/12188 [17:10<3:59:09, 6.94s/it] {'loss': 0.3017, 'grad_norm': 1.3381445347956011, 'learning_rate': 7.362091770262231e-07, 'epoch': 0.83} + 83%|████████▎ | 10121/12188 [17:10<3:59:09, 6.94s/it] 83%|████████▎ | 10122/12188 [17:17<3:56:07, 6.86s/it] {'loss': 0.2912, 'grad_norm': 0.7178564864557997, 'learning_rate': 7.355153364851686e-07, 'epoch': 0.83} + 83%|████████▎ | 10122/12188 [17:17<3:56:07, 6.86s/it] 83%|████████▎ | 10123/12188 [17:24<3:55:40, 6.85s/it] {'loss': 0.2565, 'grad_norm': 0.6662846206995062, 'learning_rate': 7.348217970951687e-07, 'epoch': 0.83} + 83%|████████▎ | 10123/12188 [17:24<3:55:40, 6.85s/it] 83%|████████▎ | 10124/12188 [17:31<3:55:20, 6.84s/it] {'loss': 0.3226, 'grad_norm': 0.6524936958891756, 'learning_rate': 7.341285589052022e-07, 'epoch': 0.83} + 83%|████████▎ | 10124/12188 [17:31<3:55:20, 6.84s/it] 83%|████████▎ | 10125/12188 [17:38<3:55:33, 6.85s/it] {'loss': 0.3199, 'grad_norm': 0.7301771531681354, 'learning_rate': 7.334356219642219e-07, 'epoch': 0.83} + 83%|████████▎ | 10125/12188 [17:38<3:55:33, 6.85s/it] 83%|████████▎ | 10126/12188 [17:44<3:56:21, 6.88s/it] {'loss': 0.341, 'grad_norm': 0.7783946217917983, 'learning_rate': 7.327429863211633e-07, 'epoch': 0.83} + 83%|████████▎ | 10126/12188 [17:44<3:56:21, 6.88s/it] 83%|████████▎ | 10127/12188 [17:51<3:53:31, 6.80s/it] {'loss': 0.2657, 'grad_norm': 0.7004370143193185, 'learning_rate': 7.320506520249404e-07, 'epoch': 0.83} + 83%|████████▎ | 10127/12188 [17:51<3:53:31, 6.80s/it] 83%|████████▎ | 10128/12188 [17:59<4:08:43, 7.24s/it] {'loss': 0.2872, 'grad_norm': 0.7011859681950081, 'learning_rate': 7.313586191244421e-07, 'epoch': 0.83} + 83%|████████▎ | 10128/12188 [17:59<4:08:43, 7.24s/it] 83%|████████▎ | 10129/12188 [18:06<4:01:21, 7.03s/it] {'loss': 0.3042, 'grad_norm': 0.6743235056052237, 'learning_rate': 7.306668876685402e-07, 'epoch': 0.83} + 83%|████████▎ | 10129/12188 [18:06<4:01:21, 7.03s/it] 83%|████████▎ | 10130/12188 [18:12<3:54:05, 6.82s/it] {'loss': 0.3172, 'grad_norm': 0.6976056330847829, 'learning_rate': 7.299754577060847e-07, 'epoch': 0.83} + 83%|████████▎ | 10130/12188 [18:12<3:54:05, 6.82s/it] 83%|████████▎ | 10131/12188 [18:20<3:58:35, 6.96s/it] {'loss': 0.2958, 'grad_norm': 0.6358573930576686, 'learning_rate': 7.292843292859009e-07, 'epoch': 0.83} + 83%|████████▎ | 10131/12188 [18:20<3:58:35, 6.96s/it] 83%|████████▎ | 10132/12188 [18:27<4:01:33, 7.05s/it] {'loss': 0.2991, 'grad_norm': 0.7716399434031991, 'learning_rate': 7.285935024567975e-07, 'epoch': 0.83} + 83%|████████▎ | 10132/12188 [18:27<4:01:33, 7.05s/it] 83%|████████▎ | 10133/12188 [18:35<4:13:52, 7.41s/it] {'loss': 0.2716, 'grad_norm': 1.1320617780928306, 'learning_rate': 7.279029772675572e-07, 'epoch': 0.83} + 83%|████████▎ | 10133/12188 [18:35<4:13:52, 7.41s/it]Invalidate trace cache @ step 2: expected module 1, but got module 364 + 83%|████████▎ | 10134/12188 [18:41<3:59:38, 7.00s/it] {'loss': 0.6362, 'grad_norm': 0.5641867941224109, 'learning_rate': 7.27212753766946e-07, 'epoch': 0.83} + 83%|████████▎ | 10134/12188 [18:41<3:59:38, 7.00s/it] 83%|████████▎ | 10135/12188 [18:48<4:01:57, 7.07s/it] {'loss': 0.293, 'grad_norm': 0.7337317000117262, 'learning_rate': 7.265228320037054e-07, 'epoch': 0.83} + 83%|████████▎ | 10135/12188 [18:48<4:01:57, 7.07s/it] 83%|████████▎ | 10136/12188 [18:55<4:01:03, 7.05s/it] {'loss': 0.2928, 'grad_norm': 0.6698936042868271, 'learning_rate': 7.258332120265554e-07, 'epoch': 0.83} + 83%|████████▎ | 10136/12188 [18:55<4:01:03, 7.05s/it] 83%|████████▎ | 10137/12188 [19:02<4:01:01, 7.05s/it] {'loss': 0.3145, 'grad_norm': 0.7031422690883417, 'learning_rate': 7.251438938841981e-07, 'epoch': 0.83} + 83%|████████▎ | 10137/12188 [19:02<4:01:01, 7.05s/it] 83%|████████▎ | 10138/12188 [19:09<3:55:29, 6.89s/it] {'loss': 0.3281, 'grad_norm': 0.8151297897587783, 'learning_rate': 7.244548776253102e-07, 'epoch': 0.83} + 83%|████████▎ | 10138/12188 [19:09<3:55:29, 6.89s/it] 83%|████████▎ | 10139/12188 [19:19<4:31:15, 7.94s/it] {'loss': 0.3111, 'grad_norm': 0.7276805542950563, 'learning_rate': 7.237661632985493e-07, 'epoch': 0.83} + 83%|████████▎ | 10139/12188 [19:19<4:31:15, 7.94s/it] 83%|████████▎ | 10140/12188 [19:27<4:24:40, 7.75s/it] {'loss': 0.2746, 'grad_norm': 0.7388024152547777, 'learning_rate': 7.230777509525527e-07, 'epoch': 0.83} + 83%|████████▎ | 10140/12188 [19:27<4:24:40, 7.75s/it] 83%|████████▎ | 10141/12188 [19:33<4:12:13, 7.39s/it] {'loss': 0.2893, 'grad_norm': 0.7212528969898304, 'learning_rate': 7.223896406359326e-07, 'epoch': 0.83} + 83%|████████▎ | 10141/12188 [19:33<4:12:13, 7.39s/it] 83%|████████▎ | 10142/12188 [19:41<4:20:41, 7.64s/it] {'loss': 0.2512, 'grad_norm': 0.9707466372236226, 'learning_rate': 7.217018323972852e-07, 'epoch': 0.83} + 83%|████████▎ | 10142/12188 [19:41<4:20:41, 7.64s/it] 83%|████████▎ | 10143/12188 [19:51<4:41:01, 8.25s/it] {'loss': 0.321, 'grad_norm': 0.8014674469865452, 'learning_rate': 7.210143262851793e-07, 'epoch': 0.83} + 83%|████████▎ | 10143/12188 [19:51<4:41:01, 8.25s/it] 83%|████████▎ | 10144/12188 [19:58<4:29:03, 7.90s/it] {'loss': 0.3207, 'grad_norm': 0.6816029383724682, 'learning_rate': 7.203271223481672e-07, 'epoch': 0.83} + 83%|████████▎ | 10144/12188 [19:58<4:29:03, 7.90s/it] 83%|████████▎ | 10145/12188 [20:08<4:51:12, 8.55s/it] {'loss': 0.2921, 'grad_norm': 0.6965551256628942, 'learning_rate': 7.196402206347792e-07, 'epoch': 0.83} + 83%|████████▎ | 10145/12188 [20:08<4:51:12, 8.55s/it] 83%|████████▎ | 10146/12188 [20:16<4:39:15, 8.21s/it] {'loss': 0.2561, 'grad_norm': 0.7939449091237148, 'learning_rate': 7.189536211935205e-07, 'epoch': 0.83} + 83%|████████▎ | 10146/12188 [20:16<4:39:15, 8.21s/it] 83%|████████▎ | 10147/12188 [20:22<4:25:02, 7.79s/it] {'loss': 0.3272, 'grad_norm': 0.6677011593365014, 'learning_rate': 7.182673240728804e-07, 'epoch': 0.83} + 83%|████████▎ | 10147/12188 [20:22<4:25:02, 7.79s/it] 83%|████████▎ | 10148/12188 [20:30<4:20:36, 7.67s/it] {'loss': 0.3304, 'grad_norm': 0.6756560295688715, 'learning_rate': 7.175813293213224e-07, 'epoch': 0.83} + 83%|████████▎ | 10148/12188 [20:30<4:20:36, 7.67s/it] 83%|████████▎ | 10149/12188 [20:38<4:26:20, 7.84s/it] {'loss': 0.301, 'grad_norm': 0.6114765150952517, 'learning_rate': 7.168956369872898e-07, 'epoch': 0.83} + 83%|████████▎ | 10149/12188 [20:38<4:26:20, 7.84s/it] 83%|████████▎ | 10150/12188 [20:46<4:22:50, 7.74s/it] {'loss': 0.2921, 'grad_norm': 0.6974623863487545, 'learning_rate': 7.162102471192067e-07, 'epoch': 0.83} + 83%|████████▎ | 10150/12188 [20:46<4:22:50, 7.74s/it] 83%|████████▎ | 10151/12188 [20:56<4:49:28, 8.53s/it] {'loss': 0.293, 'grad_norm': 0.8010168194010194, 'learning_rate': 7.155251597654727e-07, 'epoch': 0.83} + 83%|████████▎ | 10151/12188 [20:56<4:49:28, 8.53s/it] 83%|████████▎ | 10152/12188 [21:03<4:31:38, 8.01s/it] {'loss': 0.2765, 'grad_norm': 0.834032971046517, 'learning_rate': 7.148403749744687e-07, 'epoch': 0.83} + 83%|████████▎ | 10152/12188 [21:03<4:31:38, 8.01s/it] 83%|████████▎ | 10153/12188 [21:10<4:21:03, 7.70s/it] {'loss': 0.3135, 'grad_norm': 0.6820917789203933, 'learning_rate': 7.141558927945536e-07, 'epoch': 0.83} + 83%|████████▎ | 10153/12188 [21:10<4:21:03, 7.70s/it] 83%|████████▎ | 10154/12188 [21:19<4:35:25, 8.12s/it] {'loss': 0.2845, 'grad_norm': 0.7355230399211167, 'learning_rate': 7.134717132740626e-07, 'epoch': 0.83} + 83%|████████▎ | 10154/12188 [21:19<4:35:25, 8.12s/it] 83%|████████▎ | 10155/12188 [21:25<4:20:18, 7.68s/it] {'loss': 0.2536, 'grad_norm': 0.6493570167816212, 'learning_rate': 7.127878364613133e-07, 'epoch': 0.83} + 83%|████████▎ | 10155/12188 [21:25<4:20:18, 7.68s/it] 83%|████████▎ | 10156/12188 [21:32<4:09:27, 7.37s/it] {'loss': 0.2658, 'grad_norm': 0.7500649343570959, 'learning_rate': 7.121042624045981e-07, 'epoch': 0.83} + 83%|████████▎ | 10156/12188 [21:32<4:09:27, 7.37s/it] 83%|████████▎ | 10157/12188 [21:41<4:23:21, 7.78s/it] {'loss': 0.2956, 'grad_norm': 0.7040977962139067, 'learning_rate': 7.114209911521907e-07, 'epoch': 0.83} + 83%|████████▎ | 10157/12188 [21:41<4:23:21, 7.78s/it] 83%|████████▎ | 10158/12188 [21:48<4:15:51, 7.56s/it] {'loss': 0.2752, 'grad_norm': 0.7294570626054767, 'learning_rate': 7.107380227523442e-07, 'epoch': 0.83} + 83%|████████▎ | 10158/12188 [21:48<4:15:51, 7.56s/it] 83%|████████▎ | 10159/12188 [21:57<4:28:58, 7.95s/it] {'loss': 0.281, 'grad_norm': 0.8923738164751359, 'learning_rate': 7.100553572532859e-07, 'epoch': 0.83} + 83%|████████▎ | 10159/12188 [21:57<4:28:58, 7.95s/it] 83%|████████▎ | 10160/12188 [22:04<4:18:08, 7.64s/it] {'loss': 0.2748, 'grad_norm': 0.6646100208774294, 'learning_rate': 7.093729947032274e-07, 'epoch': 0.83} + 83%|████████▎ | 10160/12188 [22:04<4:18:08, 7.64s/it] 83%|████████▎ | 10161/12188 [22:12<4:20:49, 7.72s/it] {'loss': 0.2878, 'grad_norm': 0.829037310564732, 'learning_rate': 7.086909351503529e-07, 'epoch': 0.83} + 83%|████████▎ | 10161/12188 [22:12<4:20:49, 7.72s/it] 83%|████████▎ | 10162/12188 [22:19<4:16:54, 7.61s/it] {'loss': 0.2835, 'grad_norm': 1.0992092868213428, 'learning_rate': 7.080091786428317e-07, 'epoch': 0.83} + 83%|████████▎ | 10162/12188 [22:19<4:16:54, 7.61s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7fd49722abb0> +[Try #0] Failed to fetch sample 4616838 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7fd49722abb0> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Class: form-control input-sm'"}, {'from': 'gpt', 'value': '\nclick(x=0.489, y=0.228)\n'}]} + 83%|████████▎ | 10163/12188 [22:28<4:32:31, 8.07s/it] {'loss': 0.3411, 'grad_norm': 0.6915997049603271, 'learning_rate': 7.073277252288063e-07, 'epoch': 0.83} + 83%|████████▎ | 10163/12188 [22:28<4:32:31, 8.07s/it] 83%|████████▎ | 10164/12188 [22:36<4:32:25, 8.08s/it] {'loss': 0.3374, 'grad_norm': 0.7210457337712466, 'learning_rate': 7.066465749563994e-07, 'epoch': 0.83} + 83%|████████▎ | 10164/12188 [22:36<4:32:25, 8.08s/it] 83%|████████▎ | 10165/12188 [22:43<4:16:34, 7.61s/it] {'loss': 0.2668, 'grad_norm': 0.7003786014633521, 'learning_rate': 7.059657278737136e-07, 'epoch': 0.83} + 83%|████████▎ | 10165/12188 [22:43<4:16:34, 7.61s/it] 83%|████████▎ | 10166/12188 [22:50<4:10:56, 7.45s/it] {'loss': 0.2682, 'grad_norm': 0.6499426191429359, 'learning_rate': 7.052851840288299e-07, 'epoch': 0.83} + 83%|████████▎ | 10166/12188 [22:50<4:10:56, 7.45s/it] 83%|████████▎ | 10167/12188 [22:58<4:19:44, 7.71s/it] {'loss': 0.309, 'grad_norm': 0.6751932423601864, 'learning_rate': 7.04604943469806e-07, 'epoch': 0.83} + 83%|████████▎ | 10167/12188 [22:58<4:19:44, 7.71s/it] 83%|████████▎ | 10168/12188 [23:06<4:19:34, 7.71s/it] {'loss': 0.3118, 'grad_norm': 0.6869485470497234, 'learning_rate': 7.039250062446806e-07, 'epoch': 0.83} + 83%|████████▎ | 10168/12188 [23:06<4:19:34, 7.71s/it] 83%|████████▎ | 10169/12188 [23:12<4:07:04, 7.34s/it] {'loss': 0.286, 'grad_norm': 0.7337821041991711, 'learning_rate': 7.032453724014681e-07, 'epoch': 0.83} + 83%|████████▎ | 10169/12188 [23:12<4:07:04, 7.34s/it] 83%|████████▎ | 10170/12188 [23:23<4:38:35, 8.28s/it] {'loss': 0.2962, 'grad_norm': 0.7107508338183295, 'learning_rate': 7.025660419881641e-07, 'epoch': 0.83} + 83%|████████▎ | 10170/12188 [23:23<4:38:35, 8.28s/it] 83%|████████▎ | 10171/12188 [23:29<4:22:30, 7.81s/it] {'loss': 0.3197, 'grad_norm': 0.6776453510353874, 'learning_rate': 7.01887015052743e-07, 'epoch': 0.83} + 83%|████████▎ | 10171/12188 [23:29<4:22:30, 7.81s/it] 83%|████████▎ | 10172/12188 [23:38<4:32:03, 8.10s/it] {'loss': 0.3357, 'grad_norm': 0.7209207195731273, 'learning_rate': 7.012082916431545e-07, 'epoch': 0.83} + 83%|████████▎ | 10172/12188 [23:38<4:32:03, 8.10s/it] 83%|████████▎ | 10173/12188 [23:45<4:20:07, 7.75s/it] {'loss': 0.2901, 'grad_norm': 0.740024882074249, 'learning_rate': 7.005298718073311e-07, 'epoch': 0.83} + 83%|████████▎ | 10173/12188 [23:45<4:20:07, 7.75s/it] 83%|████████▎ | 10174/12188 [23:52<4:08:13, 7.40s/it] {'loss': 0.3031, 'grad_norm': 0.6511401777789159, 'learning_rate': 6.998517555931788e-07, 'epoch': 0.83} + 83%|████████▎ | 10174/12188 [23:52<4:08:13, 7.40s/it] 83%|████████▎ | 10175/12188 [23:59<4:02:22, 7.22s/it] {'loss': 0.2739, 'grad_norm': 0.7572224158075594, 'learning_rate': 6.991739430485883e-07, 'epoch': 0.83} + 83%|████████▎ | 10175/12188 [23:59<4:02:22, 7.22s/it] 83%|████████▎ | 10176/12188 [24:09<4:30:23, 8.06s/it] {'loss': 0.294, 'grad_norm': 0.7519064400896487, 'learning_rate': 6.984964342214245e-07, 'epoch': 0.83} + 83%|████████▎ | 10176/12188 [24:09<4:30:23, 8.06s/it] 84%|████████▎ | 10177/12188 [24:15<4:19:06, 7.73s/it] {'loss': 0.3085, 'grad_norm': 0.7613563822467044, 'learning_rate': 6.978192291595304e-07, 'epoch': 0.83} + 84%|████████▎ | 10177/12188 [24:16<4:19:06, 7.73s/it] 84%|████████▎ | 10178/12188 [24:22<4:10:25, 7.48s/it] {'loss': 0.3209, 'grad_norm': 0.6597066740857469, 'learning_rate': 6.971423279107309e-07, 'epoch': 0.84} + 84%|████████▎ | 10178/12188 [24:22<4:10:25, 7.48s/it] 84%|████████▎ | 10179/12188 [24:30<4:13:31, 7.57s/it] {'loss': 0.2921, 'grad_norm': 0.6438441697989762, 'learning_rate': 6.964657305228262e-07, 'epoch': 0.84} + 84%|████████▎ | 10179/12188 [24:30<4:13:31, 7.57s/it] 84%|████████▎ | 10180/12188 [24:37<4:04:55, 7.32s/it] {'loss': 0.302, 'grad_norm': 0.6830474227918166, 'learning_rate': 6.95789437043598e-07, 'epoch': 0.84} + 84%|████████▎ | 10180/12188 [24:37<4:04:55, 7.32s/it] 84%|████████▎ | 10181/12188 [24:44<4:00:16, 7.18s/it] {'loss': 0.3461, 'grad_norm': 0.7511126229780828, 'learning_rate': 6.951134475208049e-07, 'epoch': 0.84} + 84%|████████▎ | 10181/12188 [24:44<4:00:16, 7.18s/it] 84%|████████▎ | 10182/12188 [24:53<4:16:51, 7.68s/it] {'loss': 0.3097, 'grad_norm': 0.737813412751922, 'learning_rate': 6.944377620021831e-07, 'epoch': 0.84} + 84%|████████▎ | 10182/12188 [24:53<4:16:51, 7.68s/it] 84%|████████▎ | 10183/12188 [25:00<4:11:03, 7.51s/it] {'loss': 0.258, 'grad_norm': 0.6898218545085553, 'learning_rate': 6.937623805354493e-07, 'epoch': 0.84} + 84%|████████▎ | 10183/12188 [25:00<4:11:03, 7.51s/it] 84%|████████▎ | 10184/12188 [25:07<4:05:22, 7.35s/it] {'loss': 0.3484, 'grad_norm': 0.7236619561899384, 'learning_rate': 6.930873031682983e-07, 'epoch': 0.84} + 84%|████████▎ | 10184/12188 [25:07<4:05:22, 7.35s/it] 84%|████████▎ | 10185/12188 [25:13<3:59:31, 7.17s/it] {'loss': 0.2968, 'grad_norm': 0.7648567579991399, 'learning_rate': 6.924125299484014e-07, 'epoch': 0.84} + 84%|████████▎ | 10185/12188 [25:13<3:59:31, 7.17s/it] 84%|████████▎ | 10186/12188 [25:21<4:01:28, 7.24s/it] {'loss': 0.347, 'grad_norm': 0.690540892504803, 'learning_rate': 6.917380609234125e-07, 'epoch': 0.84} + 84%|████████▎ | 10186/12188 [25:21<4:01:28, 7.24s/it] 84%|████████▎ | 10187/12188 [25:30<4:19:44, 7.79s/it] {'loss': 0.3005, 'grad_norm': 0.6509096370960392, 'learning_rate': 6.910638961409583e-07, 'epoch': 0.84} + 84%|████████▎ | 10187/12188 [25:30<4:19:44, 7.79s/it] 84%|████████▎ | 10188/12188 [25:37<4:11:27, 7.54s/it] {'loss': 0.2729, 'grad_norm': 0.7037512192230222, 'learning_rate': 6.903900356486504e-07, 'epoch': 0.84} + 84%|████████▎ | 10188/12188 [25:37<4:11:27, 7.54s/it] 84%|████████▎ | 10189/12188 [25:44<4:02:44, 7.29s/it] {'loss': 0.3303, 'grad_norm': 0.8647689936651005, 'learning_rate': 6.89716479494073e-07, 'epoch': 0.84} + 84%|████████▎ | 10189/12188 [25:44<4:02:44, 7.29s/it] 84%|████████▎ | 10190/12188 [25:50<3:56:54, 7.11s/it] {'loss': 0.2835, 'grad_norm': 0.7290872424603703, 'learning_rate': 6.890432277247943e-07, 'epoch': 0.84} + 84%|████████▎ | 10190/12188 [25:50<3:56:54, 7.11s/it] 84%|████████▎ | 10191/12188 [25:59<4:13:25, 7.61s/it] {'loss': 0.3077, 'grad_norm': 0.774309148978802, 'learning_rate': 6.883702803883563e-07, 'epoch': 0.84} + 84%|████████▎ | 10191/12188 [25:59<4:13:25, 7.61s/it] 84%|████████▎ | 10192/12188 [26:06<4:08:59, 7.48s/it] {'loss': 0.3095, 'grad_norm': 1.8388038896457768, 'learning_rate': 6.876976375322808e-07, 'epoch': 0.84} + 84%|████████▎ | 10192/12188 [26:06<4:08:59, 7.48s/it] 84%|████████▎ | 10193/12188 [26:13<4:05:46, 7.39s/it] {'loss': 0.2953, 'grad_norm': 0.6676410756918433, 'learning_rate': 6.870252992040705e-07, 'epoch': 0.84} + 84%|████████▎ | 10193/12188 [26:13<4:05:46, 7.39s/it] 84%|████████▎ | 10194/12188 [26:20<3:58:03, 7.16s/it] {'loss': 0.2985, 'grad_norm': 0.7858091253424574, 'learning_rate': 6.86353265451205e-07, 'epoch': 0.84} + 84%|████████▎ | 10194/12188 [26:20<3:58:03, 7.16s/it] 84%|████████▎ | 10195/12188 [26:27<3:58:45, 7.19s/it] {'loss': 0.2967, 'grad_norm': 0.6986036351078516, 'learning_rate': 6.856815363211399e-07, 'epoch': 0.84} + 84%|████████▎ | 10195/12188 [26:27<3:58:45, 7.19s/it] 84%|████████▎ | 10196/12188 [26:35<3:58:41, 7.19s/it] {'loss': 0.2983, 'grad_norm': 0.6350717831710674, 'learning_rate': 6.85010111861315e-07, 'epoch': 0.84} + 84%|████████▎ | 10196/12188 [26:35<3:58:41, 7.19s/it] 84%|████████▎ | 10197/12188 [26:41<3:54:34, 7.07s/it] {'loss': 0.2706, 'grad_norm': 0.6593579941528923, 'learning_rate': 6.843389921191423e-07, 'epoch': 0.84} + 84%|████████▎ | 10197/12188 [26:41<3:54:34, 7.07s/it] 84%|████████▎ | 10198/12188 [26:48<3:51:29, 6.98s/it] {'loss': 0.2811, 'grad_norm': 0.7062049924558031, 'learning_rate': 6.836681771420162e-07, 'epoch': 0.84} + 84%|████████▎ | 10198/12188 [26:48<3:51:29, 6.98s/it] 84%|████████▎ | 10199/12188 [26:55<3:48:52, 6.90s/it] {'loss': 0.2923, 'grad_norm': 0.7642103969668868, 'learning_rate': 6.829976669773098e-07, 'epoch': 0.84} + 84%|████████▎ | 10199/12188 [26:55<3:48:52, 6.90s/it][2025-08-18 11:25:04,859] [WARNING] [stage3.py:2118:step] 1 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time + 84%|████████▎ | 10200/12188 [27:03<4:04:46, 7.39s/it] {'loss': 0.2972, 'grad_norm': 0.6955342630759509, 'learning_rate': 6.823274616723707e-07, 'epoch': 0.84} + 84%|████████▎ | 10200/12188 [27:03<4:04:46, 7.39s/it] 84%|████████▎ | 10201/12188 [27:10<3:57:11, 7.16s/it] {'loss': 0.2995, 'grad_norm': 0.9057425193114538, 'learning_rate': 6.816575612745302e-07, 'epoch': 0.84} + 84%|████████▎ | 10201/12188 [27:10<3:57:11, 7.16s/it] 84%|████████▎ | 10202/12188 [27:18<4:09:49, 7.55s/it] {'loss': 0.3408, 'grad_norm': 0.7061158963511436, 'learning_rate': 6.809879658310953e-07, 'epoch': 0.84} + 84%|████████▎ | 10202/12188 [27:18<4:09:49, 7.55s/it] 84%|████████▎ | 10203/12188 [27:25<4:03:36, 7.36s/it] {'loss': 0.315, 'grad_norm': 0.6479562047233145, 'learning_rate': 6.803186753893515e-07, 'epoch': 0.84} + 84%|████████▎ | 10203/12188 [27:25<4:03:36, 7.36s/it] 84%|████████▎ | 10204/12188 [27:33<4:05:53, 7.44s/it] {'loss': 0.3141, 'grad_norm': 0.7740617291458284, 'learning_rate': 6.796496899965627e-07, 'epoch': 0.84} + 84%|████████▎ | 10204/12188 [27:33<4:05:53, 7.44s/it] 84%|████████▎ | 10205/12188 [27:40<4:01:43, 7.31s/it] {'loss': 0.3159, 'grad_norm': 0.7641798931849958, 'learning_rate': 6.78981009699971e-07, 'epoch': 0.84} + 84%|████████▎ | 10205/12188 [27:40<4:01:43, 7.31s/it] 84%|████████▎ | 10206/12188 [27:47<3:59:20, 7.25s/it] {'loss': 0.3465, 'grad_norm': 0.9493460362929982, 'learning_rate': 6.783126345467978e-07, 'epoch': 0.84} + 84%|███████��▎ | 10206/12188 [27:47<3:59:20, 7.25s/it] 84%|████████▎ | 10207/12188 [27:54<4:00:56, 7.30s/it] {'loss': 0.3212, 'grad_norm': 0.6876856084368813, 'learning_rate': 6.776445645842439e-07, 'epoch': 0.84} + 84%|████████▎ | 10207/12188 [27:54<4:00:56, 7.30s/it] 84%|████████▍ | 10208/12188 [28:04<4:23:04, 7.97s/it] {'loss': 0.3074, 'grad_norm': 0.734811536242437, 'learning_rate': 6.769767998594857e-07, 'epoch': 0.84} + 84%|████████▍ | 10208/12188 [28:04<4:23:04, 7.97s/it] 84%|████████▍ | 10209/12188 [28:11<4:11:15, 7.62s/it] {'loss': 0.3095, 'grad_norm': 0.7194913839187138, 'learning_rate': 6.763093404196808e-07, 'epoch': 0.84} + 84%|████████▍ | 10209/12188 [28:11<4:11:15, 7.62s/it] 84%|████████▍ | 10210/12188 [28:18<4:07:56, 7.52s/it] {'loss': 0.3914, 'grad_norm': 0.7223729239127515, 'learning_rate': 6.756421863119634e-07, 'epoch': 0.84} + 84%|████████▍ | 10210/12188 [28:18<4:07:56, 7.52s/it] 84%|████████▍ | 10211/12188 [28:25<4:03:44, 7.40s/it] {'loss': 0.2938, 'grad_norm': 0.7826423586015759, 'learning_rate': 6.749753375834467e-07, 'epoch': 0.84} + 84%|████████▍ | 10211/12188 [28:25<4:03:44, 7.40s/it] 84%|████████▍ | 10212/12188 [28:34<4:19:21, 7.88s/it] {'loss': 0.2913, 'grad_norm': 0.893940623294124, 'learning_rate': 6.743087942812243e-07, 'epoch': 0.84} + 84%|████████▍ | 10212/12188 [28:34<4:19:21, 7.88s/it] 84%|████████▍ | 10213/12188 [28:43<4:26:48, 8.11s/it] {'loss': 0.3151, 'grad_norm': 0.7056750580241833, 'learning_rate': 6.736425564523641e-07, 'epoch': 0.84} + 84%|████████▍ | 10213/12188 [28:43<4:26:48, 8.11s/it] 84%|████████▍ | 10214/12188 [28:51<4:25:29, 8.07s/it] {'loss': 0.2715, 'grad_norm': 0.6953198958850048, 'learning_rate': 6.729766241439167e-07, 'epoch': 0.84} + 84%|████████▍ | 10214/12188 [28:51<4:25:29, 8.07s/it] 84%|████████▍ | 10215/12188 [28:58<4:15:49, 7.78s/it] {'loss': 0.3057, 'grad_norm': 0.6675656748795157, 'learning_rate': 6.723109974029074e-07, 'epoch': 0.84} + 84%|████████▍ | 10215/12188 [28:58<4:15:49, 7.78s/it] 84%|████████▍ | 10216/12188 [29:05<4:09:28, 7.59s/it] {'loss': 0.2714, 'grad_norm': 0.8431128488633739, 'learning_rate': 6.716456762763435e-07, 'epoch': 0.84} + 84%|████████▍ | 10216/12188 [29:05<4:09:28, 7.59s/it] 84%|████████▍ | 10217/12188 [29:12<4:05:41, 7.48s/it] {'loss': 0.2916, 'grad_norm': 0.6893376576716229, 'learning_rate': 6.709806608112068e-07, 'epoch': 0.84} + 84%|████████▍ | 10217/12188 [29:12<4:05:41, 7.48s/it] 84%|████████▍ | 10218/12188 [29:19<3:58:32, 7.27s/it] {'loss': 0.2742, 'grad_norm': 0.6924132231097415, 'learning_rate': 6.703159510544616e-07, 'epoch': 0.84} + 84%|████████▍ | 10218/12188 [29:19<3:58:32, 7.27s/it] 84%|████████▍ | 10219/12188 [29:27<4:08:10, 7.56s/it] {'loss': 0.3335, 'grad_norm': 0.7231155240965993, 'learning_rate': 6.696515470530468e-07, 'epoch': 0.84} + 84%|████████▍ | 10219/12188 [29:27<4:08:10, 7.56s/it] 84%|████████▍ | 10220/12188 [29:34<4:02:42, 7.40s/it] {'loss': 0.3031, 'grad_norm': 0.7395204117987156, 'learning_rate': 6.689874488538833e-07, 'epoch': 0.84} + 84%|████████▍ | 10220/12188 [29:34<4:02:42, 7.40s/it] 84%|████████▍ | 10221/12188 [29:42<4:04:57, 7.47s/it] {'loss': 0.2939, 'grad_norm': 0.727386393875644, 'learning_rate': 6.683236565038676e-07, 'epoch': 0.84} + 84%|████████▍ | 10221/12188 [29:42<4:04:57, 7.47s/it] 84%|████████▍ | 10222/12188 [29:49<3:55:49, 7.20s/it] {'loss': 0.3085, 'grad_norm': 0.6554513890567404, 'learning_rate': 6.676601700498764e-07, 'epoch': 0.84} + 84%|████████▍ | 10222/12188 [29:49<3:55:49, 7.20s/it] 84%|████████▍ | 10223/12188 [29:55<3:52:41, 7.11s/it] {'loss': 0.2651, 'grad_norm': 0.7210751120333388, 'learning_rate': 6.669969895387623e-07, 'epoch': 0.84} + 84%|████████▍ | 10223/12188 [29:55<3:52:41, 7.11s/it] 84%|████████▍ | 10224/12188 [30:02<3:50:10, 7.03s/it] {'loss': 0.3309, 'grad_norm': 0.7324475833451857, 'learning_rate': 6.663341150173597e-07, 'epoch': 0.84} + 84%|████████▍ | 10224/12188 [30:02<3:50:10, 7.03s/it] 84%|████████▍ | 10225/12188 [30:09<3:48:09, 6.97s/it] {'loss': 0.3043, 'grad_norm': 0.6648655411337359, 'learning_rate': 6.656715465324803e-07, 'epoch': 0.84} + 84%|████████▍ | 10225/12188 [30:09<3:48:09, 6.97s/it] 84%|████████▍ | 10226/12188 [30:16<3:51:03, 7.07s/it] {'loss': 0.3246, 'grad_norm': 0.7687340552298876, 'learning_rate': 6.650092841309114e-07, 'epoch': 0.84} + 84%|████████▍ | 10226/12188 [30:16<3:51:03, 7.07s/it] 84%|████████▍ | 10227/12188 [30:24<3:52:44, 7.12s/it] {'loss': 0.3062, 'grad_norm': 0.6655645707128116, 'learning_rate': 6.643473278594231e-07, 'epoch': 0.84} + 84%|████████▍ | 10227/12188 [30:24<3:52:44, 7.12s/it] 84%|████████▍ | 10228/12188 [30:31<3:56:44, 7.25s/it] {'loss': 0.2922, 'grad_norm': 0.6491278791839943, 'learning_rate': 6.636856777647599e-07, 'epoch': 0.84} + 84%|████████▍ | 10228/12188 [30:31<3:56:44, 7.25s/it] 84%|████████▍ | 10229/12188 [30:38<3:49:26, 7.03s/it] {'loss': 0.2797, 'grad_norm': 0.729117388229119, 'learning_rate': 6.630243338936476e-07, 'epoch': 0.84} + 84%|████████▍ | 10229/12188 [30:38<3:49:26, 7.03s/it] 84%|████████▍ | 10230/12188 [30:44<3:46:56, 6.95s/it] {'loss': 0.2939, 'grad_norm': 0.6996409689435872, 'learning_rate': 6.623632962927895e-07, 'epoch': 0.84} + 84%|████████▍ | 10230/12188 [30:45<3:46:56, 6.95s/it] 84%|████████▍ | 10231/12188 [30:51<3:43:43, 6.86s/it] {'loss': 0.3009, 'grad_norm': 0.7057184095854309, 'learning_rate': 6.617025650088671e-07, 'epoch': 0.84} + 84%|████████▍ | 10231/12188 [30:51<3:43:43, 6.86s/it] 84%|████████▍ | 10232/12188 [30:58<3:40:11, 6.75s/it] {'loss': 0.2634, 'grad_norm': 0.6979267139394612, 'learning_rate': 6.610421400885392e-07, 'epoch': 0.84} + 84%|████████▍ | 10232/12188 [30:58<3:40:11, 6.75s/it] 84%|████████▍ | 10233/12188 [31:04<3:40:17, 6.76s/it] {'loss': 0.3229, 'grad_norm': 0.7838761242298083, 'learning_rate': 6.603820215784429e-07, 'epoch': 0.84} + 84%|████████▍ | 10233/12188 [31:04<3:40:17, 6.76s/it] 84%|████████▍ | 10234/12188 [31:13<4:02:11, 7.44s/it] {'loss': 0.2931, 'grad_norm': 0.665905678110749, 'learning_rate': 6.597222095251965e-07, 'epoch': 0.84} + 84%|████████▍ | 10234/12188 [31:13<4:02:11, 7.44s/it] 84%|████████▍ | 10235/12188 [31:21<4:02:25, 7.45s/it] {'loss': 0.2809, 'grad_norm': 0.7848516458245218, 'learning_rate': 6.590627039753955e-07, 'epoch': 0.84} + 84%|████████▍ | 10235/12188 [31:21<4:02:25, 7.45s/it] 84%|████████▍ | 10236/12188 [31:28<3:56:36, 7.27s/it] {'loss': 0.3371, 'grad_norm': 0.70395026637664, 'learning_rate': 6.58403504975611e-07, 'epoch': 0.84} + 84%|████████▍ | 10236/12188 [31:28<3:56:36, 7.27s/it] 84%|████████▍ | 10237/12188 [31:35<3:59:49, 7.38s/it] {'loss': 0.282, 'grad_norm': 0.7122698149128062, 'learning_rate': 6.577446125723958e-07, 'epoch': 0.84} + 84%|████████▍ | 10237/12188 [31:35<3:59:49, 7.38s/it] 84%|████████▍ | 10238/12188 [31:42<3:56:18, 7.27s/it] {'loss': 0.3083, 'grad_norm': 0.7738840564919226, 'learning_rate': 6.570860268122808e-07, 'epoch': 0.84} + 84%|████████▍ | 10238/12188 [31:42<3:56:18, 7.27s/it] 84%|████████▍ | 10239/12188 [31:52<4:20:55, 8.03s/it] {'loss': 0.2652, 'grad_norm': 0.7162122719846237, 'learning_rate': 6.564277477417719e-07, 'epoch': 0.84} + 84%|████████▍ | 10239/12188 [31:52<4:20:55, 8.03s/it] 84%|████████▍ | 10240/12188 [32:00<4:18:03, 7.95s/it] {'loss': 0.298, 'grad_norm': 1.2170997431810169, 'learning_rate': 6.557697754073577e-07, 'epoch': 0.84} + 84%|████████▍ | 10240/12188 [32:00<4:18:03, 7.95s/it] 84%|████████▍ | 10241/12188 [32:08<4:21:43, 8.07s/it] {'loss': 0.2817, 'grad_norm': 0.6879762572292433, 'learning_rate': 6.551121098555019e-07, 'epoch': 0.84} + 84%|████████▍ | 10241/12188 [32:08<4:21:43, 8.07s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f6cbc5dbec0> +[Try #0] Failed to fetch sample 4617819 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f6cbc5dbec0> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Contact us'"}, {'from': 'gpt', 'value': '\nclick(x=0.8505, y=0.32)\n'}]} +[2025-08-18 11:30:18,876] [WARNING] [stage3.py:2118:step] 1 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time + 84%|████████▍ | 10242/12188 [32:17<4:30:48, 8.35s/it] {'loss': 0.2827, 'grad_norm': 0.6111233599219296, 'learning_rate': 6.544547511326482e-07, 'epoch': 0.84} + 84%|████████▍ | 10242/12188 [32:17<4:30:48, 8.35s/it] 84%|████████▍ | 10243/12188 [32:24<4:18:52, 7.99s/it] {'loss': 0.3527, 'grad_norm': 0.7159289250032476, 'learning_rate': 6.537976992852196e-07, 'epoch': 0.84} + 84%|████████▍ | 10243/12188 [32:24<4:18:52, 7.99s/it] 84%|████████▍ | 10244/12188 [32:36<4:54:00, 9.07s/it] {'loss': 0.3003, 'grad_norm': 0.7099313418466957, 'learning_rate': 6.531409543596146e-07, 'epoch': 0.84} + 84%|████████▍ | 10244/12188 [32:36<4:54:00, 9.07s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f6c63b14090> +[Try #0] Failed to fetch sample 4335455 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f6c63b14090> +Problematic sample: {'image': '20240823_021250_before_screenshot_sub0.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Watches'"}, {'from': 'gpt', 'value': '\nclick(x=0.681, y=0.266)\n'}]} + 84%|████████▍ | 10245/12188 [32:43<4:29:37, 8.33s/it] {'loss': 0.2825, 'grad_norm': 0.8923925295168889, 'learning_rate': 6.524845164022104e-07, 'epoch': 0.84} + 84%|████████▍ | 10245/12188 [32:43<4:29:37, 8.33s/it] 84%|████████▍ | 10246/12188 [32:49<4:11:16, 7.76s/it] {'loss': 0.3114, 'grad_norm': 0.6949366196049648, 'learning_rate': 6.51828385459366e-07, 'epoch': 0.84} + 84%|████████▍ | 10246/12188 [32:49<4:11:16, 7.76s/it] 84%|████████▍ | 10247/12188 [32:56<4:06:13, 7.61s/it] {'loss': 0.3042, 'grad_norm': 0.662161572951465, 'learning_rate': 6.511725615774139e-07, 'epoch': 0.84} + 84%|████████▍ | 10247/12188 [32:56<4:06:13, 7.61s/it] 84%|████████▍ | 10248/12188 [33:03<3:56:32, 7.32s/it] {'loss': 0.2698, 'grad_norm': 0.7045084844036463, 'learning_rate': 6.505170448026699e-07, 'epoch': 0.84} + 84%|████████▍ | 10248/12188 [33:03<3:56:32, 7.32s/it] 84%|████████▍ | 10249/12188 [33:12<4:12:21, 7.81s/it] {'loss': 0.3218, 'grad_norm': 0.890645398862087, 'learning_rate': 6.498618351814229e-07, 'epoch': 0.84} + 84%|████████▍ | 10249/12188 [33:12<4:12:21, 7.81s/it] 84%|████████▍ | 10250/12188 [33:19<4:03:29, 7.54s/it] {'loss': 0.2785, 'grad_norm': 0.6394767603357617, 'learning_rate': 6.492069327599454e-07, 'epoch': 0.84} + 84%|████████▍ | 10250/12188 [33:19<4:03:29, 7.54s/it] 84%|████████▍ | 10251/12188 [33:26<4:02:30, 7.51s/it] {'loss': 0.3092, 'grad_norm': 0.6532410427622589, 'learning_rate': 6.485523375844826e-07, 'epoch': 0.84} + 84%|████████▍ | 10251/12188 [33:26<4:02:30, 7.51s/it] 84%|████████▍ | 10252/12188 [33:33<3:52:21, 7.20s/it] {'loss': 0.3298, 'grad_norm': 0.7020893510834382, 'learning_rate': 6.478980497012632e-07, 'epoch': 0.84} + 84%|████████▍ | 10252/12188 [33:33<3:52:21, 7.20s/it] 84%|████████▍ | 10253/12188 [33:40<3:48:15, 7.08s/it] {'loss': 0.3063, 'grad_norm': 0.6828575942090274, 'learning_rate': 6.472440691564924e-07, 'epoch': 0.84} + 84%|████████▍ | 10253/12188 [33:40<3:48:15, 7.08s/it] 84%|████████▍ | 10254/12188 [33:48<4:02:47, 7.53s/it] {'loss': 0.3167, 'grad_norm': 0.6446680350918014, 'learning_rate': 6.46590395996351e-07, 'epoch': 0.84} + 84%|████████▍ | 10254/12188 [33:48<4:02:47, 7.53s/it] 84%|████████▍ | 10255/12188 [33:55<3:55:34, 7.31s/it] {'loss': 0.3073, 'grad_norm': 0.688361935611369, 'learning_rate': 6.459370302670015e-07, 'epoch': 0.84} + 84%|████████▍ | 10255/12188 [33:55<3:55:34, 7.31s/it] 84%|████████▍ | 10256/12188 [34:02<3:54:40, 7.29s/it] {'loss': 0.2817, 'grad_norm': 0.6498375464490341, 'learning_rate': 6.452839720145848e-07, 'epoch': 0.84} + 84%|████████▍ | 10256/12188 [34:02<3:54:40, 7.29s/it] 84%|████████▍ | 10257/12188 [34:10<3:56:34, 7.35s/it] {'loss': 0.2654, 'grad_norm': 0.7979966927003347, 'learning_rate': 6.446312212852162e-07, 'epoch': 0.84} + 84%|████████▍ | 10257/12188 [34:10<3:56:34, 7.35s/it] 84%|████████▍ | 10258/12188 [34:17<3:59:45, 7.45s/it] {'loss': 0.3026, 'grad_norm': 0.6837185797665534, 'learning_rate': 6.439787781249945e-07, 'epoch': 0.84} + 84%|███████���▍ | 10258/12188 [34:17<3:59:45, 7.45s/it] 84%|████████▍ | 10259/12188 [34:24<3:52:12, 7.22s/it] {'loss': 0.3094, 'grad_norm': 0.7426755400218565, 'learning_rate': 6.433266425799933e-07, 'epoch': 0.84} + 84%|████████▍ | 10259/12188 [34:24<3:52:12, 7.22s/it] 84%|████████▍ | 10260/12188 [34:33<4:09:40, 7.77s/it] {'loss': 0.2921, 'grad_norm': 0.6445027987026241, 'learning_rate': 6.426748146962635e-07, 'epoch': 0.84} + 84%|████████▍ | 10260/12188 [34:33<4:09:40, 7.77s/it] 84%|████████▍ | 10261/12188 [34:40<4:05:19, 7.64s/it] {'loss': 0.3285, 'grad_norm': 0.6501413723204233, 'learning_rate': 6.420232945198395e-07, 'epoch': 0.84} + 84%|████████▍ | 10261/12188 [34:40<4:05:19, 7.64s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f2b12659800> +[Try #0] Failed to fetch sample 4558880 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f2b12659800> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Settings - Sleeping'"}, {'from': 'gpt', 'value': '\nclick(x=0.566, y=0.012)\n'}]} + 84%|████████▍ | 10262/12188 [34:48<3:59:45, 7.47s/it] {'loss': 0.3055, 'grad_norm': 0.8758095594013837, 'learning_rate': 6.413720820967273e-07, 'epoch': 0.84} + 84%|████████▍ | 10262/12188 [34:48<3:59:45, 7.47s/it] 84%|████████▍ | 10263/12188 [34:55<4:02:32, 7.56s/it] {'loss': 0.2843, 'grad_norm': 0.6965286097996044, 'learning_rate': 6.407211774729171e-07, 'epoch': 0.84} + 84%|████████▍ | 10263/12188 [34:55<4:02:32, 7.56s/it] 84%|████████▍ | 10264/12188 [35:02<3:58:59, 7.45s/it] {'loss': 0.2721, 'grad_norm': 0.733394441636145, 'learning_rate': 6.400705806943724e-07, 'epoch': 0.84} + 84%|████████▍ | 10264/12188 [35:03<3:58:59, 7.45s/it] 84%|████████▍ | 10265/12188 [35:10<4:00:59, 7.52s/it] {'loss': 0.3014, 'grad_norm': 0.7608593737810193, 'learning_rate': 6.394202918070391e-07, 'epoch': 0.84} + 84%|████████▍ | 10265/12188 [35:10<4:00:59, 7.52s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f3399917ba0> +[Try #0] Failed to fetch sample 4698401 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f3399917ba0> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Code of Conduct'"}, {'from': 'gpt', 'value': '\nclick(x=0.917, y=0.558)\n'}]} + 84%|████████▍ | 10266/12188 [35:17<3:57:20, 7.41s/it] {'loss': 0.3153, 'grad_norm': 1.022102653375809, 'learning_rate': 6.387703108568394e-07, 'epoch': 0.84} + 84%|████████▍ | 10266/12188 [35:17<3:57:20, 7.41s/it] 84%|████████▍ | 10267/12188 [35:24<3:50:58, 7.21s/it] {'loss': 0.3076, 'grad_norm': 0.6790884344565646, 'learning_rate': 6.381206378896726e-07, 'epoch': 0.84} + 84%|████████▍ | 10267/12188 [35:24<3:50:58, 7.21s/it] 84%|████████▍ | 10268/12188 [35:31<3:48:36, 7.14s/it] {'loss': 0.2759, 'grad_norm': 0.6890011406566402, 'learning_rate': 6.374712729514198e-07, 'epoch': 0.84} + 84%|████████▍ | 10268/12188 [35:31<3:48:36, 7.14s/it] 84%|████████▍ | 10269/12188 [35:38<3:47:36, 7.12s/it] {'loss': 0.2974, 'grad_norm': 0.7077517902088416, 'learning_rate': 6.368222160879356e-07, 'epoch': 0.84} + 84%|████████▍ | 10269/12188 [35:38<3:47:36, 7.12s/it] 84%|████████▍ | 10270/12188 [35:46<3:50:14, 7.20s/it] {'loss': 0.3037, 'grad_norm': 0.6986708272733356, 'learning_rate': 6.361734673450565e-07, 'epoch': 0.84} + 84%|████████▍ | 10270/12188 [35:46<3:50:14, 7.20s/it] 84%|████████▍ | 10271/12188 [35:54<4:01:51, 7.57s/it] {'loss': 0.3123, 'grad_norm': 0.8075717981915276, 'learning_rate': 6.35525026768597e-07, 'epoch': 0.84} + 84%|████████▍ | 10271/12188 [35:54<4:01:51, 7.57s/it] 84%|████████▍ | 10272/12188 [36:01<3:52:20, 7.28s/it] {'loss': 0.2987, 'grad_norm': 0.7442355903139858, 'learning_rate': 6.348768944043482e-07, 'epoch': 0.84} + 84%|████████▍ | 10272/12188 [36:01<3:52:20, 7.28s/it] 84%|████████▍ | 10273/12188 [36:07<3:45:44, 7.07s/it] {'loss': 0.2798, 'grad_norm': 0.6939547713249683, 'learning_rate': 6.342290702980785e-07, 'epoch': 0.84} + 84%|████████▍ | 10273/12188 [36:07<3:45:44, 7.07s/it] 84%|████████▍ | 10274/12188 [36:16<4:05:20, 7.69s/it] {'loss': 0.3072, 'grad_norm': 0.6816121374682521, 'learning_rate': 6.335815544955392e-07, 'epoch': 0.84} + 84%|████████▍ | 10274/12188 [36:16<4:05:20, 7.69s/it] 84%|████████▍ | 10275/12188 [36:23<3:53:52, 7.34s/it] {'loss': 0.2844, 'grad_norm': 0.7406183269751925, 'learning_rate': 6.329343470424542e-07, 'epoch': 0.84} + 84%|████████▍ | 10275/12188 [36:23<3:53:52, 7.34s/it] 84%|████████▍ | 10276/12188 [36:30<3:50:42, 7.24s/it] {'loss': 0.3091, 'grad_norm': 0.6923676368617356, 'learning_rate': 6.322874479845309e-07, 'epoch': 0.84} + 84%|████████▍ | 10276/12188 [36:30<3:50:42, 7.24s/it] 84%|████████▍ | 10277/12188 [36:36<3:45:29, 7.08s/it] {'loss': 0.3176, 'grad_norm': 0.9901397099828627, 'learning_rate': 6.316408573674492e-07, 'epoch': 0.84} + 84%|████████▍ | 10277/12188 [36:37<3:45:29, 7.08s/it] 84%|████████▍ | 10278/12188 [36:43<3:42:29, 6.99s/it] {'loss': 0.2983, 'grad_norm': 0.6914159267111583, 'learning_rate': 6.309945752368718e-07, 'epoch': 0.84} + 84%|████████▍ | 10278/12188 [36:43<3:42:29, 6.99s/it] 84%|████████▍ | 10279/12188 [36:51<3:49:47, 7.22s/it] {'loss': 0.3283, 'grad_norm': 0.7349655859005626, 'learning_rate': 6.303486016384392e-07, 'epoch': 0.84} + 84%|████████▍ | 10279/12188 [36:51<3:49:47, 7.22s/it] 84%|████████▍ | 10280/12188 [36:59<3:58:27, 7.50s/it] {'loss': 0.3117, 'grad_norm': 0.7534559330448495, 'learning_rate': 6.29702936617767e-07, 'epoch': 0.84} + 84%|████████▍ | 10280/12188 [36:59<3:58:27, 7.50s/it] 84%|████████▍ | 10281/12188 [37:06<3:54:36, 7.38s/it] {'loss': 0.3211, 'grad_norm': 0.6551607270601425, 'learning_rate': 6.290575802204535e-07, 'epoch': 0.84} + 84%|████████▍ | 10281/12188 [37:06<3:54:36, 7.38s/it] 84%|████████▍ | 10282/12188 [37:13<3:52:40, 7.32s/it] {'loss': 0.3198, 'grad_norm': 0.6970938798989922, 'learning_rate': 6.284125324920698e-07, 'epoch': 0.84} + 84%|████████▍ | 10282/12188 [37:13<3:52:40, 7.32s/it] 84%|████████▍ | 10283/12188 [37:21<3:49:42, 7.23s/it] {'loss': 0.3265, 'grad_norm': 0.7216633505705045, 'learning_rate': 6.277677934781695e-07, 'epoch': 0.84} + 84%|████████▍ | 10283/12188 [37:21<3:49:42, 7.23s/it] 84%|████████▍ | 10284/12188 [37:27<3:46:07, 7.13s/it] {'loss': 0.2849, 'grad_norm': 1.1987060374566454, 'learning_rate': 6.27123363224284e-07, 'epoch': 0.84} + 84%|████████▍ | 10284/12188 [37:27<3:46:07, 7.13s/it] 84%|████████▍ | 10285/12188 [37:34<3:42:50, 7.03s/it] {'loss': 0.3265, 'grad_norm': 0.7602938170790171, 'learning_rate': 6.264792417759202e-07, 'epoch': 0.84} + 84%|████████▍ | 10285/12188 [37:34<3:42:50, 7.03s/it] 84%|████████▍ | 10286/12188 [37:41<3:44:47, 7.09s/it] {'loss': 0.2614, 'grad_norm': 0.685773503286683, 'learning_rate': 6.258354291785668e-07, 'epoch': 0.84} + 84%|████████▍ | 10286/12188 [37:41<3:44:47, 7.09s/it] 84%|████████▍ | 10287/12188 [37:48<3:41:24, 6.99s/it] {'loss': 0.2756, 'grad_norm': 0.6988137470106264, 'learning_rate': 6.251919254776878e-07, 'epoch': 0.84} + 84%|████████▍ | 10287/12188 [37:48<3:41:24, 6.99s/it] 84%|████████▍ | 10288/12188 [37:55<3:42:29, 7.03s/it] {'loss': 0.3025, 'grad_norm': 0.7133321491927573, 'learning_rate': 6.245487307187253e-07, 'epoch': 0.84} + 84%|████████▍ | 10288/12188 [37:55<3:42:29, 7.03s/it] 84%|████████▍ | 10289/12188 [38:03<3:46:51, 7.17s/it] {'loss': 0.3323, 'grad_norm': 0.743906865402098, 'learning_rate': 6.239058449471025e-07, 'epoch': 0.84} + 84%|████████▍ | 10289/12188 [38:03<3:46:51, 7.17s/it] 84%|████████▍ | 10290/12188 [38:10<3:48:03, 7.21s/it] {'loss': 0.3001, 'grad_norm': 0.7435357778197605, 'learning_rate': 6.232632682082174e-07, 'epoch': 0.84} + 84%|████████▍ | 10290/12188 [38:10<3:48:03, 7.21s/it] 84%|████████▍ | 10291/12188 [38:17<3:41:24, 7.00s/it] {'loss': 0.293, 'grad_norm': 0.6647423165651076, 'learning_rate': 6.226210005474486e-07, 'epoch': 0.84} + 84%|████████▍ | 10291/12188 [38:17<3:41:24, 7.00s/it] 84%|████████▍ | 10292/12188 [38:23<3:36:21, 6.85s/it] {'loss': 0.3116, 'grad_norm': 0.7287679971082337, 'learning_rate': 6.219790420101529e-07, 'epoch': 0.84} + 84%|████████▍ | 10292/12188 [38:23<3:36:21, 6.85s/it] 84%|████████▍ | 10293/12188 [38:32<3:55:12, 7.45s/it] {'loss': 0.2803, 'grad_norm': 0.9025145684348604, 'learning_rate': 6.213373926416627e-07, 'epoch': 0.84} + 84%|████████▍ | 10293/12188 [38:32<3:55:12, 7.45s/it] 84%|████████▍ | 10294/12188 [38:39<3:50:04, 7.29s/it] {'loss': 0.2755, 'grad_norm': 0.8546085431023722, 'learning_rate': 6.206960524872913e-07, 'epoch': 0.84} + 84%|████████▍ | 10294/12188 [38:39<3:50:04, 7.29s/it] 84%|████████▍ | 10295/12188 [38:46<3:45:25, 7.15s/it] {'loss': 0.3034, 'grad_norm': 0.8039055169055911, 'learning_rate': 6.200550215923284e-07, 'epoch': 0.84} + 84%|██████���█▍ | 10295/12188 [38:46<3:45:25, 7.15s/it] 84%|████████▍ | 10296/12188 [38:53<3:47:42, 7.22s/it] {'loss': 0.3032, 'grad_norm': 0.8359804913013861, 'learning_rate': 6.194143000020425e-07, 'epoch': 0.84} + 84%|████████▍ | 10296/12188 [38:53<3:47:42, 7.22s/it] 84%|████████▍ | 10297/12188 [39:00<3:42:03, 7.05s/it] {'loss': 0.2814, 'grad_norm': 0.6258373534076496, 'learning_rate': 6.187738877616822e-07, 'epoch': 0.84} + 84%|████████▍ | 10297/12188 [39:00<3:42:03, 7.05s/it] 84%|████████▍ | 10298/12188 [39:07<3:42:11, 7.05s/it] {'loss': 0.2908, 'grad_norm': 0.6695353604951522, 'learning_rate': 6.181337849164699e-07, 'epoch': 0.84} + 84%|████████▍ | 10298/12188 [39:07<3:42:11, 7.05s/it] 85%|████████▍ | 10299/12188 [39:14<3:44:51, 7.14s/it] {'loss': 0.3345, 'grad_norm': 0.6907029047057565, 'learning_rate': 6.174939915116107e-07, 'epoch': 0.84} + 85%|████████▍ | 10299/12188 [39:14<3:44:51, 7.14s/it] 85%|████████▍ | 10300/12188 [39:22<3:50:48, 7.34s/it] {'loss': 0.3124, 'grad_norm': 0.7291701633824489, 'learning_rate': 6.168545075922844e-07, 'epoch': 0.85} + 85%|████████▍ | 10300/12188 [39:22<3:50:48, 7.34s/it] 85%|████████▍ | 10301/12188 [39:29<3:45:28, 7.17s/it] {'loss': 0.2846, 'grad_norm': 0.6742337020395687, 'learning_rate': 6.162153332036503e-07, 'epoch': 0.85} + 85%|████████▍ | 10301/12188 [39:29<3:45:28, 7.17s/it] 85%|████████▍ | 10302/12188 [39:36<3:44:50, 7.15s/it] {'loss': 0.3045, 'grad_norm': 0.6936166187537371, 'learning_rate': 6.155764683908466e-07, 'epoch': 0.85} + 85%|████████▍ | 10302/12188 [39:36<3:44:50, 7.15s/it] 85%|████████▍ | 10303/12188 [39:43<3:42:57, 7.10s/it] {'loss': 0.3142, 'grad_norm': 0.7617879341955055, 'learning_rate': 6.14937913198988e-07, 'epoch': 0.85} + 85%|████████▍ | 10303/12188 [39:43<3:42:57, 7.10s/it] 85%|████████▍ | 10304/12188 [39:50<3:45:45, 7.19s/it] {'loss': 0.2975, 'grad_norm': 0.7246605274396433, 'learning_rate': 6.142996676731688e-07, 'epoch': 0.85} + 85%|████████▍ | 10304/12188 [39:50<3:45:45, 7.19s/it] 85%|████████▍ | 10305/12188 [39:57<3:39:26, 6.99s/it] {'loss': 0.3127, 'grad_norm': 0.7766419081135274, 'learning_rate': 6.13661731858462e-07, 'epoch': 0.85} + 85%|████████▍ | 10305/12188 [39:57<3:39:26, 6.99s/it] 85%|████████▍ | 10306/12188 [40:03<3:36:06, 6.89s/it] {'loss': 0.3382, 'grad_norm': 0.78784351010387, 'learning_rate': 6.130241057999153e-07, 'epoch': 0.85} + 85%|████████▍ | 10306/12188 [40:03<3:36:06, 6.89s/it] 85%|████████▍ | 10307/12188 [40:10<3:34:30, 6.84s/it] {'loss': 0.3133, 'grad_norm': 0.844308813639729, 'learning_rate': 6.123867895425589e-07, 'epoch': 0.85} + 85%|████████▍ | 10307/12188 [40:10<3:34:30, 6.84s/it] 85%|████████▍ | 10308/12188 [40:18<3:42:29, 7.10s/it] {'loss': 0.2955, 'grad_norm': 0.7863849601896831, 'learning_rate': 6.117497831313973e-07, 'epoch': 0.85} + 85%|████████▍ | 10308/12188 [40:18<3:42:29, 7.10s/it] 85%|████████▍ | 10309/12188 [40:25<3:41:01, 7.06s/it] {'loss': 0.293, 'grad_norm': 0.6551853648601094, 'learning_rate': 6.111130866114162e-07, 'epoch': 0.85} + 85%|████████▍ | 10309/12188 [40:25<3:41:01, 7.06s/it] 85%|████████▍ | 10310/12188 [40:31<3:36:48, 6.93s/it] {'loss': 0.343, 'grad_norm': 0.7142099757333334, 'learning_rate': 6.10476700027578e-07, 'epoch': 0.85} + 85%|████████▍ | 10310/12188 [40:31<3:36:48, 6.93s/it] 85%|████████▍ | 10311/12188 [40:39<3:39:22, 7.01s/it] {'loss': 0.261, 'grad_norm': 0.6994995556827385, 'learning_rate': 6.098406234248222e-07, 'epoch': 0.85} + 85%|████████▍ | 10311/12188 [40:39<3:39:22, 7.01s/it] 85%|████████▍ | 10312/12188 [40:45<3:37:36, 6.96s/it] {'loss': 0.2922, 'grad_norm': 0.7475540525736598, 'learning_rate': 6.092048568480697e-07, 'epoch': 0.85} + 85%|████████▍ | 10312/12188 [40:45<3:37:36, 6.96s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f1d2a07ae30> +[Try #0] Failed to fetch sample 4870604 in VC:s3://gui/OS-Atlas/desktop_domain/linux_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f1d2a07ae30> +Problematic sample: {'image': 'output_20240912_153123_original_screenshot_sub3.png', 'conversations': [{'from': 'human', 'value': "\nClick on '跳至本页面的“常见问题解答”部分'"}, {'from': 'gpt', 'value': '\nclick(x=0.2504, y=0.3285)\n'}]} + 85%|████████▍ | 10313/12188 [40:53<3:45:32, 7.22s/it] {'loss': 0.2885, 'grad_norm': 0.6934878152567915, 'learning_rate': 6.085694003422144e-07, 'epoch': 0.85} + 85%|████████▍ | 10313/12188 [40:53<3:45:32, 7.22s/it] 85%|████████▍ | 10314/12188 [41:00<3:39:58, 7.04s/it] {'loss': 0.2935, 'grad_norm': 0.6816507521102593, 'learning_rate': 6.079342539521338e-07, 'epoch': 0.85} + 85%|████████▍ | 10314/12188 [41:00<3:39:58, 7.04s/it] 85%|████████▍ | 10315/12188 [41:07<3:43:32, 7.16s/it] {'loss': 0.2822, 'grad_norm': 0.7049120298113404, 'learning_rate': 6.072994177226793e-07, 'epoch': 0.85} + 85%|████████▍ | 10315/12188 [41:07<3:43:32, 7.16s/it] 85%|████████▍ | 10316/12188 [41:15<3:44:22, 7.19s/it] {'loss': 0.3207, 'grad_norm': 0.8020367411369395, 'learning_rate': 6.066648916986823e-07, 'epoch': 0.85} + 85%|████████▍ | 10316/12188 [41:15<3:44:22, 7.19s/it] 85%|████████▍ | 10317/12188 [41:22<3:45:31, 7.23s/it] {'loss': 0.2915, 'grad_norm': 0.7433878846565632, 'learning_rate': 6.060306759249529e-07, 'epoch': 0.85} + 85%|████████▍ | 10317/12188 [41:22<3:45:31, 7.23s/it] 85%|████████▍ | 10318/12188 [41:29<3:41:24, 7.10s/it] {'loss': 0.2819, 'grad_norm': 0.7149124049257364, 'learning_rate': 6.053967704462766e-07, 'epoch': 0.85} + 85%|████████▍ | 10318/12188 [41:29<3:41:24, 7.10s/it] 85%|████████▍ | 10319/12188 [41:35<3:37:19, 6.98s/it] {'loss': 0.2758, 'grad_norm': 0.9652129689450527, 'learning_rate': 6.047631753074195e-07, 'epoch': 0.85} + 85%|████████▍ | 10319/12188 [41:35<3:37:19, 6.98s/it] 85%|████████▍ | 10320/12188 [41:43<3:42:40, 7.15s/it] {'loss': 0.327, 'grad_norm': 0.7035546117939983, 'learning_rate': 6.041298905531273e-07, 'epoch': 0.85} + 85%|████████▍ | 10320/12188 [41:43<3:42:40, 7.15s/it] 85%|████████▍ | 10321/12188 [41:50<3:43:19, 7.18s/it] {'loss': 0.3167, 'grad_norm': 0.7037272814514137, 'learning_rate': 6.034969162281184e-07, 'epoch': 0.85} + 85%|████████▍ | 10321/12188 [41:50<3:43:19, 7.18s/it] 85%|████████▍ | 10322/12188 [41:57<3:41:16, 7.12s/it] {'loss': 0.3054, 'grad_norm': 0.6551487556160562, 'learning_rate': 6.028642523770934e-07, 'epoch': 0.85} + 85%|████████▍ | 10322/12188 [41:57<3:41:16, 7.12s/it] 85%|████████▍ | 10323/12188 [42:04<3:42:21, 7.15s/it] {'loss': 0.2795, 'grad_norm': 0.6663918732611702, 'learning_rate': 6.022318990447318e-07, 'epoch': 0.85} + 85%|████████▍ | 10323/12188 [42:04<3:42:21, 7.15s/it] 85%|████████▍ | 10324/12188 [42:12<3:44:31, 7.23s/it] {'loss': 0.294, 'grad_norm': 0.6816384223488243, 'learning_rate': 6.01599856275687e-07, 'epoch': 0.85} + 85%|████████▍ | 10324/12188 [42:12<3:44:31, 7.23s/it] 85%|████████▍ | 10325/12188 [42:22<4:14:41, 8.20s/it] {'loss': 0.3302, 'grad_norm': 0.6912108846437859, 'learning_rate': 6.009681241145943e-07, 'epoch': 0.85} + 85%|████████▍ | 10325/12188 [42:22<4:14:41, 8.20s/it] 85%|████████▍ | 10326/12188 [42:31<4:17:43, 8.30s/it] {'loss': 0.2836, 'grad_norm': 0.778602054609131, 'learning_rate': 6.003367026060647e-07, 'epoch': 0.85} + 85%|████████▍ | 10326/12188 [42:31<4:17:43, 8.30s/it] 85%|████████▍ | 10327/12188 [42:37<4:02:29, 7.82s/it] {'loss': 0.2832, 'grad_norm': 0.7408990623509364, 'learning_rate': 5.997055917946893e-07, 'epoch': 0.85} + 85%|████████▍ | 10327/12188 [42:38<4:02:29, 7.82s/it] 85%|████████▍ | 10328/12188 [42:45<3:59:46, 7.73s/it] {'loss': 0.2934, 'grad_norm': 0.7280357938147507, 'learning_rate': 5.99074791725035e-07, 'epoch': 0.85} + 85%|████████▍ | 10328/12188 [42:45<3:59:46, 7.73s/it] 85%|████████▍ | 10329/12188 [42:54<4:08:24, 8.02s/it] {'loss': 0.2546, 'grad_norm': 0.684753840239408, 'learning_rate': 5.984443024416476e-07, 'epoch': 0.85} + 85%|████████▍ | 10329/12188 [42:54<4:08:24, 8.02s/it] 85%|████████▍ | 10330/12188 [43:00<3:55:34, 7.61s/it] {'loss': 0.3155, 'grad_norm': 0.8556288922223203, 'learning_rate': 5.978141239890528e-07, 'epoch': 0.85} + 85%|████████▍ | 10330/12188 [43:00<3:55:34, 7.61s/it] 85%|████████▍ | 10331/12188 [43:08<3:53:18, 7.54s/it] {'loss': 0.3302, 'grad_norm': 0.6669321023419676, 'learning_rate': 5.971842564117513e-07, 'epoch': 0.85} + 85%|████████▍ | 10331/12188 [43:08<3:53:18, 7.54s/it] 85%|████████▍ | 10332/12188 [43:15<3:46:36, 7.33s/it] {'loss': 0.275, 'grad_norm': 0.6822213727521311, 'learning_rate': 5.965546997542238e-07, 'epoch': 0.85} + 85%|████████▍ | 10332/12188 [43:15<3:46:36, 7.33s/it] 85%|████████▍ | 10333/12188 [43:22<3:50:37, 7.46s/it] {'loss': 0.3444, 'grad_norm': 0.6899421848887478, 'learning_rate': 5.959254540609294e-07, 'epoch': 0.85} + 85%|████████▍ | 10333/12188 [43:22<3:50:37, 7.46s/it] 85%|████████▍ | 10334/12188 [43:29<3:46:42, 7.34s/it] {'loss': 0.2757, 'grad_norm': 0.7191092170344996, 'learning_rate': 5.952965193763028e-07, 'epoch': 0.85} + 85%|████████▍ | 10334/12188 [43:29<3:46:42, 7.34s/it] 85%|████████▍ | 10335/12188 [43:37<3:50:35, 7.47s/it] {'loss': 0.3228, 'grad_norm': 0.7316979220250501, 'learning_rate': 5.946678957447605e-07, 'epoch': 0.85} + 85%|████████▍ | 10335/12188 [43:37<3:50:35, 7.47s/it] 85%|████████▍ | 10336/12188 [43:45<3:50:59, 7.48s/it] {'loss': 0.3067, 'grad_norm': 0.6639212430139187, 'learning_rate': 5.940395832106926e-07, 'epoch': 0.85} + 85%|████████▍ | 10336/12188 [43:45<3:50:59, 7.48s/it] 85%|████████▍ | 10337/12188 [43:52<3:48:38, 7.41s/it] {'loss': 0.3332, 'grad_norm': 0.7035053871295954, 'learning_rate': 5.93411581818471e-07, 'epoch': 0.85} + 85%|████████▍ | 10337/12188 [43:52<3:48:38, 7.41s/it] 85%|████████▍ | 10338/12188 [43:59<3:41:49, 7.19s/it] {'loss': 0.3233, 'grad_norm': 0.6563462668635291, 'learning_rate': 5.927838916124445e-07, 'epoch': 0.85} + 85%|████████▍ | 10338/12188 [43:59<3:41:49, 7.19s/it] 85%|████████▍ | 10339/12188 [44:06<3:43:36, 7.26s/it] {'loss': 0.314, 'grad_norm': 0.7112914480458847, 'learning_rate': 5.921565126369378e-07, 'epoch': 0.85} + 85%|████████▍ | 10339/12188 [44:06<3:43:36, 7.26s/it] 85%|████████▍ | 10340/12188 [44:13<3:37:21, 7.06s/it] {'loss': 0.3006, 'grad_norm': 0.7807210717509075, 'learning_rate': 5.915294449362574e-07, 'epoch': 0.85} + 85%|████████▍ | 10340/12188 [44:13<3:37:21, 7.06s/it] 85%|████████▍ | 10341/12188 [44:22<3:59:17, 7.77s/it] {'loss': 0.3064, 'grad_norm': 0.6784822425853597, 'learning_rate': 5.909026885546837e-07, 'epoch': 0.85} + 85%|████████▍ | 10341/12188 [44:22<3:59:17, 7.77s/it] 85%|████████▍ | 10342/12188 [44:29<3:52:35, 7.56s/it] {'loss': 0.3106, 'grad_norm': 0.6208071693002277, 'learning_rate': 5.902762435364795e-07, 'epoch': 0.85} + 85%|████████▍ | 10342/12188 [44:29<3:52:35, 7.56s/it] 85%|████████▍ | 10343/12188 [44:36<3:46:36, 7.37s/it] {'loss': 0.2893, 'grad_norm': 0.6817413556593023, 'learning_rate': 5.896501099258822e-07, 'epoch': 0.85} + 85%|████████▍ | 10343/12188 [44:36<3:46:36, 7.37s/it] 85%|████████▍ | 10344/12188 [44:42<3:38:00, 7.09s/it] {'loss': 0.2735, 'grad_norm': 0.7574218045612284, 'learning_rate': 5.890242877671077e-07, 'epoch': 0.85} + 85%|████████▍ | 10344/12188 [44:42<3:38:00, 7.09s/it] 85%|████████▍ | 10345/12188 [44:50<3:42:39, 7.25s/it] {'loss': 0.2623, 'grad_norm': 0.6194824475603423, 'learning_rate': 5.883987771043509e-07, 'epoch': 0.85} + 85%|████████▍ | 10345/12188 [44:50<3:42:39, 7.25s/it] 85%|████████▍ | 10346/12188 [44:57<3:37:50, 7.10s/it] {'loss': 0.2878, 'grad_norm': 0.683529713255001, 'learning_rate': 5.877735779817861e-07, 'epoch': 0.85} + 85%|████████▍ | 10346/12188 [44:57<3:37:50, 7.10s/it] 85%|████████▍ | 10347/12188 [45:04<3:36:48, 7.07s/it] {'loss': 0.3136, 'grad_norm': 0.7073907179791113, 'learning_rate': 5.871486904435608e-07, 'epoch': 0.85} + 85%|████████▍ | 10347/12188 [45:04<3:36:48, 7.07s/it] 85%|████████▍ | 10348/12188 [45:11<3:39:58, 7.17s/it] {'loss': 0.2579, 'grad_norm': 0.6483325646988438, 'learning_rate': 5.865241145338063e-07, 'epoch': 0.85} + 85%|████████▍ | 10348/12188 [45:11<3:39:58, 7.17s/it] 85%|████████▍ | 10349/12188 [45:19<3:45:07, 7.34s/it] {'loss': 0.2872, 'grad_norm': 0.7347414503717626, 'learning_rate': 5.858998502966273e-07, 'epoch': 0.85} + 85%|████████▍ | 10349/12188 [45:19<3:45:07, 7.34s/it] 85%|████████▍ | 10350/12188 [45:26<3:41:43, 7.24s/it] {'loss': 0.289, 'grad_norm': 0.7583775566664461, 'learning_rate': 5.852758977761091e-07, 'epoch': 0.85} + 85%|████████▍ | 10350/12188 [45:26<3:41:43, 7.24s/it] 85%|████████▍ | 10351/12188 [45:33<3:41:54, 7.25s/it] {'loss': 0.3286, 'grad_norm': 0.7980671512134331, 'learning_rate': 5.846522570163155e-07, 'epoch': 0.85} + 85%|████████▍ | 10351/12188 [45:33<3:41:54, 7.25s/it] 85%|████████▍ | 10352/12188 [45:40<3:38:13, 7.13s/it] {'loss': 0.3206, 'grad_norm': 0.7833204960407256, 'learning_rate': 5.840289280612837e-07, 'epoch': 0.85} + 85%|████████▍ | 10352/12188 [45:40<3:38:13, 7.13s/it] 85%|████████▍ | 10353/12188 [45:47<3:32:19, 6.94s/it] {'loss': 0.252, 'grad_norm': 0.6685430292974563, 'learning_rate': 5.83405910955036e-07, 'epoch': 0.85} + 85%|████████▍ | 10353/12188 [45:47<3:32:19, 6.94s/it]Invalidate trace cache @ step 2: expected module 1, but got module 364 + 85%|████████▍ | 10354/12188 [45:53<3:24:36, 6.69s/it] {'loss': 0.6414, 'grad_norm': 0.5750977496340008, 'learning_rate': 5.827832057415661e-07, 'epoch': 0.85} + 85%|████████▍ | 10354/12188 [45:53<3:24:36, 6.69s/it] 85%|████████▍ | 10355/12188 [46:00<3:33:16, 6.98s/it] {'loss': 0.2967, 'grad_norm': 0.6691806867516952, 'learning_rate': 5.821608124648504e-07, 'epoch': 0.85} + 85%|████████▍ | 10355/12188 [46:00<3:33:16, 6.98s/it] 85%|████████▍ | 10356/12188 [46:08<3:39:39, 7.19s/it] {'loss': 0.304, 'grad_norm': 0.676831371654559, 'learning_rate': 5.815387311688398e-07, 'epoch': 0.85} + 85%|████████▍ | 10356/12188 [46:08<3:39:39, 7.19s/it] 85%|████████▍ | 10357/12188 [46:15<3:32:58, 6.98s/it] {'loss': 0.3053, 'grad_norm': 0.6890346521888092, 'learning_rate': 5.809169618974647e-07, 'epoch': 0.85} + 85%|████████▍ | 10357/12188 [46:15<3:32:58, 6.98s/it] 85%|████████▍ | 10358/12188 [46:22<3:35:31, 7.07s/it] {'loss': 0.2852, 'grad_norm': 0.7019609771137237, 'learning_rate': 5.802955046946335e-07, 'epoch': 0.85} + 85%|████████▍ | 10358/12188 [46:22<3:35:31, 7.07s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1348, in _get_item + raise ValueError( +ValueError: Number of image tokens ['data/bottom-navigation/other_screenshot/original/ProductivityBottomNavigation_1739983263.5379376.png'] does not match number of images None +[Try #0] Failed to fetch sample 1868482 in VC:s3://gui-agent/jedi/images/component_v1_130k/component_v1_130k_extracted/. Exception: Number of image tokens ['data/bottom-navigation/other_screenshot/original/ProductivityBottomNavigation_1739983263.5379376.png'] does not match number of images None +Problematic sample: {'image': 'data/bottom-navigation/other_screenshot/original/ProductivityBottomNavigation_1739983263.5379376.png', 'conversations': [], 'image_id': 'data/bottom-navigation/other_screenshot/original/ProductivityBottomNavigation_1739983263.5379376.png'} + 85%|████████▍ | 10359/12188 [46:29<3:33:24, 7.00s/it] {'loss': 0.2961, 'grad_norm': 0.7099042120611966, 'learning_rate': 5.796743596042342e-07, 'epoch': 0.85} + 85%|████████▍ | 10359/12188 [46:29<3:33:24, 7.00s/it] 85%|████████▌ | 10360/12188 [46:36<3:33:54, 7.02s/it] {'loss': 0.2426, 'grad_norm': 0.6415029034264929, 'learning_rate': 5.790535266701291e-07, 'epoch': 0.85} + 85%|████████▌ | 10360/12188 [46:36<3:33:54, 7.02s/it] 85%|████████▌ | 10361/12188 [46:43<3:31:51, 6.96s/it] {'loss': 0.3098, 'grad_norm': 0.7275649537648752, 'learning_rate': 5.78433005936162e-07, 'epoch': 0.85} + 85%|████████▌ | 10361/12188 [46:43<3:31:51, 6.96s/it] 85%|████████▌ | 10362/12188 [46:49<3:29:45, 6.89s/it] {'loss': 0.2771, 'grad_norm': 0.7431540623331732, 'learning_rate': 5.778127974461511e-07, 'epoch': 0.85} + 85%|████████▌ | 10362/12188 [46:49<3:29:45, 6.89s/it] 85%|████████▌ | 10363/12188 [46:57<3:35:34, 7.09s/it] {'loss': 0.2751, 'grad_norm': 0.9558456150636936, 'learning_rate': 5.77192901243896e-07, 'epoch': 0.85} + 85%|████████▌ | 10363/12188 [46:57<3:35:34, 7.09s/it] 85%|████████▌ | 10364/12188 [47:04<3:33:28, 7.02s/it] {'loss': 0.3185, 'grad_norm': 0.6944672573083066, 'learning_rate': 5.765733173731731e-07, 'epoch': 0.85} + 85%|████████▌ | 10364/12188 [47:04<3:33:28, 7.02s/it] 85%|████████▌ | 10365/12188 [47:10<3:28:48, 6.87s/it] {'loss': 0.3429, 'grad_norm': 0.7766038985934067, 'learning_rate': 5.759540458777346e-07, 'epoch': 0.85} + 85%|████████▌ | 10365/12188 [47:10<3:28:48, 6.87s/it] 85%|████████▌ | 10366/12188 [47:17<3:29:30, 6.90s/it] {'loss': 0.2682, 'grad_norm': 0.6437450085160427, 'learning_rate': 5.753350868013147e-07, 'epoch': 0.85} + 85%|████████▌ | 10366/12188 [47:17<3:29:30, 6.90s/it] 85%|████████▌ | 10367/12188 [47:24<3:27:46, 6.85s/it] {'loss': 0.3437, 'grad_norm': 0.7044979105242638, 'learning_rate': 5.747164401876215e-07, 'epoch': 0.85} + 85%|████████▌ | 10367/12188 [47:24<3:27:46, 6.85s/it] 85%|████████▌ | 10368/12188 [47:31<3:29:07, 6.89s/it] {'loss': 0.2864, 'grad_norm': 0.7378429335326553, 'learning_rate': 5.740981060803441e-07, 'epoch': 0.85} + 85%|████████▌ | 10368/12188 [47:31<3:29:07, 6.89s/it] 85%|████████▌ | 10369/12188 [47:38<3:28:57, 6.89s/it] {'loss': 0.3108, 'grad_norm': 0.7265763038268253, 'learning_rate': 5.73480084523147e-07, 'epoch': 0.85} + 85%|████████▌ | 10369/12188 [47:38<3:28:57, 6.89s/it] 85%|████████▌ | 10370/12188 [47:47<3:51:21, 7.64s/it] {'loss': 0.2912, 'grad_norm': 0.7162800187874928, 'learning_rate': 5.728623755596757e-07, 'epoch': 0.85} + 85%|████████▌ | 10370/12188 [47:47<3:51:21, 7.64s/it] 85%|████████▌ | 10371/12188 [47:54<3:47:48, 7.52s/it] {'loss': 0.3397, 'grad_norm': 0.7142703942875657, 'learning_rate': 5.722449792335505e-07, 'epoch': 0.85} + 85%|████████▌ | 10371/12188 [47:54<3:47:48, 7.52s/it] 85%|████████▌ | 10372/12188 [48:03<3:56:10, 7.80s/it] {'loss': 0.2969, 'grad_norm': 0.6741223663355562, 'learning_rate': 5.716278955883703e-07, 'epoch': 0.85} + 85%|████████▌ | 10372/12188 [48:03<3:56:10, 7.80s/it] 85%|████████▌ | 10373/12188 [48:10<3:45:50, 7.47s/it] {'loss': 0.2748, 'grad_norm': 0.7336860449656302, 'learning_rate': 5.71011124667713e-07, 'epoch': 0.85} + 85%|████████▌ | 10373/12188 [48:10<3:45:50, 7.47s/it] 85%|████████▌ | 10374/12188 [48:17<3:45:34, 7.46s/it] {'loss': 0.2712, 'grad_norm': 0.7284255807436877, 'learning_rate': 5.703946665151356e-07, 'epoch': 0.85} + 85%|████████▌ | 10374/12188 [48:17<3:45:34, 7.46s/it] 85%|████████▌ | 10375/12188 [48:24<3:41:12, 7.32s/it] {'loss': 0.2902, 'grad_norm': 0.9120423496459216, 'learning_rate': 5.697785211741691e-07, 'epoch': 0.85} + 85%|████████▌ | 10375/12188 [48:24<3:41:12, 7.32s/it] 85%|████████▌ | 10376/12188 [48:31<3:35:14, 7.13s/it] {'loss': 0.3097, 'grad_norm': 0.6264882114520404, 'learning_rate': 5.691626886883261e-07, 'epoch': 0.85} + 85%|████████▌ | 10376/12188 [48:31<3:35:14, 7.13s/it] 85%|████████▌ | 10377/12188 [48:40<3:58:54, 7.92s/it] {'loss': 0.2955, 'grad_norm': 0.8081314332262372, 'learning_rate': 5.685471691010958e-07, 'epoch': 0.85} + 85%|████████▌ | 10377/12188 [48:40<3:58:54, 7.92s/it] 85%|████████▌ | 10378/12188 [48:48<3:56:07, 7.83s/it] {'loss': 0.2899, 'grad_norm': 0.7532676656918502, 'learning_rate': 5.679319624559443e-07, 'epoch': 0.85} + 85%|████████▌ | 10378/12188 [48:48<3:56:07, 7.83s/it] 85%|████████▌ | 10379/12188 [48:55<3:52:18, 7.70s/it] {'loss': 0.3062, 'grad_norm': 0.774682559067121, 'learning_rate': 5.673170687963175e-07, 'epoch': 0.85} + 85%|████████▌ | 10379/12188 [48:56<3:52:18, 7.70s/it] 85%|████████▌ | 10380/12188 [49:03<3:46:28, 7.52s/it] {'loss': 0.28, 'grad_norm': 0.6964211804306668, 'learning_rate': 5.667024881656369e-07, 'epoch': 0.85} + 85%|████████▌ | 10380/12188 [49:03<3:46:28, 7.52s/it] 85%|████████▌ | 10381/12188 [49:09<3:38:59, 7.27s/it] {'loss': 0.2868, 'grad_norm': 0.720734413559343, 'learning_rate': 5.660882206073037e-07, 'epoch': 0.85} + 85%|████████▌ | 10381/12188 [49:09<3:38:59, 7.27s/it] 85%|████████▌ | 10382/12188 [49:19<4:01:24, 8.02s/it] {'loss': 0.3166, 'grad_norm': 0.7430745315309292, 'learning_rate': 5.654742661646978e-07, 'epoch': 0.85} + 85%|████████▌ | 10382/12188 [49:19<4:01:24, 8.02s/it] 85%|████████▌ | 10383/12188 [49:26<3:50:02, 7.65s/it] {'loss': 0.2939, 'grad_norm': 0.6647366089461092, 'learning_rate': 5.64860624881175e-07, 'epoch': 0.85} + 85%|████████▌ | 10383/12188 [49:26<3:50:02, 7.65s/it] 85%|████████▌ | 10384/12188 [49:34<3:56:50, 7.88s/it] {'loss': 0.2798, 'grad_norm': 0.682267264688231, 'learning_rate': 5.64247296800069e-07, 'epoch': 0.85} + 85%|████████▌ | 10384/12188 [49:34<3:56:50, 7.88s/it] 85%|████████▌ | 10385/12188 [49:41<3:44:46, 7.48s/it] {'loss': 0.3102, 'grad_norm': 0.7289661049191652, 'learning_rate': 5.636342819646912e-07, 'epoch': 0.85} + 85%|████████▌ | 10385/12188 [49:41<3:44:46, 7.48s/it] 85%|████████▌ | 10386/12188 [49:48<3:41:57, 7.39s/it] {'loss': 0.3056, 'grad_norm': 0.7389550224448883, 'learning_rate': 5.630215804183325e-07, 'epoch': 0.85} + 85%|████████▌ | 10386/12188 [49:48<3:41:57, 7.39s/it] 85%|████████▌ | 10387/12188 [49:55<3:35:58, 7.20s/it] {'loss': 0.3318, 'grad_norm': 0.7154451437812105, 'learning_rate': 5.624091922042629e-07, 'epoch': 0.85} + 85%|████████▌ | 10387/12188 [49:55<3:35:58, 7.20s/it] 85%|████████▌ | 10388/12188 [50:02<3:33:28, 7.12s/it] {'loss': 0.3027, 'grad_norm': 0.6851803718290775, 'learning_rate': 5.617971173657255e-07, 'epoch': 0.85} + 85%|████████▌ | 10388/12188 [50:02<3:33:28, 7.12s/it] 85%|████████▌ | 10389/12188 [50:09<3:32:07, 7.07s/it] {'loss': 0.2833, 'grad_norm': 0.7103246153239989, 'learning_rate': 5.611853559459457e-07, 'epoch': 0.85} + 85%|████████▌ | 10389/12188 [50:09<3:32:07, 7.07s/it] 85%|████████▌ | 10390/12188 [50:16<3:34:15, 7.15s/it] {'loss': 0.28, 'grad_norm': 0.6632762634872797, 'learning_rate': 5.60573907988124e-07, 'epoch': 0.85} + 85%|████████▌ | 10390/12188 [50:16<3:34:15, 7.15s/it] 85%|████████▌ | 10391/12188 [50:23<3:30:35, 7.03s/it] {'loss': 0.3189, 'grad_norm': 0.900563916184925, 'learning_rate': 5.599627735354408e-07, 'epoch': 0.85} + 85%|████████▌ | 10391/12188 [50:23<3:30:35, 7.03s/it] 85%|████████▌ | 10392/12188 [50:30<3:31:00, 7.05s/it] {'loss': 0.2766, 'grad_norm': 0.7171355958033043, 'learning_rate': 5.593519526310532e-07, 'epoch': 0.85} + 85%|████████▌ | 10392/12188 [50:30<3:31:00, 7.05s/it] 85%|████████▌ | 10393/12188 [50:37<3:30:17, 7.03s/it] {'loss': 0.2562, 'grad_norm': 0.6805815590547873, 'learning_rate': 5.587414453180956e-07, 'epoch': 0.85} + 85%|████████▌ | 10393/12188 [50:37<3:30:17, 7.03s/it] 85%|████████▌ | 10394/12188 [50:43<3:25:40, 6.88s/it] {'loss': 0.3056, 'grad_norm': 0.7324951347957896, 'learning_rate': 5.58131251639682e-07, 'epoch': 0.85} + 85%|████████▌ | 10394/12188 [50:43<3:25:40, 6.88s/it] 85%|████████▌ | 10395/12188 [50:50<3:25:33, 6.88s/it] {'loss': 0.2914, 'grad_norm': 0.7294097034867921, 'learning_rate': 5.575213716389039e-07, 'epoch': 0.85} + 85%|████████▌ | 10395/12188 [50:50<3:25:33, 6.88s/it] 85%|████████▌ | 10396/12188 [50:58<3:31:18, 7.08s/it] {'loss': 0.3298, 'grad_norm': 0.7813700489017142, 'learning_rate': 5.569118053588291e-07, 'epoch': 0.85} + 85%|████████▌ | 10396/12188 [50:58<3:31:18, 7.08s/it] 85%|████████▌ | 10397/12188 [51:05<3:28:45, 6.99s/it] {'loss': 0.2931, 'grad_norm': 0.6723269165323361, 'learning_rate': 5.563025528425031e-07, 'epoch': 0.85} + 85%|████████▌ | 10397/12188 [51:05<3:28:45, 6.99s/it] 85%|████████▌ | 10398/12188 [51:11<3:24:54, 6.87s/it] {'loss': 0.3207, 'grad_norm': 0.6449872372789548, 'learning_rate': 5.556936141329521e-07, 'epoch': 0.85} + 85%|████████▌ | 10398/12188 [51:11<3:24:54, 6.87s/it] 85%|████████▌ | 10399/12188 [51:18<3:22:39, 6.80s/it] {'loss': 0.2865, 'grad_norm': 0.6689077863087264, 'learning_rate': 5.550849892731774e-07, 'epoch': 0.85} + 85%|████████▌ | 10399/12188 [51:18<3:22:39, 6.80s/it] 85%|████████▌ | 10400/12188 [51:25<3:24:01, 6.85s/it] {'loss': 0.3031, 'grad_norm': 0.6809660313600899, 'learning_rate': 5.5447667830616e-07, 'epoch': 0.85} + 85%|████████▌ | 10400/12188 [51:25<3:24:01, 6.85s/it] 85%|████████▌ | 10401/12188 [51:31<3:23:05, 6.82s/it] {'loss': 0.3002, 'grad_norm': 0.706072518033087, 'learning_rate': 5.538686812748567e-07, 'epoch': 0.85} + 85%|████████▌ | 10401/12188 [51:31<3:23:05, 6.82s/it] 85%|████████▌ | 10402/12188 [51:39<3:27:46, 6.98s/it] {'loss': 0.2969, 'grad_norm': 0.7415221494135418, 'learning_rate': 5.532609982222048e-07, 'epoch': 0.85} + 85%|████████▌ | 10402/12188 [51:39<3:27:46, 6.98s/it] 85%|████████▌ | 10403/12188 [51:46<3:29:37, 7.05s/it] {'loss': 0.3231, 'grad_norm': 0.6943602068261671, 'learning_rate': 5.526536291911161e-07, 'epoch': 0.85} + 85%|████████▌ | 10403/12188 [51:46<3:29:37, 7.05s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1348, in _get_item + raise ValueError( +ValueError: Number of image tokens ['63eeb12752a6426abeb129b8049d5bddstep20.png', '63eeb12752a6426abeb129b8049d5bddstep21.png'] does not match number of images None +[Try #0] Failed to fetch sample 1096386 in VC:s3://gui/aguvis/aguvis-stage2/amex/images. Exception: Number of image tokens ['63eeb12752a6426abeb129b8049d5bddstep20.png', '63eeb12752a6426abeb129b8049d5bddstep21.png'] does not match number of images None +Problematic sample: {'image': ['63eeb12752a6426abeb129b8049d5bddstep20.png', '63eeb12752a6426abeb129b8049d5bddstep21.png'], 'conversations': [{'from': 'human', 'value': "\nPlease generate the next move according to the UI screenshot, the task and previous operations.\n\nTask:\nI want to book a hotel in london, prize should be less than $600, guest rating is 8+, 4 star rating, breakfast included\n\nPrevious operations:\nStep 1: Tap on the Chrome app to start searching for hotels in London.\nStep 2: Type 'Chrome' in the search bar to open the Chrome browser.\nStep 3: Tap on the Hotels.com app icon to begin searching for a hotel.\nStep 4: Wait for the Hotels.com app to finish loading to proceed with the hotel search.\nStep 5: Tap on the 'Going to' field to enter London as the destination.\nStep 6: Tap on 'London' from Recent Searches to select it as the destination for the hotel search.\nStep 7: Tap on 'London, England, United Kingdom' to select it as the destination for the hotel search.\nStep 8: Tap on the 'Search' button to view available hotels in London.\nStep 9: Tap on the filter icon or option to apply filters to the hotel search results.\nStep 10: Swipe up on the screen to reveal more filter options, such as guest rating and hotel stars.\nStep 11: Swipe up to reveal more filter options, specifically guest ratings and hotel stars.\nStep 12: Tap on the 4-star rating box to select it.\nStep 13: Tap 'Done' to confirm the current filter selections and proceed.\nStep 14: Swipe up on the screen to locate and select the 'breakfast included' filter option.\nStep 15: Tap on 'Breakfast included' to select it.\nStep 16: Tap on the 'Done' button to apply the filters and view available hotel options.\nStep 17: Tap on the topmost hotel card to view its details.\nStep 18: Tap on the 'Select a room' button to check available room options and their prices.\nStep 19: Tap on 'Select and customize' to proceed with the room selection and customization options.\nStep 20: Tap on 'Breakfast buffet' to select it, then tap on 'Reserve' to book the room."}, {'from': 'gpt', 'value': "\nGiven the sequence of booking actions, it seems the process is almost complete. The loading sign indicates that the system is processing the final booking or confirmation step.\n\n\nWait for the loading to complete to confirm the booking status.\n\n\nterminate(status='success')\n"}]} + 85%|████████▌ | 10404/12188 [51:54<3:37:19, 7.31s/it] {'loss': 0.3091, 'grad_norm': 0.7152807001278461, 'learning_rate': 5.520465742244824e-07, 'epoch': 0.85} + 85%|████████▌ | 10404/12188 [51:54<3:37:19, 7.31s/it] 85%|████████▌ | 10405/12188 [52:01<3:30:51, 7.10s/it] {'loss': 0.3184, 'grad_norm': 0.7147983428794491, 'learning_rate': 5.51439833365175e-07, 'epoch': 0.85} + 85%|████████▌ | 10405/12188 [52:01<3:30:51, 7.10s/it] 85%|████████▌ | 10406/12188 [52:12<4:07:46, 8.34s/it] {'loss': 0.2367, 'grad_norm': 0.6403205842887822, 'learning_rate': 5.508334066560377e-07, 'epoch': 0.85} + 85%|████████▌ | 10406/12188 [52:12<4:07:46, 8.34s/it] 85%|████████▌ | 10407/12188 [52:19<4:00:37, 8.11s/it] {'loss': 0.3062, 'grad_norm': 0.6868125756271176, 'learning_rate': 5.50227294139899e-07, 'epoch': 0.85} + 85%|████████▌ | 10407/12188 [52:19<4:00:37, 8.11s/it] 85%|████████▌ | 10408/12188 [52:27<3:53:31, 7.87s/it] {'loss': 0.2897, 'grad_norm': 0.7533856128457211, 'learning_rate': 5.496214958595581e-07, 'epoch': 0.85} + 85%|████████▌ | 10408/12188 [52:27<3:53:31, 7.87s/it] 85%|████████▌ | 10409/12188 [52:34<3:48:51, 7.72s/it] {'loss': 0.2767, 'grad_norm': 0.6284149822149206, 'learning_rate': 5.490160118577969e-07, 'epoch': 0.85} + 85%|████████▌ | 10409/12188 [52:34<3:48:51, 7.72s/it] 85%|████████▌ | 10410/12188 [52:41<3:44:06, 7.56s/it] {'loss': 0.2732, 'grad_norm': 0.637918027421002, 'learning_rate': 5.484108421773754e-07, 'epoch': 0.85} + 85%|████████▌ | 10410/12188 [52:41<3:44:06, 7.56s/it] 85%|████████▌ | 10411/12188 [52:50<3:53:15, 7.88s/it] {'loss': 0.3079, 'grad_norm': 0.7400805943978099, 'learning_rate': 5.47805986861028e-07, 'epoch': 0.85} + 85%|████████▌ | 10411/12188 [52:50<3:53:15, 7.88s/it] 85%|████████▌ | 10412/12188 [52:56<3:42:28, 7.52s/it] {'loss': 0.3107, 'grad_norm': 0.6425002800818335, 'learning_rate': 5.47201445951468e-07, 'epoch': 0.85} + 85%|████████▌ | 10412/12188 [52:56<3:42:28, 7.52s/it] 85%|████████▌ | 10413/12188 [53:04<3:38:41, 7.39s/it] {'loss': 0.2913, 'grad_norm': 0.6761175538803809, 'learning_rate': 5.465972194913882e-07, 'epoch': 0.85} + 85%|████████▌ | 10413/12188 [53:04<3:38:41, 7.39s/it] 85%|████████▌ | 10414/12188 [53:11<3:35:46, 7.30s/it] {'loss': 0.2725, 'grad_norm': 0.6820315769990121, 'learning_rate': 5.459933075234575e-07, 'epoch': 0.85} + 85%|████████▌ | 10414/12188 [53:11<3:35:46, 7.30s/it] 85%|████████▌ | 10415/12188 [53:17<3:31:08, 7.15s/it] {'loss': 0.3374, 'grad_norm': 0.7562915005474405, 'learning_rate': 5.453897100903244e-07, 'epoch': 0.85} + 85%|████████▌ | 10415/12188 [53:17<3:31:08, 7.15s/it] 85%|████████▌ | 10416/12188 [53:24<3:28:40, 7.07s/it] {'loss': 0.3039, 'grad_norm': 0.7034198556967811, 'learning_rate': 5.447864272346121e-07, 'epoch': 0.85} + 85%|████████▌ | 10416/12188 [53:24<3:28:40, 7.07s/it] 85%|████████▌ | 10417/12188 [53:31<3:29:26, 7.10s/it] {'loss': 0.3333, 'grad_norm': 0.6787052994827447, 'learning_rate': 5.441834589989242e-07, 'epoch': 0.85} + 85%|████████▌ | 10417/12188 [53:32<3:29:26, 7.10s/it] 85%|████████▌ | 10418/12188 [53:38<3:26:59, 7.02s/it] {'loss': 0.2746, 'grad_norm': 0.6785082215551858, 'learning_rate': 5.435808054258429e-07, 'epoch': 0.85} + 85%|████████▌ | 10418/12188 [53:38<3:26:59, 7.02s/it] 85%|████████▌ | 10419/12188 [53:45<3:24:16, 6.93s/it] {'loss': 0.2977, 'grad_norm': 0.6339662603247014, 'learning_rate': 5.42978466557924e-07, 'epoch': 0.85} + 85%|████████▌ | 10419/12188 [53:45<3:24:16, 6.93s/it] 85%|████████▌ | 10420/12188 [53:52<3:23:38, 6.91s/it] {'loss': 0.3074, 'grad_norm': 0.8221744072154719, 'learning_rate': 5.423764424377065e-07, 'epoch': 0.85} + 85%|████████▌ | 10420/12188 [53:52<3:23:38, 6.91s/it] 86%|████████▌ | 10421/12188 [54:00<3:30:37, 7.15s/it] {'loss': 0.2799, 'grad_norm': 0.6326773679031907, 'learning_rate': 5.417747331077017e-07, 'epoch': 0.85} + 86%|████████▌ | 10421/12188 [54:00<3:30:37, 7.15s/it] 86%|████████▌ | 10422/12188 [54:07<3:31:14, 7.18s/it] {'loss': 0.3198, 'grad_norm': 0.6602045663749527, 'learning_rate': 5.411733386104029e-07, 'epoch': 0.86} + 86%|████████▌ | 10422/12188 [54:07<3:31:14, 7.18s/it] 86%|████████▌ | 10423/12188 [54:14<3:28:30, 7.09s/it] {'loss': 0.2626, 'grad_norm': 1.1426886383101764, 'learning_rate': 5.4057225898828e-07, 'epoch': 0.86} + 86%|████████▌ | 10423/12188 [54:14<3:28:30, 7.09s/it] 86%|████████▌ | 10424/12188 [54:24<3:55:20, 8.01s/it] {'loss': 0.3035, 'grad_norm': 0.6565848822833811, 'learning_rate': 5.399714942837792e-07, 'epoch': 0.86} + 86%|████████▌ | 10424/12188 [54:24<3:55:20, 8.01s/it] 86%|████████▌ | 10425/12188 [54:31<3:44:47, 7.65s/it] {'loss': 0.3172, 'grad_norm': 0.6191210434127241, 'learning_rate': 5.393710445393252e-07, 'epoch': 0.86} + 86%|████████▌ | 10425/12188 [54:31<3:44:47, 7.65s/it] 86%|████████▌ | 10426/12188 [54:38<3:38:54, 7.45s/it] {'loss': 0.2795, 'grad_norm': 0.7002029530069326, 'learning_rate': 5.387709097973226e-07, 'epoch': 0.86} + 86%|████████▌ | 10426/12188 [54:38<3:38:54, 7.45s/it] 86%|████████▌ | 10427/12188 [54:45<3:33:18, 7.27s/it] {'loss': 0.3342, 'grad_norm': 0.7428679571900466, 'learning_rate': 5.381710901001497e-07, 'epoch': 0.86} + 86%|████████▌ | 10427/12188 [54:45<3:33:18, 7.27s/it] 86%|████████▌ | 10428/12188 [54:52<3:34:30, 7.31s/it] {'loss': 0.2916, 'grad_norm': 0.6557150723226425, 'learning_rate': 5.37571585490167e-07, 'epoch': 0.86} + 86%|████████▌ | 10428/12188 [54:52<3:34:30, 7.31s/it] 86%|████████▌ | 10429/12188 [54:59<3:30:14, 7.17s/it] {'loss': 0.3143, 'grad_norm': 0.6448007748588743, 'learning_rate': 5.369723960097084e-07, 'epoch': 0.86} + 86%|████████▌ | 10429/12188 [54:59<3:30:14, 7.17s/it] 86%|████████▌ | 10430/12188 [55:07<3:36:00, 7.37s/it] {'loss': 0.324, 'grad_norm': 0.8302972814292012, 'learning_rate': 5.36373521701089e-07, 'epoch': 0.86} + 86%|████████▌ | 10430/12188 [55:07<3:36:00, 7.37s/it] 86%|████████▌ | 10431/12188 [55:14<3:34:04, 7.31s/it] {'loss': 0.2972, 'grad_norm': 0.7029523578995263, 'learning_rate': 5.357749626066011e-07, 'epoch': 0.86} + 86%|████████▌ | 10431/12188 [55:14<3:34:04, 7.31s/it] 86%|████████▌ | 10432/12188 [55:21<3:30:52, 7.21s/it] {'loss': 0.2868, 'grad_norm': 0.6873694549125294, 'learning_rate': 5.351767187685114e-07, 'epoch': 0.86} + 86%|████████▌ | 10432/12188 [55:21<3:30:52, 7.21s/it] 86%|████████▌ | 10433/12188 [55:28<3:28:48, 7.14s/it] {'loss': 0.2672, 'grad_norm': 0.6357403908278147, 'learning_rate': 5.345787902290706e-07, 'epoch': 0.86} + 86%|████████▌ | 10433/12188 [55:28<3:28:48, 7.14s/it] 86%|████████▌ | 10434/12188 [55:35<3:29:41, 7.17s/it] {'loss': 0.2942, 'grad_norm': 0.775093932960794, 'learning_rate': 5.339811770304992e-07, 'epoch': 0.86} + 86%|████████▌ | 10434/12188 [55:35<3:29:41, 7.17s/it] 86%|████████▌ | 10435/12188 [55:42<3:29:33, 7.17s/it] {'loss': 0.27, 'grad_norm': 0.7823288491058169, 'learning_rate': 5.333838792150026e-07, 'epoch': 0.86} + 86%|████████▌ | 10435/12188 [55:42<3:29:33, 7.17s/it] 86%|████████▌ | 10436/12188 [55:49<3:27:12, 7.10s/it] {'loss': 0.2824, 'grad_norm': 0.7250026856526447, 'learning_rate': 5.327868968247613e-07, 'epoch': 0.86} + 86%|████████▌ | 10436/12188 [55:49<3:27:12, 7.10s/it] 86%|████████▌ | 10437/12188 [55:56<3:25:21, 7.04s/it] {'loss': 0.2918, 'grad_norm': 0.726616111753451, 'learning_rate': 5.32190229901931e-07, 'epoch': 0.86} + 86%|████████▌ | 10437/12188 [55:56<3:25:21, 7.04s/it] 86%|████████▌ | 10438/12188 [56:05<3:40:13, 7.55s/it] {'loss': 0.2862, 'grad_norm': 0.7043334229192754, 'learning_rate': 5.315938784886499e-07, 'epoch': 0.86} + 86%|████████▌ | 10438/12188 [56:05<3:40:13, 7.55s/it] 86%|████████▌ | 10439/12188 [56:12<3:35:26, 7.39s/it] {'loss': 0.2819, 'grad_norm': 0.694805681065564, 'learning_rate': 5.309978426270296e-07, 'epoch': 0.86} + 86%|████████▌ | 10439/12188 [56:12<3:35:26, 7.39s/it] 86%|████████▌ | 10440/12188 [56:19<3:31:32, 7.26s/it] {'loss': 0.3204, 'grad_norm': 0.705325667850079, 'learning_rate': 5.30402122359161e-07, 'epoch': 0.86} + 86%|████████▌ | 10440/12188 [56:19<3:31:32, 7.26s/it] 86%|████████▌ | 10441/12188 [56:26<3:33:39, 7.34s/it] {'loss': 0.2666, 'grad_norm': 0.8596593154287862, 'learning_rate': 5.298067177271144e-07, 'epoch': 0.86} + 86%|████████▌ | 10441/12188 [56:26<3:33:39, 7.34s/it] 86%|████████▌ | 10442/12188 [56:33<3:32:34, 7.31s/it] {'loss': 0.2972, 'grad_norm': 0.743331309529327, 'learning_rate': 5.292116287729348e-07, 'epoch': 0.86} + 86%|████████▌ | 10442/12188 [56:33<3:32:34, 7.31s/it] 86%|████████▌ | 10443/12188 [56:40<3:29:38, 7.21s/it] {'loss': 0.2848, 'grad_norm': 0.740275600333234, 'learning_rate': 5.286168555386478e-07, 'epoch': 0.86} + 86%|████████▌ | 10443/12188 [56:40<3:29:38, 7.21s/it] 86%|████████▌ | 10444/12188 [56:47<3:27:51, 7.15s/it] {'loss': 0.3113, 'grad_norm': 0.77122509682468, 'learning_rate': 5.280223980662535e-07, 'epoch': 0.86} + 86%|████████▌ | 10444/12188 [56:47<3:27:51, 7.15s/it] 86%|████████▌ | 10445/12188 [56:55<3:28:55, 7.19s/it] {'loss': 0.3407, 'grad_norm': 0.7866464086927509, 'learning_rate': 5.274282563977328e-07, 'epoch': 0.86} + 86%|████████▌ | 10445/12188 [56:55<3:28:55, 7.19s/it] 86%|████████▌ | 10446/12188 [57:02<3:28:50, 7.19s/it] {'loss': 0.264, 'grad_norm': 0.7472908306023793, 'learning_rate': 5.268344305750439e-07, 'epoch': 0.86} + 86%|████████▌ | 10446/12188 [57:02<3:28:50, 7.19s/it] 86%|████████▌ | 10447/12188 [57:09<3:24:43, 7.06s/it] {'loss': 0.2551, 'grad_norm': 0.6482782979690823, 'learning_rate': 5.262409206401198e-07, 'epoch': 0.86} + 86%|████████▌ | 10447/12188 [57:09<3:24:43, 7.06s/it] 86%|████████▌ | 10448/12188 [57:15<3:21:47, 6.96s/it] {'loss': 0.3268, 'grad_norm': 0.8025204862231836, 'learning_rate': 5.256477266348747e-07, 'epoch': 0.86} + 86%|████████▌ | 10448/12188 [57:15<3:21:47, 6.96s/it] 86%|████████▌ | 10449/12188 [57:23<3:27:55, 7.17s/it] {'loss': 0.3102, 'grad_norm': 0.673269270159495, 'learning_rate': 5.250548486011992e-07, 'epoch': 0.86} + 86%|████████▌ | 10449/12188 [57:23<3:27:55, 7.17s/it] 86%|████████▌ | 10450/12188 [57:33<3:54:37, 8.10s/it] {'loss': 0.2879, 'grad_norm': 0.7138217367168923, 'learning_rate': 5.244622865809596e-07, 'epoch': 0.86} + 86%|████████▌ | 10450/12188 [57:33<3:54:37, 8.10s/it] 86%|████████▌ | 10451/12188 [57:40<3:46:09, 7.81s/it] {'loss': 0.3506, 'grad_norm': 0.6954340638861432, 'learning_rate': 5.238700406160036e-07, 'epoch': 0.86} + 86%|████████▌ | 10451/12188 [57:41<3:46:09, 7.81s/it] 86%|████████▌ | 10452/12188 [57:47<3:37:47, 7.53s/it] {'loss': 0.2699, 'grad_norm': 0.6844201280390024, 'learning_rate': 5.232781107481544e-07, 'epoch': 0.86} + 86%|████████▌ | 10452/12188 [57:47<3:37:47, 7.53s/it] 86%|████████▌ | 10453/12188 [57:54<3:29:25, 7.24s/it] {'loss': 0.2571, 'grad_norm': 0.676398154557605, 'learning_rate': 5.226864970192114e-07, 'epoch': 0.86} + 86%|████████▌ | 10453/12188 [57:54<3:29:25, 7.24s/it] 86%|████████▌ | 10454/12188 [58:02<3:35:06, 7.44s/it] {'loss': 0.2765, 'grad_norm': 0.6679555759989788, 'learning_rate': 5.220951994709555e-07, 'epoch': 0.86} + 86%|████████▌ | 10454/12188 [58:02<3:35:06, 7.44s/it] 86%|████████▌ | 10455/12188 [58:09<3:28:53, 7.23s/it] {'loss': 0.2639, 'grad_norm': 0.6921434902122404, 'learning_rate': 5.215042181451418e-07, 'epoch': 0.86} + 86%|████████▌ | 10455/12188 [58:09<3:28:53, 7.23s/it] 86%|████████▌ | 10456/12188 [58:15<3:23:17, 7.04s/it] {'loss': 0.3034, 'grad_norm': 0.8025424103986046, 'learning_rate': 5.209135530835053e-07, 'epoch': 0.86} + 86%|█���██████▌ | 10456/12188 [58:15<3:23:17, 7.04s/it] 86%|████████▌ | 10457/12188 [58:23<3:28:42, 7.23s/it] {'loss': 0.3283, 'grad_norm': 0.7148671966293527, 'learning_rate': 5.203232043277568e-07, 'epoch': 0.86} + 86%|████████▌ | 10457/12188 [58:23<3:28:42, 7.23s/it] 86%|████████▌ | 10458/12188 [58:33<3:50:12, 7.98s/it] {'loss': 0.3211, 'grad_norm': 1.0134099101786544, 'learning_rate': 5.197331719195864e-07, 'epoch': 0.86} + 86%|████████▌ | 10458/12188 [58:33<3:50:12, 7.98s/it] 86%|████████▌ | 10459/12188 [58:39<3:36:16, 7.51s/it] {'loss': 0.2531, 'grad_norm': 0.7256680676321415, 'learning_rate': 5.191434559006625e-07, 'epoch': 0.86} + 86%|████████▌ | 10459/12188 [58:39<3:36:16, 7.51s/it] 86%|████████▌ | 10460/12188 [58:47<3:37:48, 7.56s/it] {'loss': 0.3272, 'grad_norm': 0.7182383618776872, 'learning_rate': 5.185540563126274e-07, 'epoch': 0.86} + 86%|████████▌ | 10460/12188 [58:47<3:37:48, 7.56s/it] 86%|████████▌ | 10461/12188 [58:53<3:31:08, 7.34s/it] {'loss': 0.3385, 'grad_norm': 0.7496723603065563, 'learning_rate': 5.179649731971059e-07, 'epoch': 0.86} + 86%|████████▌ | 10461/12188 [58:54<3:31:08, 7.34s/it] 86%|████████▌ | 10462/12188 [59:00<3:26:08, 7.17s/it] {'loss': 0.2811, 'grad_norm': 0.6891523729107191, 'learning_rate': 5.173762065956967e-07, 'epoch': 0.86} + 86%|████████▌ | 10462/12188 [59:00<3:26:08, 7.17s/it] 86%|████████▌ | 10463/12188 [59:07<3:24:28, 7.11s/it] {'loss': 0.3019, 'grad_norm': 0.6614636908200474, 'learning_rate': 5.167877565499774e-07, 'epoch': 0.86} + 86%|████████▌ | 10463/12188 [59:07<3:24:28, 7.11s/it] 86%|████████▌ | 10464/12188 [59:17<3:48:25, 7.95s/it] {'loss': 0.2909, 'grad_norm': 0.7819332830741355, 'learning_rate': 5.161996231015049e-07, 'epoch': 0.86} + 86%|████████▌ | 10464/12188 [59:17<3:48:25, 7.95s/it] 86%|████████▌ | 10465/12188 [59:24<3:37:22, 7.57s/it] {'loss': 0.3067, 'grad_norm': 0.7788197101768075, 'learning_rate': 5.156118062918098e-07, 'epoch': 0.86} + 86%|████████▌ | 10465/12188 [59:24<3:37:22, 7.57s/it] 86%|████████▌ | 10466/12188 [59:31<3:35:15, 7.50s/it] {'loss': 0.3223, 'grad_norm': 0.659613550161262, 'learning_rate': 5.150243061624055e-07, 'epoch': 0.86} + 86%|████████▌ | 10466/12188 [59:31<3:35:15, 7.50s/it] 86%|████████▌ | 10467/12188 [59:40<3:42:30, 7.76s/it] {'loss': 0.2631, 'grad_norm': 0.6426622950436653, 'learning_rate': 5.144371227547795e-07, 'epoch': 0.86} + 86%|████████▌ | 10467/12188 [59:40<3:42:30, 7.76s/it] 86%|████████▌ | 10468/12188 [59:47<3:35:38, 7.52s/it] {'loss': 0.284, 'grad_norm': 0.6551515609050332, 'learning_rate': 5.138502561103959e-07, 'epoch': 0.86} + 86%|████████▌ | 10468/12188 [59:47<3:35:38, 7.52s/it] 86%|████████▌ | 10469/12188 [59:55<3:40:08, 7.68s/it] {'loss': 0.2899, 'grad_norm': 0.7121862042590171, 'learning_rate': 5.132637062707008e-07, 'epoch': 0.86} + 86%|████████▌ | 10469/12188 [59:55<3:40:08, 7.68s/it] 86%|████████▌ | 10470/12188 [1:00:02<3:38:13, 7.62s/it] {'loss': 0.2746, 'grad_norm': 0.7242889722574, 'learning_rate': 5.126774732771129e-07, 'epoch': 0.86} + 86%|████████▌ | 10470/12188 [1:00:02<3:38:13, 7.62s/it] 86%|████████▌ | 10471/12188 [1:00:09<3:31:18, 7.38s/it] {'loss': 0.3179, 'grad_norm': 0.7314597998236294, 'learning_rate': 5.120915571710328e-07, 'epoch': 0.86} + 86%|████████▌ | 10471/12188 [1:00:09<3:31:18, 7.38s/it] 86%|████████▌ | 10472/12188 [1:00:16<3:28:38, 7.30s/it] {'loss': 0.2807, 'grad_norm': 0.6882312893698928, 'learning_rate': 5.115059579938375e-07, 'epoch': 0.86} + 86%|████████▌ | 10472/12188 [1:00:16<3:28:38, 7.30s/it] 86%|████████▌ | 10473/12188 [1:00:23<3:24:39, 7.16s/it] {'loss': 0.3045, 'grad_norm': 0.6724404733925822, 'learning_rate': 5.10920675786879e-07, 'epoch': 0.86} + 86%|████████▌ | 10473/12188 [1:00:23<3:24:39, 7.16s/it] 86%|████████▌ | 10474/12188 [1:00:30<3:25:35, 7.20s/it] {'loss': 0.3121, 'grad_norm': 0.6899778077785419, 'learning_rate': 5.103357105914913e-07, 'epoch': 0.86} + 86%|████████▌ | 10474/12188 [1:00:30<3:25:35, 7.20s/it] 86%|████████▌ | 10475/12188 [1:00:37<3:25:25, 7.20s/it] {'loss': 0.3075, 'grad_norm': 0.6872568677008554, 'learning_rate': 5.097510624489816e-07, 'epoch': 0.86} + 86%|████████▌ | 10475/12188 [1:00:37<3:25:25, 7.20s/it] 86%|████████▌ | 10476/12188 [1:00:44<3:23:31, 7.13s/it] {'loss': 0.3149, 'grad_norm': 0.6987369269253456, 'learning_rate': 5.091667314006371e-07, 'epoch': 0.86} + 86%|████████▌ | 10476/12188 [1:00:44<3:23:31, 7.13s/it] 86%|████████▌ | 10477/12188 [1:00:52<3:27:32, 7.28s/it] {'loss': 0.301, 'grad_norm': 0.6898171363510426, 'learning_rate': 5.085827174877245e-07, 'epoch': 0.86} + 86%|████████▌ | 10477/12188 [1:00:52<3:27:32, 7.28s/it] 86%|████████▌ | 10478/12188 [1:00:59<3:23:39, 7.15s/it] {'loss': 0.2811, 'grad_norm': 0.7249993497516516, 'learning_rate': 5.079990207514835e-07, 'epoch': 0.86} + 86%|████████▌ | 10478/12188 [1:00:59<3:23:39, 7.15s/it] 86%|████████▌ | 10479/12188 [1:01:06<3:21:01, 7.06s/it] {'loss': 0.3054, 'grad_norm': 0.6349448895992833, 'learning_rate': 5.074156412331354e-07, 'epoch': 0.86} + 86%|████████▌ | 10479/12188 [1:01:06<3:21:01, 7.06s/it] 86%|████████▌ | 10480/12188 [1:01:13<3:20:38, 7.05s/it] {'loss': 0.3121, 'grad_norm': 0.7958607152716314, 'learning_rate': 5.068325789738771e-07, 'epoch': 0.86} + 86%|████████▌ | 10480/12188 [1:01:13<3:20:38, 7.05s/it] 86%|████████▌ | 10481/12188 [1:01:20<3:25:33, 7.23s/it] {'loss': 0.279, 'grad_norm': 0.6541176178380216, 'learning_rate': 5.062498340148819e-07, 'epoch': 0.86} + 86%|████████▌ | 10481/12188 [1:01:20<3:25:33, 7.23s/it] 86%|████████▌ | 10482/12188 [1:01:27<3:21:38, 7.09s/it] {'loss': 0.2694, 'grad_norm': 0.6699619987391173, 'learning_rate': 5.05667406397305e-07, 'epoch': 0.86} + 86%|████████▌ | 10482/12188 [1:01:27<3:21:38, 7.09s/it] 86%|████████▌ | 10483/12188 [1:01:33<3:16:17, 6.91s/it] {'loss': 0.2864, 'grad_norm': 0.6631662651713326, 'learning_rate': 5.050852961622738e-07, 'epoch': 0.86} + 86%|████████▌ | 10483/12188 [1:01:34<3:16:17, 6.91s/it] 86%|████████▌ | 10484/12188 [1:01:41<3:17:04, 6.94s/it] {'loss': 0.2884, 'grad_norm': 0.6733560806983567, 'learning_rate': 5.045035033508977e-07, 'epoch': 0.86} + 86%|████████▌ | 10484/12188 [1:01:41<3:17:04, 6.94s/it] 86%|████████▌ | 10485/12188 [1:01:47<3:17:10, 6.95s/it] {'loss': 0.3074, 'grad_norm': 0.753403254868042, 'learning_rate': 5.039220280042623e-07, 'epoch': 0.86} + 86%|████████▌ | 10485/12188 [1:01:47<3:17:10, 6.95s/it] 86%|████████▌ | 10486/12188 [1:01:54<3:15:23, 6.89s/it] {'loss': 0.2917, 'grad_norm': 0.7152986149465859, 'learning_rate': 5.03340870163429e-07, 'epoch': 0.86} + 86%|████████▌ | 10486/12188 [1:01:54<3:15:23, 6.89s/it] 86%|████████▌ | 10487/12188 [1:02:04<3:42:46, 7.86s/it] {'loss': 0.3146, 'grad_norm': 0.7349895724552785, 'learning_rate': 5.027600298694397e-07, 'epoch': 0.86} + 86%|████████▌ | 10487/12188 [1:02:04<3:42:46, 7.86s/it] 86%|████████▌ | 10488/12188 [1:02:12<3:38:04, 7.70s/it] {'loss': 0.2829, 'grad_norm': 0.6238752080764198, 'learning_rate': 5.021795071633113e-07, 'epoch': 0.86} + 86%|████████▌ | 10488/12188 [1:02:12<3:38:04, 7.70s/it] 86%|████████▌ | 10489/12188 [1:02:18<3:30:19, 7.43s/it] {'loss': 0.3033, 'grad_norm': 0.758593555709412, 'learning_rate': 5.015993020860394e-07, 'epoch': 0.86} + 86%|████████▌ | 10489/12188 [1:02:18<3:30:19, 7.43s/it] 86%|████████▌ | 10490/12188 [1:02:25<3:25:03, 7.25s/it] {'loss': 0.2973, 'grad_norm': 0.7564498645106432, 'learning_rate': 5.010194146785985e-07, 'epoch': 0.86} + 86%|████████▌ | 10490/12188 [1:02:25<3:25:03, 7.25s/it] 86%|████████▌ | 10491/12188 [1:02:33<3:29:56, 7.42s/it] {'loss': 0.2763, 'grad_norm': 0.6663847303224121, 'learning_rate': 5.004398449819376e-07, 'epoch': 0.86} + 86%|████████▌ | 10491/12188 [1:02:33<3:29:56, 7.42s/it] 86%|████████▌ | 10492/12188 [1:02:42<3:42:07, 7.86s/it] {'loss': 0.378, 'grad_norm': 0.7854330484093242, 'learning_rate': 4.998605930369865e-07, 'epoch': 0.86} + 86%|████████▌ | 10492/12188 [1:02:42<3:42:07, 7.86s/it] 86%|████████▌ | 10493/12188 [1:02:49<3:32:49, 7.53s/it] {'loss': 0.313, 'grad_norm': 0.6984120660348335, 'learning_rate': 4.992816588846495e-07, 'epoch': 0.86} + 86%|████████▌ | 10493/12188 [1:02:49<3:32:49, 7.53s/it] 86%|████████▌ | 10494/12188 [1:02:57<3:34:31, 7.60s/it] {'loss': 0.3145, 'grad_norm': 0.6882376371826255, 'learning_rate': 4.987030425658118e-07, 'epoch': 0.86} + 86%|████████▌ | 10494/12188 [1:02:57<3:34:31, 7.60s/it] 86%|████████▌ | 10495/12188 [1:03:03<3:28:33, 7.39s/it] {'loss': 0.2971, 'grad_norm': 0.6997104342234349, 'learning_rate': 4.981247441213333e-07, 'epoch': 0.86} + 86%|████████▌ | 10495/12188 [1:03:03<3:28:33, 7.39s/it] 86%|████████▌ | 10496/12188 [1:03:11<3:26:06, 7.31s/it] {'loss': 0.3301, 'grad_norm': 0.63931761248081, 'learning_rate': 4.975467635920517e-07, 'epoch': 0.86} + 86%|████████▌ | 10496/12188 [1:03:11<3:26:06, 7.31s/it] 86%|████████▌ | 10497/12188 [1:03:20<3:42:45, 7.90s/it] {'loss': 0.2896, 'grad_norm': 1.1531016897906137, 'learning_rate': 4.969691010187838e-07, 'epoch': 0.86} + 86%|████████▌ | 10497/12188 [1:03:20<3:42:45, 7.90s/it] 86%|████████▌ | 10498/12188 [1:03:28<3:45:43, 8.01s/it] {'loss': 0.3208, 'grad_norm': 0.6868647661993389, 'learning_rate': 4.963917564423243e-07, 'epoch': 0.86} + 86%|████████▌ | 10498/12188 [1:03:28<3:45:43, 8.01s/it] 86%|████████▌ | 10499/12188 [1:03:37<3:56:37, 8.41s/it] {'loss': 0.2809, 'grad_norm': 0.6673085592368974, 'learning_rate': 4.958147299034421e-07, 'epoch': 0.86} + 86%|████████▌ | 10499/12188 [1:03:37<3:56:37, 8.41s/it] 86%|████████▌ | 10500/12188 [1:03:44<3:41:20, 7.87s/it] {'loss': 0.2981, 'grad_norm': 0.6787717353470205, 'learning_rate': 4.952380214428886e-07, 'epoch': 0.86} + 86%|████████▌ | 10500/12188 [1:03:44<3:41:20, 7.87s/it] 86%|████████▌ | 10501/12188 [1:03:51<3:34:46, 7.64s/it] {'loss': 0.3033, 'grad_norm': 0.6513231179327369, 'learning_rate': 4.946616311013875e-07, 'epoch': 0.86} + 86%|████████▌ | 10501/12188 [1:03:51<3:34:46, 7.64s/it] 86%|████████▌ | 10502/12188 [1:03:58<3:27:38, 7.39s/it] {'loss': 0.3027, 'grad_norm': 1.2266266786498807, 'learning_rate': 4.940855589196436e-07, 'epoch': 0.86} + 86%|████████▌ | 10502/12188 [1:03:58<3:27:38, 7.39s/it] 86%|████████▌ | 10503/12188 [1:04:05<3:24:47, 7.29s/it] {'loss': 0.3193, 'grad_norm': 0.8163264517860556, 'learning_rate': 4.935098049383391e-07, 'epoch': 0.86} + 86%|████████▌ | 10503/12188 [1:04:05<3:24:47, 7.29s/it] 86%|████████▌ | 10504/12188 [1:04:13<3:26:17, 7.35s/it] {'loss': 0.3101, 'grad_norm': 0.7063359504369663, 'learning_rate': 4.929343691981309e-07, 'epoch': 0.86} + 86%|████████▌ | 10504/12188 [1:04:13<3:26:17, 7.35s/it] 86%|████████▌ | 10505/12188 [1:04:22<3:42:23, 7.93s/it] {'loss': 0.3024, 'grad_norm': 0.7150605256071769, 'learning_rate': 4.923592517396575e-07, 'epoch': 0.86} + 86%|████████▌ | 10505/12188 [1:04:22<3:42:23, 7.93s/it] 86%|████████▌ | 10506/12188 [1:04:29<3:39:47, 7.84s/it] {'loss': 0.2988, 'grad_norm': 0.7466792144958719, 'learning_rate': 4.917844526035304e-07, 'epoch': 0.86} + 86%|████████▌ | 10506/12188 [1:04:29<3:39:47, 7.84s/it] 86%|████████▌ | 10507/12188 [1:04:36<3:31:20, 7.54s/it] {'loss': 0.3001, 'grad_norm': 0.6692988897640089, 'learning_rate': 4.912099718303437e-07, 'epoch': 0.86} + 86%|████████▌ | 10507/12188 [1:04:36<3:31:20, 7.54s/it] 86%|████████▌ | 10508/12188 [1:04:43<3:27:31, 7.41s/it] {'loss': 0.2895, 'grad_norm': 0.6891191662524733, 'learning_rate': 4.906358094606645e-07, 'epoch': 0.86} + 86%|████████▌ | 10508/12188 [1:04:43<3:27:31, 7.41s/it] 86%|████████▌ | 10509/12188 [1:04:51<3:32:17, 7.59s/it] {'loss': 0.2814, 'grad_norm': 0.7160723844988542, 'learning_rate': 4.900619655350386e-07, 'epoch': 0.86} + 86%|████████▌ | 10509/12188 [1:04:51<3:32:17, 7.59s/it] 86%|████████▌ | 10510/12188 [1:05:01<3:46:17, 8.09s/it] {'loss': 0.3042, 'grad_norm': 0.6645773065970483, 'learning_rate': 4.89488440093992e-07, 'epoch': 0.86} + 86%|████████▌ | 10510/12188 [1:05:01<3:46:17, 8.09s/it] 86%|████████▌ | 10511/12188 [1:05:08<3:36:43, 7.75s/it] {'loss': 0.2725, 'grad_norm': 0.7181509670597426, 'learning_rate': 4.889152331780245e-07, 'epoch': 0.86} + 86%|████████▌ | 10511/12188 [1:05:08<3:36:43, 7.75s/it] 86%|████████▌ | 10512/12188 [1:05:14<3:25:47, 7.37s/it] {'loss': 0.3154, 'grad_norm': 0.7054924422197454, 'learning_rate': 4.883423448276154e-07, 'epoch': 0.86} + 86%|████████▌ | 10512/12188 [1:05:14<3:25:47, 7.37s/it] 86%|████████▋ | 10513/12188 [1:05:21<3:23:20, 7.28s/it] {'loss': 0.2696, 'grad_norm': 0.6474819174556822, 'learning_rate': 4.877697750832222e-07, 'epoch': 0.86} + 86%|████████▋ | 10513/12188 [1:05:21<3:23:20, 7.28s/it] 86%|████████▋ | 10514/12188 [1:05:29<3:27:01, 7.42s/it] {'loss': 0.2755, 'grad_norm': 0.6936041080635829, 'learning_rate': 4.871975239852766e-07, 'epoch': 0.86} + 86%|████████▋ | 10514/12188 [1:05:29<3:27:01, 7.42s/it] 86%|████████▋ | 10515/12188 [1:05:36<3:22:53, 7.28s/it] {'loss': 0.3272, 'grad_norm': 0.7249458561088006, 'learning_rate': 4.866255915741924e-07, 'epoch': 0.86} + 86%|████████▋ | 10515/12188 [1:05:36<3:22:53, 7.28s/it] 86%|████████▋ | 10516/12188 [1:05:43<3:22:54, 7.28s/it] {'loss': 0.3214, 'grad_norm': 0.739793704839267, 'learning_rate': 4.860539778903579e-07, 'epoch': 0.86} + 86%|████████▋ | 10516/12188 [1:05:43<3:22:54, 7.28s/it] 86%|████████▋ | 10517/12188 [1:05:50<3:19:35, 7.17s/it] {'loss': 0.2791, 'grad_norm': 0.6743627336815983, 'learning_rate': 4.854826829741388e-07, 'epoch': 0.86} + 86%|████████▋ | 10517/12188 [1:05:50<3:19:35, 7.17s/it][2025-08-18 12:04:02,950] [WARNING] [stage3.py:2118:step] 1 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time + 86%|████████▋ | 10518/12188 [1:06:01<3:54:29, 8.43s/it] {'loss': 0.2941, 'grad_norm': 0.8658713750638113, 'learning_rate': 4.849117068658799e-07, 'epoch': 0.86} + 86%|████████▋ | 10518/12188 [1:06:01<3:54:29, 8.43s/it] 86%|████████▋ | 10519/12188 [1:06:08<3:42:46, 8.01s/it] {'loss': 0.2717, 'grad_norm': 0.6522476186278326, 'learning_rate': 4.84341049605902e-07, 'epoch': 0.86} + 86%|████████▋ | 10519/12188 [1:06:08<3:42:46, 8.01s/it] 86%|████████▋ | 10520/12188 [1:06:16<3:40:20, 7.93s/it] {'loss': 0.3045, 'grad_norm': 0.7976895857172894, 'learning_rate': 4.837707112345047e-07, 'epoch': 0.86} + 86%|████████▋ | 10520/12188 [1:06:16<3:40:20, 7.93s/it] 86%|████████▋ | 10521/12188 [1:06:23<3:33:24, 7.68s/it] {'loss': 0.3186, 'grad_norm': 0.7330548131610825, 'learning_rate': 4.832006917919629e-07, 'epoch': 0.86} + 86%|████████▋ | 10521/12188 [1:06:23<3:33:24, 7.68s/it] 86%|████████▋ | 10522/12188 [1:06:30<3:26:27, 7.44s/it] {'loss': 0.2735, 'grad_norm': 0.6931152034899195, 'learning_rate': 4.826309913185323e-07, 'epoch': 0.86} + 86%|████████▋ | 10522/12188 [1:06:30<3:26:27, 7.44s/it] 86%|████████▋ | 10523/12188 [1:06:37<3:22:57, 7.31s/it] {'loss': 0.2942, 'grad_norm': 0.6936469542269598, 'learning_rate': 4.820616098544439e-07, 'epoch': 0.86} + 86%|████████▋ | 10523/12188 [1:06:37<3:22:57, 7.31s/it] 86%|████████▋ | 10524/12188 [1:06:44<3:19:26, 7.19s/it] {'loss': 0.2855, 'grad_norm': 0.6911985445388491, 'learning_rate': 4.814925474399046e-07, 'epoch': 0.86} + 86%|████████▋ | 10524/12188 [1:06:44<3:19:26, 7.19s/it] 86%|████████▋ | 10525/12188 [1:06:53<3:37:34, 7.85s/it] {'loss': 0.2784, 'grad_norm': 0.6528171547816409, 'learning_rate': 4.809238041151021e-07, 'epoch': 0.86} + 86%|████████▋ | 10525/12188 [1:06:53<3:37:34, 7.85s/it] 86%|████████▋ | 10526/12188 [1:07:01<3:32:18, 7.66s/it] {'loss': 0.3233, 'grad_norm': 0.7140323963227849, 'learning_rate': 4.803553799202016e-07, 'epoch': 0.86} + 86%|████████▋ | 10526/12188 [1:07:01<3:32:18, 7.66s/it] 86%|████████▋ | 10527/12188 [1:07:08<3:30:26, 7.60s/it] {'loss': 0.296, 'grad_norm': 0.6922333382148136, 'learning_rate': 4.797872748953414e-07, 'epoch': 0.86} + 86%|████████▋ | 10527/12188 [1:07:08<3:30:26, 7.60s/it] 86%|████████▋ | 10528/12188 [1:07:17<3:42:05, 8.03s/it] {'loss': 0.3159, 'grad_norm': 0.755751191371869, 'learning_rate': 4.792194890806423e-07, 'epoch': 0.86} + 86%|████████▋ | 10528/12188 [1:07:17<3:42:05, 8.03s/it] 86%|████████▋ | 10529/12188 [1:07:24<3:35:46, 7.80s/it] {'loss': 0.3629, 'grad_norm': 0.7010512023219468, 'learning_rate': 4.786520225161989e-07, 'epoch': 0.86} + 86%|████████▋ | 10529/12188 [1:07:24<3:35:46, 7.80s/it] 86%|████████▋ | 10530/12188 [1:07:32<3:30:16, 7.61s/it] {'loss': 0.2855, 'grad_norm': 0.6106439593008155, 'learning_rate': 4.780848752420858e-07, 'epoch': 0.86} + 86%|████████▋ | 10530/12188 [1:07:32<3:30:16, 7.61s/it] 86%|████████▋ | 10531/12188 [1:07:39<3:25:48, 7.45s/it] {'loss': 0.3256, 'grad_norm': 0.6927639054312313, 'learning_rate': 4.775180472983549e-07, 'epoch': 0.86} + 86%|████████▋ | 10531/12188 [1:07:39<3:25:48, 7.45s/it] 86%|████████▋ | 10532/12188 [1:07:45<3:19:01, 7.21s/it] {'loss': 0.3056, 'grad_norm': 0.7454284309097802, 'learning_rate': 4.769515387250317e-07, 'epoch': 0.86} + 86%|████████▋ | 10532/12188 [1:07:45<3:19:01, 7.21s/it] 86%|████████▋ | 10533/12188 [1:07:53<3:20:08, 7.26s/it] {'loss': 0.3228, 'grad_norm': 0.7037712285585387, 'learning_rate': 4.763853495621251e-07, 'epoch': 0.86} + 86%|████████▋ | 10533/12188 [1:07:53<3:20:08, 7.26s/it] 86%|████████▋ | 10534/12188 [1:08:00<3:17:06, 7.15s/it] {'loss': 0.3025, 'grad_norm': 0.7106786644762823, 'learning_rate': 4.75819479849618e-07, 'epoch': 0.86} + 86%|████████▋ | 10534/12188 [1:08:00<3:17:06, 7.15s/it] 86%|████████▋ | 10535/12188 [1:08:06<3:12:49, 7.00s/it] {'loss': 0.2879, 'grad_norm': 0.6866205349653082, 'learning_rate': 4.7525392962746997e-07, 'epoch': 0.86} + 86%|████████▋ | 10535/12188 [1:08:06<3:12:49, 7.00s/it] 86%|████████▋ | 10536/12188 [1:08:13<3:10:47, 6.93s/it] {'loss': 0.2761, 'grad_norm': 0.6732242113803065, 'learning_rate': 4.746886989356203e-07, 'epoch': 0.86} + 86%|████████▋ | 10536/12188 [1:08:13<3:10:47, 6.93s/it] 86%|████████▋ | 10537/12188 [1:08:20<3:08:44, 6.86s/it] {'loss': 0.2824, 'grad_norm': 0.692972879479816, 'learning_rate': 4.741237878139837e-07, 'epoch': 0.86} + 86%|████████▋ | 10537/12188 [1:08:20<3:08:44, 6.86s/it] 86%|████████▋ | 10538/12188 [1:08:27<3:14:28, 7.07s/it] {'loss': 0.3321, 'grad_norm': 0.7355698215011806, 'learning_rate': 4.735591963024533e-07, 'epoch': 0.86} + 86%|████████▋ | 10538/12188 [1:08:27<3:14:28, 7.07s/it] 86%|████████▋ | 10539/12188 [1:08:34<3:13:47, 7.05s/it] {'loss': 0.3283, 'grad_norm': 0.7520244934976308, 'learning_rate': 4.72994924440901e-07, 'epoch': 0.86} + 86%|████████▋ | 10539/12188 [1:08:34<3:13:47, 7.05s/it] 86%|████████▋ | 10540/12188 [1:08:42<3:17:04, 7.18s/it] {'loss': 0.3028, 'grad_norm': 0.7449849003558008, 'learning_rate': 4.7243097226917334e-07, 'epoch': 0.86} + 86%|████████▋ | 10540/12188 [1:08:42<3:17:04, 7.18s/it] 86%|████████▋ | 10541/12188 [1:08:50<3:28:08, 7.58s/it] {'loss': 0.3012, 'grad_norm': 0.7487393427162734, 'learning_rate': 4.718673398270973e-07, 'epoch': 0.86} + 86%|████████▋ | 10541/12188 [1:08:50<3:28:08, 7.58s/it] 86%|████████▋ | 10542/12188 [1:08:57<3:20:14, 7.30s/it] {'loss': 0.3218, 'grad_norm': 0.7006890708276878, 'learning_rate': 4.7130402715447386e-07, 'epoch': 0.86} + 86%|████████▋ | 10542/12188 [1:08:57<3:20:14, 7.30s/it] 87%|████████▋ | 10543/12188 [1:09:04<3:17:00, 7.19s/it] {'loss': 0.2836, 'grad_norm': 0.6759176719636786, 'learning_rate': 4.7074103429108397e-07, 'epoch': 0.86} + 87%|████████▋ | 10543/12188 [1:09:04<3:17:00, 7.19s/it] 87%|████████▋ | 10544/12188 [1:09:11<3:13:31, 7.06s/it] {'loss': 0.3086, 'grad_norm': 0.720447192177282, 'learning_rate': 4.7017836127668636e-07, 'epoch': 0.87} + 87%|████████▋ | 10544/12188 [1:09:11<3:13:31, 7.06s/it] 87%|████████▋ | 10545/12188 [1:09:18<3:16:57, 7.19s/it] {'loss': 0.2904, 'grad_norm': 0.7512951729160456, 'learning_rate': 4.696160081510143e-07, 'epoch': 0.87} + 87%|████████▋ | 10545/12188 [1:09:18<3:16:57, 7.19s/it] 87%|████████▋ | 10546/12188 [1:09:25<3:10:30, 6.96s/it] {'loss': 0.3021, 'grad_norm': 0.7252883439424053, 'learning_rate': 4.69053974953782e-07, 'epoch': 0.87} + 87%|████████▋ | 10546/12188 [1:09:25<3:10:30, 6.96s/it] 87%|████████▋ | 10547/12188 [1:09:31<3:05:11, 6.77s/it] {'loss': 0.2788, 'grad_norm': 0.6633344323919425, 'learning_rate': 4.684922617246773e-07, 'epoch': 0.87} + 87%|████████▋ | 10547/12188 [1:09:31<3:05:11, 6.77s/it] 87%|████████▋ | 10548/12188 [1:09:38<3:07:54, 6.87s/it] {'loss': 0.3082, 'grad_norm': 0.7353065783758499, 'learning_rate': 4.679308685033701e-07, 'epoch': 0.87} + 87%|████████▋ | 10548/12188 [1:09:38<3:07:54, 6.87s/it] 87%|████████▋ | 10549/12188 [1:09:45<3:07:01, 6.85s/it] {'loss': 0.3033, 'grad_norm': 0.7351368057104488, 'learning_rate': 4.67369795329502e-07, 'epoch': 0.87} + 87%|████████▋ | 10549/12188 [1:09:45<3:07:01, 6.85s/it] 87%|████████▋ | 10550/12188 [1:09:52<3:06:34, 6.83s/it] {'loss': 0.3084, 'grad_norm': 1.0513677658768643, 'learning_rate': 4.6680904224269797e-07, 'epoch': 0.87} + 87%|████████▋ | 10550/12188 [1:09:52<3:06:34, 6.83s/it] 87%|████████▋ | 10551/12188 [1:09:59<3:09:37, 6.95s/it] {'loss': 0.3034, 'grad_norm': 0.7167034793927923, 'learning_rate': 4.6624860928255576e-07, 'epoch': 0.87} + 87%|████████▋ | 10551/12188 [1:09:59<3:09:37, 6.95s/it] 87%|████████▋ | 10552/12188 [1:10:06<3:08:47, 6.92s/it] {'loss': 0.3209, 'grad_norm': 0.7631000666520177, 'learning_rate': 4.6568849648865323e-07, 'epoch': 0.87} + 87%|████████▋ | 10552/12188 [1:10:06<3:08:47, 6.92s/it] 87%|████████▋ | 10553/12188 [1:10:13<3:08:52, 6.93s/it] {'loss': 0.2922, 'grad_norm': 0.7019222147190464, 'learning_rate': 4.651287039005431e-07, 'epoch': 0.87} + 87%|████████▋ | 10553/12188 [1:10:13<3:08:52, 6.93s/it] 87%|████████▋ | 10554/12188 [1:10:20<3:10:34, 7.00s/it] {'loss': 0.2878, 'grad_norm': 0.9611881331111695, 'learning_rate': 4.6456923155775934e-07, 'epoch': 0.87} + 87%|████████▋ | 10554/12188 [1:10:20<3:10:34, 7.00s/it] 87%|████████▋ | 10555/12188 [1:10:27<3:10:14, 6.99s/it] {'loss': 0.2824, 'grad_norm': 0.6771159758782442, 'learning_rate': 4.640100794998087e-07, 'epoch': 0.87} + 87%|████████▋ | 10555/12188 [1:10:27<3:10:14, 6.99s/it] 87%|████████▋ | 10556/12188 [1:10:34<3:11:00, 7.02s/it] {'loss': 0.2724, 'grad_norm': 0.7251380706202112, 'learning_rate': 4.6345124776617847e-07, 'epoch': 0.87} + 87%|████████▋ | 10556/12188 [1:10:34<3:11:00, 7.02s/it] 87%|████████▋ | 10557/12188 [1:10:40<3:06:55, 6.88s/it] {'loss': 0.3169, 'grad_norm': 0.726816715949741, 'learning_rate': 4.628927363963337e-07, 'epoch': 0.87} + 87%|████████▋ | 10557/12188 [1:10:40<3:06:55, 6.88s/it] 87%|████████▋ | 10558/12188 [1:10:48<3:10:20, 7.01s/it] {'loss': 0.2807, 'grad_norm': 0.6412088339476956, 'learning_rate': 4.623345454297135e-07, 'epoch': 0.87} + 87%|████████▋ | 10558/12188 [1:10:48<3:10:20, 7.01s/it] 87%|████████▋ | 10559/12188 [1:10:55<3:09:51, 6.99s/it] {'loss': 0.2518, 'grad_norm': 0.6671857490547741, 'learning_rate': 4.617766749057384e-07, 'epoch': 0.87} + 87%|████████▋ | 10559/12188 [1:10:55<3:09:51, 6.99s/it] 87%|████████▋ | 10560/12188 [1:11:02<3:10:03, 7.00s/it] {'loss': 0.2681, 'grad_norm': 0.8439287938511166, 'learning_rate': 4.61219124863802e-07, 'epoch': 0.87} + 87%|████████▋ | 10560/12188 [1:11:02<3:10:03, 7.00s/it] 87%|████████▋ | 10561/12188 [1:11:09<3:16:26, 7.24s/it] {'loss': 0.2994, 'grad_norm': 0.6899404171727658, 'learning_rate': 4.606618953432795e-07, 'epoch': 0.87} + 87%|████████▋ | 10561/12188 [1:11:09<3:16:26, 7.24s/it] 87%|████████▋ | 10562/12188 [1:11:16<3:12:28, 7.10s/it] {'loss': 0.2993, 'grad_norm': 0.7152171473913713, 'learning_rate': 4.601049863835216e-07, 'epoch': 0.87} + 87%|████████▋ | 10562/12188 [1:11:16<3:12:28, 7.10s/it] 87%|████████▋ | 10563/12188 [1:11:23<3:12:03, 7.09s/it] {'loss': 0.2816, 'grad_norm': 0.6949952352821602, 'learning_rate': 4.5954839802385575e-07, 'epoch': 0.87} + 87%|████████▋ | 10563/12188 [1:11:23<3:12:03, 7.09s/it] 87%|████████▋ | 10564/12188 [1:11:30<3:11:40, 7.08s/it] {'loss': 0.2801, 'grad_norm': 0.6881221182313383, 'learning_rate': 4.5899213030358657e-07, 'epoch': 0.87} + 87%|████████▋ | 10564/12188 [1:11:30<3:11:40, 7.08s/it] 87%|████████▋ | 10565/12188 [1:11:37<3:10:32, 7.04s/it] {'loss': 0.2714, 'grad_norm': 0.7326630980514716, 'learning_rate': 4.5843618326199713e-07, 'epoch': 0.87} + 87%|████████▋ | 10565/12188 [1:11:37<3:10:32, 7.04s/it] 87%|████████▋ | 10566/12188 [1:11:44<3:07:23, 6.93s/it] {'loss': 0.3231, 'grad_norm': 0.7083870209158682, 'learning_rate': 4.578805569383482e-07, 'epoch': 0.87} + 87%|████████▋ | 10566/12188 [1:11:44<3:07:23, 6.93s/it] 87%|████████▋ | 10567/12188 [1:11:51<3:10:30, 7.05s/it] {'loss': 0.3037, 'grad_norm': 0.7200189939072313, 'learning_rate': 4.5732525137187733e-07, 'epoch': 0.87} + 87%|████████▋ | 10567/12188 [1:11:51<3:10:30, 7.05s/it] 87%|████████▋ | 10568/12188 [1:12:01<3:29:33, 7.76s/it] {'loss': 0.2819, 'grad_norm': 0.6893692600349182, 'learning_rate': 4.567702666017976e-07, 'epoch': 0.87} + 87%|████████▋ | 10568/12188 [1:12:01<3:29:33, 7.76s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7fbcea6bd850> +[Try #0] Failed to fetch sample 4462166 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7fbcea6bd850> +Problematic sample: {'image': '20240822_131046_before_screenshot_sub0.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Organic'"}, {'from': 'gpt', 'value': '\nclick(x=0.5535, y=0.139)\n'}]} + 87%|████████▋ | 10569/12188 [1:12:09<3:31:05, 7.82s/it] {'loss': 0.2975, 'grad_norm': 0.720809896418474, 'learning_rate': 4.562156026673031e-07, 'epoch': 0.87} + 87%|████████▋ | 10569/12188 [1:12:09<3:31:05, 7.82s/it] 87%|████████▋ | 10570/12188 [1:12:16<3:24:36, 7.59s/it] {'loss': 0.316, 'grad_norm': 0.6870960957809871, 'learning_rate': 4.5566125960756324e-07, 'epoch': 0.87} + 87%|████████▋ | 10570/12188 [1:12:16<3:24:36, 7.59s/it] 87%|████████▋ | 10571/12188 [1:12:23<3:22:21, 7.51s/it] {'loss': 0.3079, 'grad_norm': 0.7107302822751841, 'learning_rate': 4.5510723746172326e-07, 'epoch': 0.87} + 87%|████████▋ | 10571/12188 [1:12:23<3:22:21, 7.51s/it] 87%|████████▋ | 10572/12188 [1:12:31<3:25:50, 7.64s/it] {'loss': 0.3149, 'grad_norm': 0.7397025860408882, 'learning_rate': 4.545535362689091e-07, 'epoch': 0.87} + 87%|████████▋ | 10572/12188 [1:12:31<3:25:50, 7.64s/it] 87%|████████▋ | 10573/12188 [1:12:38<3:18:20, 7.37s/it] {'loss': 0.3236, 'grad_norm': 0.6810261807759497, 'learning_rate': 4.5400015606822003e-07, 'epoch': 0.87} + 87%|████████▋ | 10573/12188 [1:12:38<3:18:20, 7.37s/it] 87%|████████▋ | 10574/12188 [1:12:45<3:13:35, 7.20s/it] {'loss': 0.2845, 'grad_norm': 0.7161431316830457, 'learning_rate': 4.5344709689873703e-07, 'epoch': 0.87} + 87%|████████▋ | 10574/12188 [1:12:45<3:13:35, 7.20s/it] 87%|████████▋ | 10575/12188 [1:12:51<3:09:32, 7.05s/it] {'loss': 0.2914, 'grad_norm': 0.7221352003997565, 'learning_rate': 4.5289435879951605e-07, 'epoch': 0.87} + 87%|████████▋ | 10575/12188 [1:12:51<3:09:32, 7.05s/it] 87%|████████▋ | 10576/12188 [1:12:59<3:15:59, 7.29s/it] {'loss': 0.3302, 'grad_norm': 0.7101817454258936, 'learning_rate': 4.523419418095898e-07, 'epoch': 0.87} + 87%|████████▋ | 10576/12188 [1:12:59<3:15:59, 7.29s/it] 87%|████████▋ | 10577/12188 [1:13:06<3:14:31, 7.25s/it] {'loss': 0.3252, 'grad_norm': 0.7840770268799913, 'learning_rate': 4.5178984596796815e-07, 'epoch': 0.87} + 87%|████████▋ | 10577/12188 [1:13:06<3:14:31, 7.25s/it] 87%|████████▋ | 10578/12188 [1:13:14<3:15:51, 7.30s/it] {'loss': 0.2897, 'grad_norm': 0.7050016158176936, 'learning_rate': 4.5123807131364164e-07, 'epoch': 0.87} + 87%|████████▋ | 10578/12188 [1:13:14<3:15:51, 7.30s/it] 87%|████████▋ | 10579/12188 [1:13:22<3:26:13, 7.69s/it] {'loss': 0.3033, 'grad_norm': 0.6984791569458373, 'learning_rate': 4.5068661788557345e-07, 'epoch': 0.87} + 87%|████████▋ | 10579/12188 [1:13:22<3:26:13, 7.69s/it] 87%|████████▋ | 10580/12188 [1:13:29<3:18:02, 7.39s/it] {'loss': 0.3031, 'grad_norm': 0.743683896425943, 'learning_rate': 4.5013548572270806e-07, 'epoch': 0.87} + 87%|████████▋ | 10580/12188 [1:13:29<3:18:02, 7.39s/it] 87%|████████▋ | 10581/12188 [1:13:36<3:19:04, 7.43s/it] {'loss': 0.2773, 'grad_norm': 1.5472108259906454, 'learning_rate': 4.495846748639643e-07, 'epoch': 0.87} + 87%|████████▋ | 10581/12188 [1:13:37<3:19:04, 7.43s/it] 87%|████████▋ | 10582/12188 [1:13:48<3:51:13, 8.64s/it] {'loss': 0.2793, 'grad_norm': 0.6688203088750657, 'learning_rate': 4.4903418534824053e-07, 'epoch': 0.87} + 87%|████████▋ | 10582/12188 [1:13:48<3:51:13, 8.64s/it] 87%|████████▋ | 10583/12188 [1:13:55<3:36:42, 8.10s/it] {'loss': 0.2884, 'grad_norm': 1.228571693378322, 'learning_rate': 4.4848401721440947e-07, 'epoch': 0.87} + 87%|████████▋ | 10583/12188 [1:13:55<3:36:42, 8.10s/it] 87%|████████▋ | 10584/12188 [1:14:02<3:26:26, 7.72s/it] {'loss': 0.3061, 'grad_norm': 0.6936879299512503, 'learning_rate': 4.4793417050132513e-07, 'epoch': 0.87} + 87%|████████▋ | 10584/12188 [1:14:02<3:26:26, 7.72s/it] 87%|████████▋ | 10585/12188 [1:14:08<3:18:50, 7.44s/it] {'loss': 0.2913, 'grad_norm': 0.7254082983107917, 'learning_rate': 4.47384645247817e-07, 'epoch': 0.87} + 87%|████████▋ | 10585/12188 [1:14:08<3:18:50, 7.44s/it] 87%|████████▋ | 10586/12188 [1:14:15<3:11:08, 7.16s/it] {'loss': 0.2865, 'grad_norm': 0.724802576672017, 'learning_rate': 4.468354414926901e-07, 'epoch': 0.87} + 87%|████████▋ | 10586/12188 [1:14:15<3:11:08, 7.16s/it] 87%|████████▋ | 10587/12188 [1:14:22<3:10:55, 7.16s/it] {'loss': 0.2884, 'grad_norm': 0.6828551772827268, 'learning_rate': 4.4628655927472896e-07, 'epoch': 0.87} + 87%|████████▋ | 10587/12188 [1:14:22<3:10:55, 7.16s/it] 87%|████████▋ | 10588/12188 [1:14:29<3:08:03, 7.05s/it] {'loss': 0.2846, 'grad_norm': 0.6960262516182695, 'learning_rate': 4.45737998632696e-07, 'epoch': 0.87} + 87%|████████▋ | 10588/12188 [1:14:29<3:08:03, 7.05s/it] 87%|████████▋ | 10589/12188 [1:14:36<3:06:07, 6.98s/it] {'loss': 0.3265, 'grad_norm': 0.7135449035546079, 'learning_rate': 4.4518975960532786e-07, 'epoch': 0.87} + 87%|████████▋ | 10589/12188 [1:14:36<3:06:07, 6.98s/it] 87%|████████▋ | 10590/12188 [1:14:45<3:21:59, 7.58s/it] {'loss': 0.292, 'grad_norm': 1.056814135135268, 'learning_rate': 4.4464184223134143e-07, 'epoch': 0.87} + 87%|████████▋ | 10590/12188 [1:14:45<3:21:59, 7.58s/it] 87%|████████▋ | 10591/12188 [1:14:52<3:18:19, 7.45s/it] {'loss': 0.2887, 'grad_norm': 0.6749196516553498, 'learning_rate': 4.4409424654942967e-07, 'epoch': 0.87} + 87%|████████▋ | 10591/12188 [1:14:52<3:18:19, 7.45s/it] 87%|████████▋ | 10592/12188 [1:14:59<3:12:08, 7.22s/it] {'loss': 0.3343, 'grad_norm': 0.7316441142407271, 'learning_rate': 4.4354697259826165e-07, 'epoch': 0.87} + 87%|████████▋ | 10592/12188 [1:14:59<3:12:08, 7.22s/it] 87%|████████▋ | 10593/12188 [1:15:08<3:28:47, 7.85s/it] {'loss': 0.3147, 'grad_norm': 0.7654972043902065, 'learning_rate': 4.4300002041648695e-07, 'epoch': 0.87} + 87%|████████▋ | 10593/12188 [1:15:08<3:28:47, 7.85s/it] 87%|████████▋ | 10594/12188 [1:15:15<3:22:28, 7.62s/it] {'loss': 0.2986, 'grad_norm': 1.7993050146163159, 'learning_rate': 4.42453390042728e-07, 'epoch': 0.87} + 87%|████████��� | 10594/12188 [1:15:15<3:22:28, 7.62s/it] 87%|████████▋ | 10595/12188 [1:15:22<3:18:15, 7.47s/it] {'loss': 0.2919, 'grad_norm': 0.7362171802380615, 'learning_rate': 4.419070815155896e-07, 'epoch': 0.87} + 87%|████████▋ | 10595/12188 [1:15:22<3:18:15, 7.47s/it] 87%|████████▋ | 10596/12188 [1:15:29<3:15:42, 7.38s/it] {'loss': 0.2772, 'grad_norm': 0.6977073269685569, 'learning_rate': 4.413610948736491e-07, 'epoch': 0.87} + 87%|████████▋ | 10596/12188 [1:15:29<3:15:42, 7.38s/it] 87%|████████▋ | 10597/12188 [1:15:36<3:14:13, 7.32s/it] {'loss': 0.3194, 'grad_norm': 0.6555892124998586, 'learning_rate': 4.408154301554646e-07, 'epoch': 0.87} + 87%|████████▋ | 10597/12188 [1:15:36<3:14:13, 7.32s/it] 87%|████████▋ | 10598/12188 [1:15:44<3:13:40, 7.31s/it] {'loss': 0.2853, 'grad_norm': 0.6676008948002067, 'learning_rate': 4.4027008739956964e-07, 'epoch': 0.87} + 87%|████████▋ | 10598/12188 [1:15:44<3:13:40, 7.31s/it] 87%|████████▋ | 10599/12188 [1:15:51<3:09:58, 7.17s/it] {'loss': 0.2989, 'grad_norm': 0.7200263632362686, 'learning_rate': 4.397250666444747e-07, 'epoch': 0.87} + 87%|████████▋ | 10599/12188 [1:15:51<3:09:58, 7.17s/it] 87%|████████▋ | 10600/12188 [1:15:57<3:04:02, 6.95s/it] {'loss': 0.3017, 'grad_norm': 0.8289390322000114, 'learning_rate': 4.3918036792866993e-07, 'epoch': 0.87} + 87%|████████▋ | 10600/12188 [1:15:57<3:04:02, 6.95s/it] 87%|████████▋ | 10601/12188 [1:16:04<3:04:10, 6.96s/it] {'loss': 0.2589, 'grad_norm': 0.6833222668963299, 'learning_rate': 4.3863599129061905e-07, 'epoch': 0.87} + 87%|████████▋ | 10601/12188 [1:16:04<3:04:10, 6.96s/it] 87%|████████▋ | 10602/12188 [1:16:10<3:00:36, 6.83s/it] {'loss': 0.2739, 'grad_norm': 0.7155244130616424, 'learning_rate': 4.3809193676876584e-07, 'epoch': 0.87} + 87%|████████▋ | 10602/12188 [1:16:10<3:00:36, 6.83s/it] 87%|████████▋ | 10603/12188 [1:16:17<2:58:35, 6.76s/it] {'loss': 0.2955, 'grad_norm': 0.7834651468330477, 'learning_rate': 4.375482044015322e-07, 'epoch': 0.87} + 87%|████████▋ | 10603/12188 [1:16:17<2:58:35, 6.76s/it] 87%|████████▋ | 10604/12188 [1:16:26<3:14:45, 7.38s/it] {'loss': 0.3072, 'grad_norm': 0.9084427281564397, 'learning_rate': 4.3700479422731424e-07, 'epoch': 0.87} + 87%|████████▋ | 10604/12188 [1:16:26<3:14:45, 7.38s/it] 87%|████████▋ | 10605/12188 [1:16:34<3:21:11, 7.63s/it] {'loss': 0.2721, 'grad_norm': 0.663524552238768, 'learning_rate': 4.3646170628448504e-07, 'epoch': 0.87} + 87%|████████▋ | 10605/12188 [1:16:34<3:21:11, 7.63s/it] 87%|████████▋ | 10606/12188 [1:16:41<3:13:35, 7.34s/it] {'loss': 0.3156, 'grad_norm': 0.6918935414082584, 'learning_rate': 4.3591894061140005e-07, 'epoch': 0.87} + 87%|████████▋ | 10606/12188 [1:16:41<3:13:35, 7.34s/it] 87%|████████▋ | 10607/12188 [1:16:48<3:15:47, 7.43s/it] {'loss': 0.3273, 'grad_norm': 0.7090698210945209, 'learning_rate': 4.353764972463853e-07, 'epoch': 0.87} + 87%|████████▋ | 10607/12188 [1:16:48<3:15:47, 7.43s/it] 87%|████████▋ | 10608/12188 [1:16:55<3:09:55, 7.21s/it] {'loss': 0.2862, 'grad_norm': 0.7140739468175915, 'learning_rate': 4.348343762277496e-07, 'epoch': 0.87} + 87%|████████▋ | 10608/12188 [1:16:55<3:09:55, 7.21s/it] 87%|████████▋ | 10609/12188 [1:17:02<3:06:47, 7.10s/it] {'loss': 0.2887, 'grad_norm': 0.6979832360782587, 'learning_rate': 4.3429257759377506e-07, 'epoch': 0.87} + 87%|████████▋ | 10609/12188 [1:17:02<3:06:47, 7.10s/it] 87%|████████▋ | 10610/12188 [1:17:09<3:04:08, 7.00s/it] {'loss': 0.2967, 'grad_norm': 0.7263228993907205, 'learning_rate': 4.3375110138272337e-07, 'epoch': 0.87} + 87%|████████▋ | 10610/12188 [1:17:09<3:04:08, 7.00s/it] 87%|████████▋ | 10611/12188 [1:17:15<3:00:53, 6.88s/it] {'loss': 0.3013, 'grad_norm': 0.67710521563214, 'learning_rate': 4.332099476328333e-07, 'epoch': 0.87} + 87%|████████▋ | 10611/12188 [1:17:15<3:00:53, 6.88s/it] 87%|████████▋ | 10612/12188 [1:17:22<3:01:06, 6.90s/it] {'loss': 0.3073, 'grad_norm': 0.7328070585413515, 'learning_rate': 4.326691163823188e-07, 'epoch': 0.87} + 87%|████████▋ | 10612/12188 [1:17:22<3:01:06, 6.90s/it] 87%|████████▋ | 10613/12188 [1:17:30<3:10:28, 7.26s/it] {'loss': 0.2947, 'grad_norm': 0.7924673684201058, 'learning_rate': 4.3212860766937427e-07, 'epoch': 0.87} + 87%|████████▋ | 10613/12188 [1:17:30<3:10:28, 7.26s/it] 87%|████████▋ | 10614/12188 [1:17:38<3:11:17, 7.29s/it] {'loss': 0.2962, 'grad_norm': 0.8114629513791897, 'learning_rate': 4.315884215321675e-07, 'epoch': 0.87} + 87%|████████▋ | 10614/12188 [1:17:38<3:11:17, 7.29s/it] 87%|████████▋ | 10615/12188 [1:17:44<3:05:07, 7.06s/it] {'loss': 0.2877, 'grad_norm': 0.6754396453354852, 'learning_rate': 4.3104855800884694e-07, 'epoch': 0.87} + 87%|████████▋ | 10615/12188 [1:17:44<3:05:07, 7.06s/it] 87%|████████▋ | 10616/12188 [1:17:52<3:06:32, 7.12s/it] {'loss': 0.2948, 'grad_norm': 0.7284284207042937, 'learning_rate': 4.3050901713753755e-07, 'epoch': 0.87} + 87%|████████▋ | 10616/12188 [1:17:52<3:06:32, 7.12s/it] 87%|████████▋ | 10617/12188 [1:17:58<3:03:44, 7.02s/it] {'loss': 0.3047, 'grad_norm': 0.7421136730259835, 'learning_rate': 4.2996979895633894e-07, 'epoch': 0.87} + 87%|████████▋ | 10617/12188 [1:17:58<3:03:44, 7.02s/it] 87%|████████▋ | 10618/12188 [1:18:06<3:06:07, 7.11s/it] {'loss': 0.2435, 'grad_norm': 0.6316575763486557, 'learning_rate': 4.294309035033317e-07, 'epoch': 0.87} + 87%|████████▋ | 10618/12188 [1:18:06<3:06:07, 7.11s/it] 87%|████████▋ | 10619/12188 [1:18:15<3:27:02, 7.92s/it] {'loss': 0.3093, 'grad_norm': 0.6951348284731078, 'learning_rate': 4.28892330816571e-07, 'epoch': 0.87} + 87%|████████▋ | 10619/12188 [1:18:15<3:27:02, 7.92s/it] 87%|████████▋ | 10620/12188 [1:18:22<3:18:16, 7.59s/it] {'loss': 0.2898, 'grad_norm': 0.6626748250250697, 'learning_rate': 4.283540809340886e-07, 'epoch': 0.87} + 87%|████████▋ | 10620/12188 [1:18:22<3:18:16, 7.59s/it] 87%|████████▋ | 10621/12188 [1:18:29<3:09:58, 7.27s/it] {'loss': 0.3053, 'grad_norm': 1.1529074973790994, 'learning_rate': 4.278161538938974e-07, 'epoch': 0.87} + 87%|████████▋ | 10621/12188 [1:18:29<3:09:58, 7.27s/it] 87%|████████▋ | 10622/12188 [1:18:36<3:06:30, 7.15s/it] {'loss': 0.2997, 'grad_norm': 0.6437496492020085, 'learning_rate': 4.2727854973398267e-07, 'epoch': 0.87} + 87%|████████▋ | 10622/12188 [1:18:36<3:06:30, 7.15s/it] 87%|████████▋ | 10623/12188 [1:18:43<3:04:28, 7.07s/it] {'loss': 0.3159, 'grad_norm': 0.7982568386200748, 'learning_rate': 4.2674126849231e-07, 'epoch': 0.87} + 87%|████████▋ | 10623/12188 [1:18:43<3:04:28, 7.07s/it] 87%|████████▋ | 10624/12188 [1:18:49<2:59:27, 6.88s/it] {'loss': 0.3257, 'grad_norm': 0.7414261453370182, 'learning_rate': 4.2620431020682253e-07, 'epoch': 0.87} + 87%|████████▋ | 10624/12188 [1:18:49<2:59:27, 6.88s/it] 87%|████████▋ | 10625/12188 [1:18:56<3:00:01, 6.91s/it] {'loss': 0.2789, 'grad_norm': 0.6557920158752574, 'learning_rate': 4.2566767491543706e-07, 'epoch': 0.87} + 87%|████████▋ | 10625/12188 [1:18:56<3:00:01, 6.91s/it] 87%|████████▋ | 10626/12188 [1:19:03<2:58:32, 6.86s/it] {'loss': 0.2913, 'grad_norm': 0.7003205963533056, 'learning_rate': 4.251313626560521e-07, 'epoch': 0.87} + 87%|████████▋ | 10626/12188 [1:19:03<2:58:32, 6.86s/it] 87%|████████▋ | 10627/12188 [1:19:10<3:00:31, 6.94s/it] {'loss': 0.2778, 'grad_norm': 0.8025136704571632, 'learning_rate': 4.245953734665398e-07, 'epoch': 0.87} + 87%|████████▋ | 10627/12188 [1:19:10<3:00:31, 6.94s/it] 87%|████████▋ | 10628/12188 [1:19:17<3:04:01, 7.08s/it] {'loss': 0.302, 'grad_norm': 0.7380933981335916, 'learning_rate': 4.240597073847508e-07, 'epoch': 0.87} + 87%|████████▋ | 10628/12188 [1:19:17<3:04:01, 7.08s/it] 87%|████████▋ | 10629/12188 [1:19:26<3:20:29, 7.72s/it] {'loss': 0.2763, 'grad_norm': 0.6786398302034437, 'learning_rate': 4.235243644485149e-07, 'epoch': 0.87} + 87%|████████▋ | 10629/12188 [1:19:26<3:20:29, 7.72s/it] 87%|████████▋ | 10630/12188 [1:19:33<3:09:23, 7.29s/it] {'loss': 0.2697, 'grad_norm': 0.7512990840693271, 'learning_rate': 4.22989344695634e-07, 'epoch': 0.87} + 87%|████████▋ | 10630/12188 [1:19:33<3:09:23, 7.29s/it] 87%|████████▋ | 10631/12188 [1:19:42<3:21:34, 7.77s/it] {'loss': 0.3049, 'grad_norm': 0.7049210528384273, 'learning_rate': 4.2245464816389357e-07, 'epoch': 0.87} + 87%|████████▋ | 10631/12188 [1:19:42<3:21:34, 7.77s/it] 87%|████████▋ | 10632/12188 [1:19:48<3:13:48, 7.47s/it] {'loss': 0.3026, 'grad_norm': 0.7021090333958492, 'learning_rate': 4.2192027489105113e-07, 'epoch': 0.87} + 87%|████████▋ | 10632/12188 [1:19:48<3:13:48, 7.47s/it] 87%|████████▋ | 10633/12188 [1:19:55<3:08:28, 7.27s/it] {'loss': 0.2896, 'grad_norm': 0.7142141995932553, 'learning_rate': 4.213862249148432e-07, 'epoch': 0.87} + 87%|████████▋ | 10633/12188 [1:19:55<3:08:28, 7.27s/it] 87%|████████▋ | 10634/12188 [1:20:02<3:02:35, 7.05s/it] {'loss': 0.3189, 'grad_norm': 0.748923600544931, 'learning_rate': 4.2085249827298445e-07, 'epoch': 0.87} + 87%|████████▋ | 10634/12188 [1:20:02<3:02:35, 7.05s/it] 87%|████████▋ | 10635/12188 [1:20:09<3:03:56, 7.11s/it] {'loss': 0.2909, 'grad_norm': 0.7348382269439483, 'learning_rate': 4.2031909500316493e-07, 'epoch': 0.87} + 87%|████████▋ | 10635/12188 [1:20:09<3:03:56, 7.11s/it] 87%|████████▋ | 10636/12188 [1:20:17<3:07:14, 7.24s/it] {'loss': 0.3168, 'grad_norm': 0.8239588125250739, 'learning_rate': 4.197860151430533e-07, 'epoch': 0.87} + 87%|████████▋ | 10636/12188 [1:20:17<3:07:14, 7.24s/it] 87%|████████▋ | 10637/12188 [1:20:24<3:11:38, 7.41s/it] {'loss': 0.2635, 'grad_norm': 0.6639404600069776, 'learning_rate': 4.1925325873029387e-07, 'epoch': 0.87} + 87%|████████▋ | 10637/12188 [1:20:24<3:11:38, 7.41s/it] 87%|████████▋ | 10638/12188 [1:20:35<3:35:11, 8.33s/it] {'loss': 0.3381, 'grad_norm': 0.7675647381931301, 'learning_rate': 4.1872082580250994e-07, 'epoch': 0.87} + 87%|████████▋ | 10638/12188 [1:20:35<3:35:11, 8.33s/it] 87%|████████▋ | 10639/12188 [1:20:42<3:26:33, 8.00s/it] {'loss': 0.2895, 'grad_norm': 0.7166562378061995, 'learning_rate': 4.18188716397302e-07, 'epoch': 0.87} + 87%|████████▋ | 10639/12188 [1:20:42<3:26:33, 8.00s/it] 87%|████████▋ | 10640/12188 [1:20:49<3:15:42, 7.59s/it] {'loss': 0.3045, 'grad_norm': 0.7194992925719045, 'learning_rate': 4.1765693055224434e-07, 'epoch': 0.87} + 87%|████████▋ | 10640/12188 [1:20:49<3:15:42, 7.59s/it] 87%|████████▋ | 10641/12188 [1:20:56<3:12:03, 7.45s/it] {'loss': 0.2952, 'grad_norm': 0.7060631154904886, 'learning_rate': 4.1712546830489253e-07, 'epoch': 0.87} + 87%|████████▋ | 10641/12188 [1:20:56<3:12:03, 7.45s/it] 87%|████████▋ | 10642/12188 [1:21:03<3:07:42, 7.28s/it] {'loss': 0.2971, 'grad_norm': 0.6875745875014164, 'learning_rate': 4.165943296927777e-07, 'epoch': 0.87} + 87%|████████▋ | 10642/12188 [1:21:03<3:07:42, 7.28s/it] 87%|████████▋ | 10643/12188 [1:21:10<3:05:21, 7.20s/it] {'loss': 0.3062, 'grad_norm': 0.7203223504252293, 'learning_rate': 4.1606351475340703e-07, 'epoch': 0.87} + 87%|████████▋ | 10643/12188 [1:21:10<3:05:21, 7.20s/it] 87%|████████▋ | 10644/12188 [1:21:17<3:03:09, 7.12s/it] {'loss': 0.296, 'grad_norm': 0.7209818029433145, 'learning_rate': 4.1553302352426715e-07, 'epoch': 0.87} + 87%|████████▋ | 10644/12188 [1:21:17<3:03:09, 7.12s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f3eca6c3d30> +[Try #0] Failed to fetch sample 4670501 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f3eca6c3d30> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Main Page'"}, {'from': 'gpt', 'value': '\nclick(x=0.865, y=0.212)\n'}]} + 87%|████████▋ | 10645/12188 [1:21:24<3:05:30, 7.21s/it] {'loss': 0.2522, 'grad_norm': 0.7590210156898725, 'learning_rate': 4.1500285604281877e-07, 'epoch': 0.87} + 87%|████████▋ | 10645/12188 [1:21:24<3:05:30, 7.21s/it] 87%|████████▋ | 10646/12188 [1:21:31<3:01:59, 7.08s/it] {'loss': 0.3256, 'grad_norm': 0.7440886849404142, 'learning_rate': 4.1447301234650297e-07, 'epoch': 0.87} + 87%|████████▋ | 10646/12188 [1:21:31<3:01:59, 7.08s/it] 87%|████████▋ | 10647/12188 [1:21:38<3:00:01, 7.01s/it] {'loss': 0.2653, 'grad_norm': 0.6372723465122311, 'learning_rate': 4.139434924727359e-07, 'epoch': 0.87} + 87%|████████▋ | 10647/12188 [1:21:38<3:00:01, 7.01s/it] 87%|████████▋ | 10648/12188 [1:21:45<3:01:22, 7.07s/it] {'loss': 0.3177, 'grad_norm': 0.6998607932217216, 'learning_rate': 4.134142964589105e-07, 'epoch': 0.87} + 87%|████████▋ | 10648/12188 [1:21:45<3:01:22, 7.07s/it] 87%|████████▋ | 10649/12188 [1:21:52<2:58:48, 6.97s/it] {'loss': 0.2889, 'grad_norm': 0.6869760696114136, 'learning_rate': 4.128854243423991e-07, 'epoch': 0.87} + 87%|████████▋ | 10649/12188 [1:21:52<2:58:48, 6.97s/it] 87%|████████▋ | 10650/12188 [1:21:59<3:00:26, 7.04s/it] {'loss': 0.3026, 'grad_norm': 0.7119880325951097, 'learning_rate': 4.1235687616054897e-07, 'epoch': 0.87} + 87%|████████▋ | 10650/12188 [1:21:59<3:00:26, 7.04s/it] 87%|████████▋ | 10651/12188 [1:22:07<3:06:17, 7.27s/it] {'loss': 0.2956, 'grad_norm': 1.3967181444891934, 'learning_rate': 4.1182865195068535e-07, 'epoch': 0.87} + 87%|████████▋ | 10651/12188 [1:22:07<3:06:17, 7.27s/it] 87%|████████▋ | 10652/12188 [1:22:16<3:24:58, 8.01s/it] {'loss': 0.2908, 'grad_norm': 0.7178712852753787, 'learning_rate': 4.1130075175011163e-07, 'epoch': 0.87} + 87%|████████▋ | 10652/12188 [1:22:16<3:24:58, 8.01s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f66846cabb0> +[Try #0] Failed to fetch sample 4749808 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f66846cabb0> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Wikimedia Foundation'"}, {'from': 'gpt', 'value': '\nclick(x=0.9215, y=0.6395)\n'}]} + 87%|████████▋ | 10653/12188 [1:22:23<3:16:06, 7.67s/it] {'loss': 0.2826, 'grad_norm': 0.7067391923393348, 'learning_rate': 4.1077317559610584e-07, 'epoch': 0.87} + 87%|████████▋ | 10653/12188 [1:22:23<3:16:06, 7.67s/it] 87%|████████▋ | 10654/12188 [1:22:30<3:12:32, 7.53s/it] {'loss': 0.2823, 'grad_norm': 0.6614099364316512, 'learning_rate': 4.10245923525926e-07, 'epoch': 0.87} + 87%|████████▋ | 10654/12188 [1:22:30<3:12:32, 7.53s/it] 87%|████████▋ | 10655/12188 [1:22:37<3:06:00, 7.28s/it] {'loss': 0.2903, 'grad_norm': 0.7000989753498861, 'learning_rate': 4.097189955768038e-07, 'epoch': 0.87} + 87%|████████▋ | 10655/12188 [1:22:37<3:06:00, 7.28s/it] 87%|████████▋ | 10656/12188 [1:22:44<3:05:01, 7.25s/it] {'loss': 0.2663, 'grad_norm': 0.6334926866750158, 'learning_rate': 4.0919239178595194e-07, 'epoch': 0.87} + 87%|████████▋ | 10656/12188 [1:22:44<3:05:01, 7.25s/it] 87%|████████▋ | 10657/12188 [1:22:52<3:05:45, 7.28s/it] {'loss': 0.2528, 'grad_norm': 0.667058669409739, 'learning_rate': 4.0866611219055827e-07, 'epoch': 0.87} + 87%|████████▋ | 10657/12188 [1:22:52<3:05:45, 7.28s/it] 87%|████████▋ | 10658/12188 [1:22:59<3:07:02, 7.33s/it] {'loss': 0.3028, 'grad_norm': 0.7234879226126119, 'learning_rate': 4.081401568277865e-07, 'epoch': 0.87} + 87%|████████▋ | 10658/12188 [1:22:59<3:07:02, 7.33s/it] 87%|████████▋ | 10659/12188 [1:23:06<3:04:10, 7.23s/it] {'loss': 0.3001, 'grad_norm': 0.7869074609457788, 'learning_rate': 4.076145257347802e-07, 'epoch': 0.87} + 87%|████████▋ | 10659/12188 [1:23:06<3:04:10, 7.23s/it] 87%|████████▋ | 10660/12188 [1:23:13<3:01:16, 7.12s/it] {'loss': 0.3043, 'grad_norm': 0.7341273122882264, 'learning_rate': 4.0708921894865747e-07, 'epoch': 0.87} + 87%|████████▋ | 10660/12188 [1:23:13<3:01:16, 7.12s/it] 87%|████████▋ | 10661/12188 [1:23:20<2:58:50, 7.03s/it] {'loss': 0.3301, 'grad_norm': 0.6914212212415541, 'learning_rate': 4.0656423650651477e-07, 'epoch': 0.87} + 87%|████████▋ | 10661/12188 [1:23:20<2:58:50, 7.03s/it] 87%|████████▋ | 10662/12188 [1:23:26<2:56:00, 6.92s/it] {'loss': 0.3299, 'grad_norm': 0.7247265731778423, 'learning_rate': 4.060395784454263e-07, 'epoch': 0.87} + 87%|████████▋ | 10662/12188 [1:23:26<2:56:00, 6.92s/it] 87%|████████▋ | 10663/12188 [1:23:33<2:54:01, 6.85s/it] {'loss': 0.3124, 'grad_norm': 2.2362299951657123, 'learning_rate': 4.055152448024413e-07, 'epoch': 0.87} + 87%|████████▋ | 10663/12188 [1:23:33<2:54:01, 6.85s/it] 87%|████████▋ | 10664/12188 [1:23:42<3:09:14, 7.45s/it] {'loss': 0.2703, 'grad_norm': 0.7219909891989075, 'learning_rate': 4.0499123561458795e-07, 'epoch': 0.87} + 87%|████████▋ | 10664/12188 [1:23:42<3:09:14, 7.45s/it] 88%|████████▊ | 10665/12188 [1:23:49<3:05:53, 7.32s/it] {'loss': 0.2915, 'grad_norm': 0.7583775927541824, 'learning_rate': 4.044675509188722e-07, 'epoch': 0.88} + 88%|████████▊ | 10665/12188 [1:23:49<3:05:53, 7.32s/it] 88%|████████▊ | 10666/12188 [1:23:56<2:59:38, 7.08s/it] {'loss': 0.2964, 'grad_norm': 0.7759714110303447, 'learning_rate': 4.03944190752274e-07, 'epoch': 0.88} + 88%|████████▊ | 10666/12188 [1:23:56<2:59:38, 7.08s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7fa97727efc0> +[Try #0] Failed to fetch sample 4439689 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7fa97727efc0> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Page information'"}, {'from': 'gpt', 'value': '\nclick(x=0.865, y=0.442)\n'}]} + 88%|████████▊ | 10667/12188 [1:24:06<3:24:56, 8.08s/it] {'loss': 0.2844, 'grad_norm': 0.7176308524455763, 'learning_rate': 4.0342115515175363e-07, 'epoch': 0.88} + 88%|████████▊ | 10667/12188 [1:24:06<3:24:56, 8.08s/it] 88%|████████▊ | 10668/12188 [1:24:13<3:16:04, 7.74s/it] {'loss': 0.3159, 'grad_norm': 0.8027920439018081, 'learning_rate': 4.0289844415424497e-07, 'epoch': 0.88} + 88%|████████▊ | 10668/12188 [1:24:13<3:16:04, 7.74s/it] 88%|████████▊ | 10669/12188 [1:24:22<3:27:03, 8.18s/it] {'loss': 0.2604, 'grad_norm': 0.6576497989066044, 'learning_rate': 4.023760577966629e-07, 'epoch': 0.88} + 88%|████████▊ | 10669/12188 [1:24:22<3:27:03, 8.18s/it] 88%|████████▊ | 10670/12188 [1:24:30<3:24:07, 8.07s/it] {'loss': 0.2712, 'grad_norm': 0.8224573565194759, 'learning_rate': 4.0185399611589794e-07, 'epoch': 0.88} + 88%|████████▊ | 10670/12188 [1:24:30<3:24:07, 8.07s/it] 88%|████████▊ | 10671/12188 [1:24:37<3:14:43, 7.70s/it] {'loss': 0.3019, 'grad_norm': 0.6502921676583979, 'learning_rate': 4.01332259148815e-07, 'epoch': 0.88} + 88%|████████▊ | 10671/12188 [1:24:37<3:14:43, 7.70s/it] 88%|████████▊ | 10672/12188 [1:24:43<3:06:25, 7.38s/it] {'loss': 0.2785, 'grad_norm': 0.7263592227251334, 'learning_rate': 4.008108469322608e-07, 'epoch': 0.88} + 88%|████████▊ | 10672/12188 [1:24:43<3:06:25, 7.38s/it] 88%|████████▊ | 10673/12188 [1:24:50<3:00:38, 7.15s/it] {'loss': 0.2694, 'grad_norm': 0.7668094858550537, 'learning_rate': 4.0028975950305416e-07, 'epoch': 0.88} + 88%|████████▊ | 10673/12188 [1:24:50<3:00:38, 7.15s/it] 88%|████████▊ | 10674/12188 [1:24:57<2:56:29, 6.99s/it] {'loss': 0.2815, 'grad_norm': 0.7435504728344147, 'learning_rate': 3.997689968979962e-07, 'epoch': 0.88} + 88%|████████▊ | 10674/12188 [1:24:57<2:56:29, 6.99s/it] 88%|████████▊ | 10675/12188 [1:25:04<2:57:18, 7.03s/it] {'loss': 0.3246, 'grad_norm': 0.7305170140901994, 'learning_rate': 3.992485591538603e-07, 'epoch': 0.88} + 88%|████████▊ | 10675/12188 [1:25:04<2:57:18, 7.03s/it] 88%|████████▊ | 10676/12188 [1:25:11<2:56:51, 7.02s/it] {'loss': 0.2947, 'grad_norm': 0.6870790677421496, 'learning_rate': 3.987284463073987e-07, 'epoch': 0.88} + 88%|████████▊ | 10676/12188 [1:25:11<2:56:51, 7.02s/it] 88%|████████▊ | 10677/12188 [1:25:18<2:59:45, 7.14s/it] {'loss': 0.2932, 'grad_norm': 0.6804007488173737, 'learning_rate': 3.9820865839534206e-07, 'epoch': 0.88} + 88%|████████▊ | 10677/12188 [1:25:18<2:59:45, 7.14s/it] 88%|████████▊ | 10678/12188 [1:25:26<3:02:44, 7.26s/it] {'loss': 0.3088, 'grad_norm': 0.8290679873803678, 'learning_rate': 3.976891954543971e-07, 'epoch': 0.88} + 88%|████████▊ | 10678/12188 [1:25:26<3:02:44, 7.26s/it] 88%|████████▊ | 10679/12188 [1:25:33<3:06:05, 7.40s/it] {'loss': 0.2991, 'grad_norm': 0.6807698564904336, 'learning_rate': 3.971700575212462e-07, 'epoch': 0.88} + 88%|████████▊ | 10679/12188 [1:25:33<3:06:05, 7.40s/it] 88%|████████▊ | 10680/12188 [1:25:40<3:02:35, 7.26s/it] {'loss': 0.3092, 'grad_norm': 0.6788653068082897, 'learning_rate': 3.966512446325521e-07, 'epoch': 0.88} + 88%|████████▊ | 10680/12188 [1:25:40<3:02:35, 7.26s/it] 88%|████████▊ | 10681/12188 [1:25:48<3:05:44, 7.40s/it] {'loss': 0.3056, 'grad_norm': 0.7734537263480676, 'learning_rate': 3.9613275682494956e-07, 'epoch': 0.88} + 88%|████████▊ | 10681/12188 [1:25:48<3:05:44, 7.40s/it] 88%|████████▊ | 10682/12188 [1:25:58<3:23:02, 8.09s/it] {'loss': 0.3364, 'grad_norm': 0.686923600301431, 'learning_rate': 3.956145941350559e-07, 'epoch': 0.88} + 88%|████████▊ | 10682/12188 [1:25:58<3:23:02, 8.09s/it] 88%|████████▊ | 10683/12188 [1:26:05<3:13:48, 7.73s/it] {'loss': 0.311, 'grad_norm': 0.6605550307942503, 'learning_rate': 3.9509675659946234e-07, 'epoch': 0.88} + 88%|████████▊ | 10683/12188 [1:26:05<3:13:48, 7.73s/it] 88%|████████▊ | 10684/12188 [1:26:12<3:09:13, 7.55s/it] {'loss': 0.2954, 'grad_norm': 0.7246043584538278, 'learning_rate': 3.9457924425473694e-07, 'epoch': 0.88} + 88%|████████▊ | 10684/12188 [1:26:12<3:09:13, 7.55s/it] 88%|████████▊ | 10685/12188 [1:26:19<3:09:17, 7.56s/it] {'loss': 0.3182, 'grad_norm': 0.6823903288313646, 'learning_rate': 3.940620571374271e-07, 'epoch': 0.88} + 88%|████████▊ | 10685/12188 [1:26:19<3:09:17, 7.56s/it] 88%|████████▊ | 10686/12188 [1:26:27<3:06:28, 7.45s/it] {'loss': 0.3079, 'grad_norm': 0.6908398203335342, 'learning_rate': 3.9354519528405357e-07, 'epoch': 0.88} + 88%|████████▊ | 10686/12188 [1:26:27<3:06:28, 7.45s/it] 88%|████████▊ | 10687/12188 [1:26:33<3:02:04, 7.28s/it] {'loss': 0.3612, 'grad_norm': 0.7915320044742858, 'learning_rate': 3.9302865873111884e-07, 'epoch': 0.88} + 88%|████████▊ | 10687/12188 [1:26:33<3:02:04, 7.28s/it] 88%|████████▊ | 10688/12188 [1:26:40<2:59:36, 7.18s/it] {'loss': 0.3011, 'grad_norm': 0.7123590176082698, 'learning_rate': 3.925124475150982e-07, 'epoch': 0.88} + 88%|████████▊ | 10688/12188 [1:26:40<2:59:36, 7.18s/it] 88%|████████▊ | 10689/12188 [1:26:48<3:00:54, 7.24s/it] {'loss': 0.2757, 'grad_norm': 0.6846521126708313, 'learning_rate': 3.9199656167244525e-07, 'epoch': 0.88} + 88%|████████▊ | 10689/12188 [1:26:48<3:00:54, 7.24s/it] 88%|████████▊ | 10690/12188 [1:26:55<3:00:10, 7.22s/it] {'loss': 0.2789, 'grad_norm': 0.7301866643949786, 'learning_rate': 3.914810012395914e-07, 'epoch': 0.88} + 88%|████████▊ | 10690/12188 [1:26:55<3:00:10, 7.22s/it] 88%|████████▊ | 10691/12188 [1:27:02<2:56:03, 7.06s/it] {'loss': 0.2961, 'grad_norm': 0.6826605498046311, 'learning_rate': 3.909657662529465e-07, 'epoch': 0.88} + 88%|████████▊ | 10691/12188 [1:27:02<2:56:03, 7.06s/it] 88%|████████▊ | 10692/12188 [1:27:09<2:56:18, 7.07s/it] {'loss': 0.2722, 'grad_norm': 0.6870922636630897, 'learning_rate': 3.904508567488935e-07, 'epoch': 0.88} + 88%|████████▊ | 10692/12188 [1:27:09<2:56:18, 7.07s/it] 88%|████████▊ | 10693/12188 [1:27:15<2:52:53, 6.94s/it] {'loss': 0.3161, 'grad_norm': 0.7247235565707109, 'learning_rate': 3.8993627276379564e-07, 'epoch': 0.88} + 88%|████████▊ | 10693/12188 [1:27:15<2:52:53, 6.94s/it] 88%|████████▊ | 10694/12188 [1:27:22<2:50:11, 6.83s/it] {'loss': 0.2818, 'grad_norm': 0.6809995361048672, 'learning_rate': 3.894220143339905e-07, 'epoch': 0.88} + 88%|████████▊ | 10694/12188 [1:27:22<2:50:11, 6.83s/it] 88%|████████▊ | 10695/12188 [1:27:29<2:51:19, 6.89s/it] {'loss': 0.3349, 'grad_norm': 0.6936624301387705, 'learning_rate': 3.8890808149579573e-07, 'epoch': 0.88} + 88%|████████▊ | 10695/12188 [1:27:29<2:51:19, 6.89s/it] 88%|████████▊ | 10696/12188 [1:27:37<2:57:18, 7.13s/it] {'loss': 0.2882, 'grad_norm': 0.694886775239229, 'learning_rate': 3.883944742855045e-07, 'epoch': 0.88} + 88%|████████▊ | 10696/12188 [1:27:37<2:57:18, 7.13s/it] 88%|████████▊ | 10697/12188 [1:27:44<3:00:37, 7.27s/it] {'loss': 0.285, 'grad_norm': 0.6652252252059668, 'learning_rate': 3.8788119273938606e-07, 'epoch': 0.88} + 88%|████████▊ | 10697/12188 [1:27:44<3:00:37, 7.27s/it] 88%|████████▊ | 10698/12188 [1:27:51<2:58:28, 7.19s/it] {'loss': 0.2916, 'grad_norm': 0.7631551902480868, 'learning_rate': 3.8736823689368817e-07, 'epoch': 0.88} + 88%|████████▊ | 10698/12188 [1:27:51<2:58:28, 7.19s/it] 88%|████████▊ | 10699/12188 [1:28:01<3:18:22, 7.99s/it] {'loss': 0.2804, 'grad_norm': 0.9621070572959409, 'learning_rate': 3.8685560678463405e-07, 'epoch': 0.88} + 88%|████████▊ | 10699/12188 [1:28:01<3:18:22, 7.99s/it] 88%|████████▊ | 10700/12188 [1:28:08<3:09:51, 7.66s/it] {'loss': 0.279, 'grad_norm': 0.8423089197440218, 'learning_rate': 3.863433024484259e-07, 'epoch': 0.88} + 88%|████████▊ | 10700/12188 [1:28:08<3:09:51, 7.66s/it] 88%|████████▊ | 10701/12188 [1:28:15<3:04:11, 7.43s/it] {'loss': 0.2649, 'grad_norm': 0.7400695924889888, 'learning_rate': 3.858313239212408e-07, 'epoch': 0.88} + 88%|████████▊ | 10701/12188 [1:28:15<3:04:11, 7.43s/it] 88%|████████▊ | 10702/12188 [1:28:23<3:07:21, 7.57s/it] {'loss': 0.2968, 'grad_norm': 0.6973139319185422, 'learning_rate': 3.8531967123923486e-07, 'epoch': 0.88} + 88%|████████▊ | 10702/12188 [1:28:23<3:07:21, 7.57s/it] 88%|████████▊ | 10703/12188 [1:28:30<3:04:27, 7.45s/it] {'loss': 0.3061, 'grad_norm': 0.7302795208692383, 'learning_rate': 3.8480834443854034e-07, 'epoch': 0.88} + 88%|████████▊ | 10703/12188 [1:28:30<3:04:27, 7.45s/it] 88%|████████▊ | 10704/12188 [1:28:37<2:59:42, 7.27s/it] {'loss': 0.3082, 'grad_norm': 1.0640676297326044, 'learning_rate': 3.8429734355526437e-07, 'epoch': 0.88} + 88%|████████▊ | 10704/12188 [1:28:37<2:59:42, 7.27s/it] 88%|████████▊ | 10705/12188 [1:28:44<3:00:18, 7.30s/it] {'loss': 0.309, 'grad_norm': 0.6836291557835369, 'learning_rate': 3.837866686254943e-07, 'epoch': 0.88} + 88%|████████▊ | 10705/12188 [1:28:44<3:00:18, 7.30s/it] 88%|████████▊ | 10706/12188 [1:28:51<2:54:50, 7.08s/it] {'loss': 0.3026, 'grad_norm': 0.6966795541215912, 'learning_rate': 3.83276319685294e-07, 'epoch': 0.88} + 88%|████████▊ | 10706/12188 [1:28:51<2:54:50, 7.08s/it] 88%|████████▊ | 10707/12188 [1:28:57<2:52:31, 6.99s/it] {'loss': 0.3713, 'grad_norm': 1.011743993973967, 'learning_rate': 3.827662967707019e-07, 'epoch': 0.88} + 88%|████████▊ | 10707/12188 [1:28:57<2:52:31, 6.99s/it] 88%|████████▊ | 10708/12188 [1:29:04<2:48:50, 6.84s/it] {'loss': 0.313, 'grad_norm': 0.7028130329346055, 'learning_rate': 3.822565999177358e-07, 'epoch': 0.88} + 88%|████████▊ | 10708/12188 [1:29:04<2:48:50, 6.84s/it] 88%|████████▊ | 10709/12188 [1:29:11<2:47:11, 6.78s/it] {'loss': 0.2755, 'grad_norm': 0.736021558052495, 'learning_rate': 3.817472291623908e-07, 'epoch': 0.88} + 88%|████████▊ | 10709/12188 [1:29:11<2:47:11, 6.78s/it] 88%|████████▊ | 10710/12188 [1:29:18<2:48:12, 6.83s/it] {'loss': 0.2604, 'grad_norm': 0.6604024487802794, 'learning_rate': 3.812381845406354e-07, 'epoch': 0.88} + 88%|████████▊ | 10710/12188 [1:29:18<2:48:12, 6.83s/it] 88%|████████▊ | 10711/12188 [1:29:24<2:47:24, 6.80s/it] {'loss': 0.3089, 'grad_norm': 0.7207801358365983, 'learning_rate': 3.807294660884198e-07, 'epoch': 0.88} + 88%|████████▊ | 10711/12188 [1:29:24<2:47:24, 6.80s/it] 88%|████████▊ | 10712/12188 [1:29:32<2:51:48, 6.98s/it] {'loss': 0.2759, 'grad_norm': 0.7794356061790533, 'learning_rate': 3.8022107384166676e-07, 'epoch': 0.88} + 88%|████████▊ | 10712/12188 [1:29:32<2:51:48, 6.98s/it] 88%|████████▊ | 10713/12188 [1:29:39<2:54:08, 7.08s/it] {'loss': 0.2913, 'grad_norm': 0.6653694485352035, 'learning_rate': 3.7971300783627987e-07, 'epoch': 0.88} + 88%|████████▊ | 10713/12188 [1:29:39<2:54:08, 7.08s/it] 88%|████████▊ | 10714/12188 [1:29:47<3:01:26, 7.39s/it] {'loss': 0.2942, 'grad_norm': 0.8153220995742125, 'learning_rate': 3.7920526810813763e-07, 'epoch': 0.88} + 88%|████████▊ | 10714/12188 [1:29:47<3:01:26, 7.39s/it] 88%|████████▊ | 10715/12188 [1:29:54<2:56:19, 7.18s/it] {'loss': 0.3034, 'grad_norm': 0.7063440013487704, 'learning_rate': 3.7869785469309526e-07, 'epoch': 0.88} + 88%|████████▊ | 10715/12188 [1:29:54<2:56:19, 7.18s/it] 88%|████████▊ | 10716/12188 [1:30:01<2:53:22, 7.07s/it] {'loss': 0.3243, 'grad_norm': 0.7301637231725094, 'learning_rate': 3.781907676269864e-07, 'epoch': 0.88} + 88%|████████▊ | 10716/12188 [1:30:01<2:53:22, 7.07s/it] 88%|████████▊ | 10717/12188 [1:30:07<2:51:08, 6.98s/it] {'loss': 0.3103, 'grad_norm': 0.7150024263217332, 'learning_rate': 3.776840069456189e-07, 'epoch': 0.88} + 88%|████████▊ | 10717/12188 [1:30:07<2:51:08, 6.98s/it] 88%|████████▊ | 10718/12188 [1:30:14<2:50:58, 6.98s/it] {'loss': 0.2607, 'grad_norm': 0.6970414625109904, 'learning_rate': 3.7717757268478037e-07, 'epoch': 0.88} + 88%|████████▊ | 10718/12188 [1:30:14<2:50:58, 6.98s/it] 88%|████████▊ | 10719/12188 [1:30:21<2:49:56, 6.94s/it] {'loss': 0.3136, 'grad_norm': 0.7893684246532853, 'learning_rate': 3.7667146488023554e-07, 'epoch': 0.88} + 88%|████████▊ | 10719/12188 [1:30:21<2:49:56, 6.94s/it] 88%|████████▊ | 10720/12188 [1:30:29<2:58:00, 7.28s/it] {'loss': 0.2747, 'grad_norm': 0.8530051678659988, 'learning_rate': 3.76165683567723e-07, 'epoch': 0.88} + 88%|████████▊ | 10720/12188 [1:30:29<2:58:00, 7.28s/it] 88%|████████▊ | 10721/12188 [1:30:36<2:56:24, 7.21s/it] {'loss': 0.3298, 'grad_norm': 0.6857964361731975, 'learning_rate': 3.7566022878296194e-07, 'epoch': 0.88} + 88%|█████��██▊ | 10721/12188 [1:30:36<2:56:24, 7.21s/it] 88%|████████▊ | 10722/12188 [1:30:46<3:11:17, 7.83s/it] {'loss': 0.292, 'grad_norm': 0.7261320442303525, 'learning_rate': 3.7515510056164493e-07, 'epoch': 0.88} + 88%|████████▊ | 10722/12188 [1:30:46<3:11:17, 7.83s/it] 88%|████████▊ | 10723/12188 [1:30:53<3:05:09, 7.58s/it] {'loss': 0.2958, 'grad_norm': 0.7336511778846666, 'learning_rate': 3.746502989394446e-07, 'epoch': 0.88} + 88%|████████▊ | 10723/12188 [1:30:53<3:05:09, 7.58s/it] 88%|████████▊ | 10724/12188 [1:31:00<3:00:38, 7.40s/it] {'loss': 0.2805, 'grad_norm': 1.0639408803500643, 'learning_rate': 3.741458239520096e-07, 'epoch': 0.88} + 88%|████████▊ | 10724/12188 [1:31:00<3:00:38, 7.40s/it] 88%|████████▊ | 10725/12188 [1:31:07<2:57:50, 7.29s/it] {'loss': 0.3131, 'grad_norm': 0.6847913795603848, 'learning_rate': 3.736416756349642e-07, 'epoch': 0.88} + 88%|████████▊ | 10725/12188 [1:31:07<2:57:50, 7.29s/it] 88%|████████▊ | 10726/12188 [1:31:13<2:53:47, 7.13s/it] {'loss': 0.3199, 'grad_norm': 0.8334359429853826, 'learning_rate': 3.7313785402391054e-07, 'epoch': 0.88} + 88%|████████▊ | 10726/12188 [1:31:13<2:53:47, 7.13s/it] 88%|████████▊ | 10727/12188 [1:31:21<2:55:43, 7.22s/it] {'loss': 0.2781, 'grad_norm': 0.8548442373018286, 'learning_rate': 3.7263435915442846e-07, 'epoch': 0.88} + 88%|████████▊ | 10727/12188 [1:31:21<2:55:43, 7.22s/it] 88%|████████▊ | 10728/12188 [1:31:27<2:51:35, 7.05s/it] {'loss': 0.2887, 'grad_norm': 0.8357344627894595, 'learning_rate': 3.72131191062074e-07, 'epoch': 0.88} + 88%|████████▊ | 10728/12188 [1:31:28<2:51:35, 7.05s/it] 88%|████████▊ | 10729/12188 [1:31:36<3:01:02, 7.44s/it] {'loss': 0.3509, 'grad_norm': 0.7295745280643023, 'learning_rate': 3.716283497823786e-07, 'epoch': 0.88} + 88%|████████▊ | 10729/12188 [1:31:36<3:01:02, 7.44s/it] 88%|████████▊ | 10730/12188 [1:31:43<2:57:39, 7.31s/it] {'loss': 0.3097, 'grad_norm': 0.9860546020344879, 'learning_rate': 3.71125835350854e-07, 'epoch': 0.88} + 88%|████████▊ | 10730/12188 [1:31:43<2:57:39, 7.31s/it] 88%|████████▊ | 10731/12188 [1:31:50<2:56:19, 7.26s/it] {'loss': 0.3319, 'grad_norm': 0.6936643963418342, 'learning_rate': 3.706236478029851e-07, 'epoch': 0.88} + 88%|████████▊ | 10731/12188 [1:31:50<2:56:19, 7.26s/it] 88%|████████▊ | 10732/12188 [1:31:57<2:53:47, 7.16s/it] {'loss': 0.3393, 'grad_norm': 0.7390591493959974, 'learning_rate': 3.7012178717423744e-07, 'epoch': 0.88} + 88%|████████▊ | 10732/12188 [1:31:57<2:53:47, 7.16s/it] 88%|████████▊ | 10733/12188 [1:32:04<2:53:47, 7.17s/it] {'loss': 0.3297, 'grad_norm': 0.7715508560890467, 'learning_rate': 3.6962025350004983e-07, 'epoch': 0.88} + 88%|████████▊ | 10733/12188 [1:32:04<2:53:47, 7.17s/it] 88%|████████▊ | 10734/12188 [1:32:11<2:50:29, 7.04s/it] {'loss': 0.2773, 'grad_norm': 0.7429980174607828, 'learning_rate': 3.691190468158412e-07, 'epoch': 0.88} + 88%|████████▊ | 10734/12188 [1:32:11<2:50:29, 7.04s/it] 88%|████████▊ | 10735/12188 [1:32:18<2:52:30, 7.12s/it] {'loss': 0.2796, 'grad_norm': 0.6973019725490931, 'learning_rate': 3.686181671570049e-07, 'epoch': 0.88} + 88%|████████▊ | 10735/12188 [1:32:18<2:52:30, 7.12s/it] 88%|████████▊ | 10736/12188 [1:32:26<2:54:50, 7.22s/it] {'loss': 0.2818, 'grad_norm': 0.7462494354335634, 'learning_rate': 3.681176145589127e-07, 'epoch': 0.88} + 88%|████████▊ | 10736/12188 [1:32:26<2:54:50, 7.22s/it] 88%|████████▊ | 10737/12188 [1:32:32<2:48:58, 6.99s/it] {'loss': 0.2759, 'grad_norm': 0.6786958739291334, 'learning_rate': 3.676173890569135e-07, 'epoch': 0.88} + 88%|████████▊ | 10737/12188 [1:32:32<2:48:58, 6.99s/it] 88%|████████▊ | 10738/12188 [1:32:39<2:47:44, 6.94s/it] {'loss': 0.2502, 'grad_norm': 0.7345476786957887, 'learning_rate': 3.671174906863312e-07, 'epoch': 0.88} + 88%|████████▊ | 10738/12188 [1:32:39<2:47:44, 6.94s/it] 88%|████████▊ | 10739/12188 [1:32:46<2:47:42, 6.94s/it] {'loss': 0.2828, 'grad_norm': 1.0668949393998937, 'learning_rate': 3.666179194824693e-07, 'epoch': 0.88} + 88%|████████▊ | 10739/12188 [1:32:46<2:47:42, 6.94s/it] 88%|████████▊ | 10740/12188 [1:32:53<2:48:17, 6.97s/it] {'loss': 0.3308, 'grad_norm': 0.6721842476438258, 'learning_rate': 3.661186754806051e-07, 'epoch': 0.88} + 88%|████████▊ | 10740/12188 [1:32:53<2:48:17, 6.97s/it] 88%|████████▊ | 10741/12188 [1:33:00<2:49:47, 7.04s/it] {'loss': 0.2771, 'grad_norm': 0.7577266404010189, 'learning_rate': 3.6561975871599487e-07, 'epoch': 0.88} + 88%|████████▊ | 10741/12188 [1:33:00<2:49:47, 7.04s/it] 88%|████████▊ | 10742/12188 [1:33:07<2:51:47, 7.13s/it] {'loss': 0.3157, 'grad_norm': 0.690618449153438, 'learning_rate': 3.6512116922387264e-07, 'epoch': 0.88} + 88%|████████▊ | 10742/12188 [1:33:07<2:51:47, 7.13s/it] 88%|████████▊ | 10743/12188 [1:33:15<2:52:11, 7.15s/it] {'loss': 0.3076, 'grad_norm': 0.7361288135796112, 'learning_rate': 3.646229070394464e-07, 'epoch': 0.88} + 88%|████████▊ | 10743/12188 [1:33:15<2:52:11, 7.15s/it] 88%|████████▊ | 10744/12188 [1:33:24<3:04:38, 7.67s/it] {'loss': 0.3274, 'grad_norm': 0.6923240710525015, 'learning_rate': 3.641249721979029e-07, 'epoch': 0.88} + 88%|████████▊ | 10744/12188 [1:33:24<3:04:38, 7.67s/it] 88%|████████▊ | 10745/12188 [1:33:30<2:56:53, 7.36s/it] {'loss': 0.2959, 'grad_norm': 0.7332616816696536, 'learning_rate': 3.636273647344063e-07, 'epoch': 0.88} + 88%|████████▊ | 10745/12188 [1:33:30<2:56:53, 7.36s/it] 88%|████████▊ | 10746/12188 [1:33:37<2:52:15, 7.17s/it] {'loss': 0.318, 'grad_norm': 0.7502622705181693, 'learning_rate': 3.631300846840957e-07, 'epoch': 0.88} + 88%|████████▊ | 10746/12188 [1:33:37<2:52:15, 7.17s/it] 88%|████████▊ | 10747/12188 [1:33:44<2:53:59, 7.24s/it] {'loss': 0.3042, 'grad_norm': 0.7269814144457117, 'learning_rate': 3.626331320820897e-07, 'epoch': 0.88} + 88%|████████▊ | 10747/12188 [1:33:44<2:53:59, 7.24s/it] 88%|████████▊ | 10748/12188 [1:33:52<2:56:38, 7.36s/it] {'loss': 0.3479, 'grad_norm': 0.6895535375539081, 'learning_rate': 3.6213650696348025e-07, 'epoch': 0.88} + 88%|████████▊ | 10748/12188 [1:33:52<2:56:38, 7.36s/it] 88%|████████▊ | 10749/12188 [1:33:59<2:54:26, 7.27s/it] {'loss': 0.304, 'grad_norm': 0.6874793263742873, 'learning_rate': 3.616402093633398e-07, 'epoch': 0.88} + 88%|████████▊ | 10749/12188 [1:33:59<2:54:26, 7.27s/it] 88%|████████▊ | 10750/12188 [1:34:06<2:51:18, 7.15s/it] {'loss': 0.305, 'grad_norm': 0.6883945431464021, 'learning_rate': 3.611442393167158e-07, 'epoch': 0.88} + 88%|████████▊ | 10750/12188 [1:34:06<2:51:18, 7.15s/it] 88%|████████▊ | 10751/12188 [1:34:13<2:51:01, 7.14s/it] {'loss': 0.2877, 'grad_norm': 0.9562056490178452, 'learning_rate': 3.606485968586326e-07, 'epoch': 0.88} + 88%|████████▊ | 10751/12188 [1:34:13<2:51:01, 7.14s/it] 88%|████████▊ | 10752/12188 [1:34:20<2:47:28, 7.00s/it] {'loss': 0.3039, 'grad_norm': 0.8587805072096744, 'learning_rate': 3.6015328202409216e-07, 'epoch': 0.88} + 88%|████████▊ | 10752/12188 [1:34:20<2:47:28, 7.00s/it] 88%|████████▊ | 10753/12188 [1:34:27<2:52:14, 7.20s/it] {'loss': 0.3048, 'grad_norm': 0.7115586194417716, 'learning_rate': 3.5965829484807203e-07, 'epoch': 0.88} + 88%|████████▊ | 10753/12188 [1:34:27<2:52:14, 7.20s/it] 88%|████████▊ | 10754/12188 [1:34:35<2:55:06, 7.33s/it] {'loss': 0.303, 'grad_norm': 0.7637356696278053, 'learning_rate': 3.5916363536552754e-07, 'epoch': 0.88} + 88%|████████▊ | 10754/12188 [1:34:35<2:55:06, 7.33s/it] 88%|████████▊ | 10755/12188 [1:34:42<2:52:17, 7.21s/it] {'loss': 0.2714, 'grad_norm': 0.6947611021865987, 'learning_rate': 3.5866930361139196e-07, 'epoch': 0.88} + 88%|████████▊ | 10755/12188 [1:34:42<2:52:17, 7.21s/it] 88%|████████▊ | 10756/12188 [1:34:50<2:56:09, 7.38s/it] {'loss': 0.3006, 'grad_norm': 0.6537068901520802, 'learning_rate': 3.581752996205734e-07, 'epoch': 0.88} + 88%|████████▊ | 10756/12188 [1:34:50<2:56:09, 7.38s/it] 88%|████████▊ | 10757/12188 [1:34:56<2:51:17, 7.18s/it] {'loss': 0.2762, 'grad_norm': 0.7451305678572027, 'learning_rate': 3.5768162342795675e-07, 'epoch': 0.88} + 88%|████████▊ | 10757/12188 [1:34:56<2:51:17, 7.18s/it] 88%|████████▊ | 10758/12188 [1:35:04<2:56:34, 7.41s/it] {'loss': 0.3037, 'grad_norm': 0.737356940450702, 'learning_rate': 3.571882750684058e-07, 'epoch': 0.88} + 88%|████████▊ | 10758/12188 [1:35:04<2:56:34, 7.41s/it] 88%|████████▊ | 10759/12188 [1:35:13<3:04:20, 7.74s/it] {'loss': 0.2877, 'grad_norm': 0.6808656778979371, 'learning_rate': 3.5669525457675935e-07, 'epoch': 0.88} + 88%|████████▊ | 10759/12188 [1:35:13<3:04:20, 7.74s/it] 88%|████████▊ | 10760/12188 [1:35:20<2:58:15, 7.49s/it] {'loss': 0.2792, 'grad_norm': 0.6268649540584085, 'learning_rate': 3.5620256198783454e-07, 'epoch': 0.88} + 88%|████████▊ | 10760/12188 [1:35:20<2:58:15, 7.49s/it] 88%|████████▊ | 10761/12188 [1:35:26<2:52:27, 7.25s/it] {'loss': 0.2849, 'grad_norm': 0.621480331346605, 'learning_rate': 3.55710197336423e-07, 'epoch': 0.88} + 88%|████████▊ | 10761/12188 [1:35:26<2:52:27, 7.25s/it] 88%|████████▊ | 10762/12188 [1:35:36<3:08:08, 7.92s/it] {'loss': 0.2931, 'grad_norm': 0.7585613234337817, 'learning_rate': 3.5521816065729574e-07, 'epoch': 0.88} + 88%|████████▊ | 10762/12188 [1:35:36<3:08:08, 7.92s/it] 88%|████████▊ | 10763/12188 [1:35:42<2:58:08, 7.50s/it] {'loss': 0.2945, 'grad_norm': 0.738800977325464, 'learning_rate': 3.5472645198520064e-07, 'epoch': 0.88} + 88%|████████▊ | 10763/12188 [1:35:42<2:58:08, 7.50s/it] 88%|████████▊ | 10764/12188 [1:35:49<2:53:50, 7.33s/it] {'loss': 0.3066, 'grad_norm': 0.7934219732037373, 'learning_rate': 3.5423507135485925e-07, 'epoch': 0.88} + 88%|████████▊ | 10764/12188 [1:35:49<2:53:50, 7.33s/it] 88%|████████▊ | 10765/12188 [1:35:57<2:55:22, 7.39s/it] {'loss': 0.3045, 'grad_norm': 0.6746253694082947, 'learning_rate': 3.537440188009744e-07, 'epoch': 0.88} + 88%|████████▊ | 10765/12188 [1:35:57<2:55:22, 7.39s/it] 88%|████████▊ | 10766/12188 [1:36:05<3:00:50, 7.63s/it] {'loss': 0.2505, 'grad_norm': 0.8015085959015974, 'learning_rate': 3.5325329435822066e-07, 'epoch': 0.88} + 88%|████████▊ | 10766/12188 [1:36:05<3:00:50, 7.63s/it] 88%|████████▊ | 10767/12188 [1:36:15<3:19:21, 8.42s/it] {'loss': 0.2569, 'grad_norm': 0.685996084563905, 'learning_rate': 3.5276289806125406e-07, 'epoch': 0.88} + 88%|████████▊ | 10767/12188 [1:36:15<3:19:21, 8.42s/it] 88%|████████▊ | 10768/12188 [1:36:22<3:06:14, 7.87s/it] {'loss': 0.3141, 'grad_norm': 0.6969686687118699, 'learning_rate': 3.522728299447059e-07, 'epoch': 0.88} + 88%|████████▊ | 10768/12188 [1:36:22<3:06:14, 7.87s/it] 88%|████████▊ | 10769/12188 [1:36:30<3:04:41, 7.81s/it] {'loss': 0.2994, 'grad_norm': 0.9511167012630165, 'learning_rate': 3.517830900431829e-07, 'epoch': 0.88} + 88%|████████▊ | 10769/12188 [1:36:30<3:04:41, 7.81s/it] 88%|████████▊ | 10770/12188 [1:36:36<2:56:07, 7.45s/it] {'loss': 0.2906, 'grad_norm': 0.6500912035148473, 'learning_rate': 3.512936783912707e-07, 'epoch': 0.88} + 88%|████████▊ | 10770/12188 [1:36:36<2:56:07, 7.45s/it] 88%|████████▊ | 10771/12188 [1:36:43<2:51:04, 7.24s/it] {'loss': 0.3249, 'grad_norm': 0.7456250717301549, 'learning_rate': 3.5080459502353003e-07, 'epoch': 0.88} + 88%|████████▊ | 10771/12188 [1:36:43<2:51:04, 7.24s/it] 88%|████████▊ | 10772/12188 [1:36:49<2:45:21, 7.01s/it] {'loss': 0.3338, 'grad_norm': 1.1623343120179837, 'learning_rate': 3.5031583997449883e-07, 'epoch': 0.88} + 88%|████████▊ | 10772/12188 [1:36:49<2:45:21, 7.01s/it] 88%|████████▊ | 10773/12188 [1:36:56<2:44:45, 6.99s/it] {'loss': 0.2832, 'grad_norm': 0.7067606295232163, 'learning_rate': 3.498274132786933e-07, 'epoch': 0.88} + 88%|████████▊ | 10773/12188 [1:36:56<2:44:45, 6.99s/it] 88%|████████▊ | 10774/12188 [1:37:04<2:45:54, 7.04s/it] {'loss': 0.311, 'grad_norm': 0.7127179051156542, 'learning_rate': 3.4933931497060423e-07, 'epoch': 0.88} + 88%|████████▊ | 10774/12188 [1:37:04<2:45:54, 7.04s/it] 88%|████████▊ | 10775/12188 [1:37:11<2:45:57, 7.05s/it] {'loss': 0.2581, 'grad_norm': 0.7043432131062769, 'learning_rate': 3.4885154508470186e-07, 'epoch': 0.88} + 88%|████████▊ | 10775/12188 [1:37:11<2:45:57, 7.05s/it] 88%|████████▊ | 10776/12188 [1:37:17<2:43:53, 6.96s/it] {'loss': 0.3328, 'grad_norm': 0.7875058153744926, 'learning_rate': 3.4836410365542974e-07, 'epoch': 0.88} + 88%|████████▊ | 10776/12188 [1:37:17<2:43:53, 6.96s/it] 88%|████████▊ | 10777/12188 [1:37:25<2:45:17, 7.03s/it] {'loss': 0.3069, 'grad_norm': 0.667966867731291, 'learning_rate': 3.478769907172108e-07, 'epoch': 0.88} + 88%|████████▊ | 10777/12188 [1:37:25<2:45:17, 7.03s/it] 88%|████████▊ | 10778/12188 [1:37:32<2:48:31, 7.17s/it] {'loss': 0.2775, 'grad_norm': 0.7013046435438002, 'learning_rate': 3.47390206304446e-07, 'epoch': 0.88} + 88%|████████▊ | 10778/12188 [1:37:32<2:48:31, 7.17s/it] 88%|████████▊ | 10779/12188 [1:37:39<2:47:05, 7.12s/it] {'loss': 0.2966, 'grad_norm': 0.70448123844531, 'learning_rate': 3.469037504515083e-07, 'epoch': 0.88} + 88%|████████▊ | 10779/12188 [1:37:39<2:47:05, 7.12s/it] 88%|████████▊ | 10780/12188 [1:37:48<3:02:07, 7.76s/it] {'loss': 0.2978, 'grad_norm': 0.7737390272255159, 'learning_rate': 3.464176231927524e-07, 'epoch': 0.88} + 88%|████████▊ | 10780/12188 [1:37:48<3:02:07, 7.76s/it] 88%|████████▊ | 10781/12188 [1:37:56<2:59:34, 7.66s/it] {'loss': 0.2495, 'grad_norm': 0.7429176697699668, 'learning_rate': 3.459318245625082e-07, 'epoch': 0.88} + 88%|████████▊ | 10781/12188 [1:37:56<2:59:34, 7.66s/it] 88%|████████▊ | 10782/12188 [1:38:03<2:55:00, 7.47s/it] {'loss': 0.3106, 'grad_norm': 0.7027949863914136, 'learning_rate': 3.4544635459508037e-07, 'epoch': 0.88} + 88%|████████▊ | 10782/12188 [1:38:03<2:55:00, 7.47s/it] 88%|████████▊ | 10783/12188 [1:38:10<2:56:17, 7.53s/it] {'loss': 0.2674, 'grad_norm': 0.724606518913892, 'learning_rate': 3.4496121332475375e-07, 'epoch': 0.88} + 88%|████████▊ | 10783/12188 [1:38:10<2:56:17, 7.53s/it] 88%|████████▊ | 10784/12188 [1:38:17<2:51:13, 7.32s/it] {'loss': 0.2879, 'grad_norm': 0.6511675759282761, 'learning_rate': 3.444764007857876e-07, 'epoch': 0.88} + 88%|████████▊ | 10784/12188 [1:38:17<2:51:13, 7.32s/it] 88%|████████▊ | 10785/12188 [1:38:24<2:48:08, 7.19s/it] {'loss': 0.2952, 'grad_norm': 0.7180564514628679, 'learning_rate': 3.439919170124173e-07, 'epoch': 0.88} + 88%|████████▊ | 10785/12188 [1:38:24<2:48:08, 7.19s/it] 88%|████████▊ | 10786/12188 [1:38:32<2:55:13, 7.50s/it] {'loss': 0.2665, 'grad_norm': 0.8639292665906375, 'learning_rate': 3.435077620388577e-07, 'epoch': 0.88} + 88%|████████▊ | 10786/12188 [1:38:32<2:55:13, 7.50s/it] 89%|████████▊ | 10787/12188 [1:38:40<2:53:20, 7.42s/it] {'loss': 0.2915, 'grad_norm': 0.6946270690230265, 'learning_rate': 3.4302393589929874e-07, 'epoch': 0.89} + 89%|████████▊ | 10787/12188 [1:38:40<2:53:20, 7.42s/it] 89%|████████▊ | 10788/12188 [1:38:47<2:50:39, 7.31s/it] {'loss': 0.2817, 'grad_norm': 0.721429132461902, 'learning_rate': 3.425404386279074e-07, 'epoch': 0.89} + 89%|████████▊ | 10788/12188 [1:38:47<2:50:39, 7.31s/it] 89%|████████▊ | 10789/12188 [1:38:53<2:46:37, 7.15s/it] {'loss': 0.262, 'grad_norm': 0.6974236888033727, 'learning_rate': 3.420572702588276e-07, 'epoch': 0.89} + 89%|████████▊ | 10789/12188 [1:38:53<2:46:37, 7.15s/it] 89%|████████▊ | 10790/12188 [1:39:01<2:47:33, 7.19s/it] {'loss': 0.2933, 'grad_norm': 0.6782112756880199, 'learning_rate': 3.4157443082617924e-07, 'epoch': 0.89} + 89%|████████▊ | 10790/12188 [1:39:01<2:47:33, 7.19s/it] 89%|████████▊ | 10791/12188 [1:39:07<2:43:53, 7.04s/it] {'loss': 0.3178, 'grad_norm': 0.7301630418897865, 'learning_rate': 3.4109192036406114e-07, 'epoch': 0.89} + 89%|████████▊ | 10791/12188 [1:39:07<2:43:53, 7.04s/it] 89%|████████▊ | 10792/12188 [1:39:14<2:42:58, 7.00s/it] {'loss': 0.322, 'grad_norm': 0.7478433319223032, 'learning_rate': 3.4060973890654546e-07, 'epoch': 0.89} + 89%|████████▊ | 10792/12188 [1:39:14<2:42:58, 7.00s/it] 89%|████████▊ | 10793/12188 [1:39:21<2:40:23, 6.90s/it] {'loss': 0.3334, 'grad_norm': 0.730328110149417, 'learning_rate': 3.401278864876856e-07, 'epoch': 0.89} + 89%|████████▊ | 10793/12188 [1:39:21<2:40:23, 6.90s/it] 89%|████████▊ | 10794/12188 [1:39:28<2:41:09, 6.94s/it] {'loss': 0.3766, 'grad_norm': 0.9220129310455696, 'learning_rate': 3.396463631415065e-07, 'epoch': 0.89} + 89%|████████▊ | 10794/12188 [1:39:28<2:41:09, 6.94s/it] 89%|████████▊ | 10795/12188 [1:39:35<2:40:02, 6.89s/it] {'loss': 0.2923, 'grad_norm': 0.6975834435000785, 'learning_rate': 3.391651689020137e-07, 'epoch': 0.89} + 89%|████████▊ | 10795/12188 [1:39:35<2:40:02, 6.89s/it] 89%|████████▊ | 10796/12188 [1:39:44<2:57:46, 7.66s/it] {'loss': 0.3319, 'grad_norm': 0.6750498972903078, 'learning_rate': 3.386843038031895e-07, 'epoch': 0.89} + 89%|████████▊ | 10796/12188 [1:39:44<2:57:46, 7.66s/it] 89%|████████▊ | 10797/12188 [1:39:54<3:15:11, 8.42s/it] {'loss': 0.273, 'grad_norm': 0.7288450013664576, 'learning_rate': 3.3820376787899013e-07, 'epoch': 0.89} + 89%|████████▊ | 10797/12188 [1:39:54<3:15:11, 8.42s/it] 89%|████████▊ | 10798/12188 [1:40:01<3:05:03, 7.99s/it] {'loss': 0.2655, 'grad_norm': 0.6835775441308786, 'learning_rate': 3.3772356116335113e-07, 'epoch': 0.89} + 89%|████████▊ | 10798/12188 [1:40:01<3:05:03, 7.99s/it] 89%|████████▊ | 10799/12188 [1:40:09<3:02:40, 7.89s/it] {'loss': 0.2913, 'grad_norm': 0.6762043350106368, 'learning_rate': 3.3724368369018435e-07, 'epoch': 0.89} + 89%|████████▊ | 10799/12188 [1:40:09<3:02:40, 7.89s/it] 89%|████████▊ | 10800/12188 [1:40:19<3:19:32, 8.63s/it] {'loss': 0.2896, 'grad_norm': 0.7049489005875917, 'learning_rate': 3.367641354933765e-07, 'epoch': 0.89} + 89%|████████▊ | 10800/12188 [1:40:19<3:19:32, 8.63s/it] 89%|████████▊ | 10801/12188 [1:40:27<3:13:07, 8.35s/it] {'loss': 0.2972, 'grad_norm': 0.7142339764913342, 'learning_rate': 3.3628491660679385e-07, 'epoch': 0.89} + 89%|████████▊ | 10801/12188 [1:40:27<3:13:07, 8.35s/it] 89%|████████▊ | 10802/12188 [1:40:35<3:06:33, 8.08s/it] {'loss': 0.3084, 'grad_norm': 0.661293627677091, 'learning_rate': 3.3580602706427654e-07, 'epoch': 0.89} + 89%|████████▊ | 10802/12188 [1:40:35<3:06:33, 8.08s/it] 89%|████████▊ | 10803/12188 [1:40:41<2:56:10, 7.63s/it] {'loss': 0.2907, 'grad_norm': 0.6734843865113084, 'learning_rate': 3.353274668996442e-07, 'epoch': 0.89} + 89%|████████▊ | 10803/12188 [1:40:41<2:56:10, 7.63s/it] 89%|████████▊ | 10804/12188 [1:40:48<2:50:51, 7.41s/it] {'loss': 0.2917, 'grad_norm': 0.7657539653435671, 'learning_rate': 3.348492361466932e-07, 'epoch': 0.89} + 89%|████████▊ | 10804/12188 [1:40:48<2:50:51, 7.41s/it] 89%|████████▊ | 10805/12188 [1:40:56<2:57:05, 7.68s/it] {'loss': 0.271, 'grad_norm': 0.7203982782120762, 'learning_rate': 3.343713348391925e-07, 'epoch': 0.89} + 89%|████████▊ | 10805/12188 [1:40:56<2:57:05, 7.68s/it] 89%|████████▊ | 10806/12188 [1:41:04<2:58:45, 7.76s/it] {'loss': 0.2812, 'grad_norm': 0.6871958868198255, 'learning_rate': 3.338937630108929e-07, 'epoch': 0.89} + 89%|████████▊ | 10806/12188 [1:41:04<2:58:45, 7.76s/it] 89%|████████▊ | 10807/12188 [1:41:11<2:51:05, 7.43s/it] {'loss': 0.3037, 'grad_norm': 0.6835127584938527, 'learning_rate': 3.3341652069551866e-07, 'epoch': 0.89} + 89%|████████▊ | 10807/12188 [1:41:11<2:51:05, 7.43s/it] 89%|████████▊ | 10808/12188 [1:41:18<2:49:44, 7.38s/it] {'loss': 0.278, 'grad_norm': 0.7147492660687821, 'learning_rate': 3.329396079267727e-07, 'epoch': 0.89} + 89%|████████▊ | 10808/12188 [1:41:18<2:49:44, 7.38s/it] 89%|████████▊ | 10809/12188 [1:41:25<2:45:01, 7.18s/it] {'loss': 0.2995, 'grad_norm': 0.7048103901016645, 'learning_rate': 3.324630247383337e-07, 'epoch': 0.89} + 89%|████████▊ | 10809/12188 [1:41:25<2:45:01, 7.18s/it] 89%|████████▊ | 10810/12188 [1:41:32<2:43:15, 7.11s/it] {'loss': 0.3434, 'grad_norm': 0.6910702457580282, 'learning_rate': 3.319867711638558e-07, 'epoch': 0.89} + 89%|████████▊ | 10810/12188 [1:41:32<2:43:15, 7.11s/it] 89%|████████▊ | 10811/12188 [1:41:39<2:44:40, 7.18s/it] {'loss': 0.2917, 'grad_norm': 0.6647738203091207, 'learning_rate': 3.315108472369738e-07, 'epoch': 0.89} + 89%|████████▊ | 10811/12188 [1:41:39<2:44:40, 7.18s/it] 89%|████████▊ | 10812/12188 [1:41:46<2:44:32, 7.17s/it] {'loss': 0.3218, 'grad_norm': 1.1548288157474993, 'learning_rate': 3.310352529912952e-07, 'epoch': 0.89} + 89%|████████▊ | 10812/12188 [1:41:46<2:44:32, 7.17s/it] 89%|████████▊ | 10813/12188 [1:41:56<2:58:22, 7.78s/it] {'loss': 0.3267, 'grad_norm': 0.7032198116176429, 'learning_rate': 3.3055998846040436e-07, 'epoch': 0.89} + 89%|████████▊ | 10813/12188 [1:41:56<2:58:22, 7.78s/it] 89%|████████▊ | 10814/12188 [1:42:02<2:50:18, 7.44s/it] {'loss': 0.3198, 'grad_norm': 0.6764647008162368, 'learning_rate': 3.3008505367786656e-07, 'epoch': 0.89} + 89%|████████▊ | 10814/12188 [1:42:02<2:50:18, 7.44s/it] 89%|████████▊ | 10815/12188 [1:42:09<2:45:56, 7.25s/it] {'loss': 0.2765, 'grad_norm': 0.804983849648139, 'learning_rate': 3.296104486772184e-07, 'epoch': 0.89} + 89%|████████▊ | 10815/12188 [1:42:09<2:45:56, 7.25s/it] 89%|████████▊ | 10816/12188 [1:42:15<2:39:27, 6.97s/it] {'loss': 0.3047, 'grad_norm': 0.6618731503917012, 'learning_rate': 3.291361734919768e-07, 'epoch': 0.89} + 89%|████████▊ | 10816/12188 [1:42:15<2:39:27, 6.97s/it] 89%|████████▉ | 10817/12188 [1:42:22<2:40:10, 7.01s/it] {'loss': 0.2744, 'grad_norm': 0.7289259368006815, 'learning_rate': 3.2866222815563565e-07, 'epoch': 0.89} + 89%|████████▉ | 10817/12188 [1:42:22<2:40:10, 7.01s/it] 89%|████████▉ | 10818/12188 [1:42:31<2:51:26, 7.51s/it] {'loss': 0.3841, 'grad_norm': 0.7998006761433493, 'learning_rate': 3.2818861270166147e-07, 'epoch': 0.89} + 89%|████████▉ | 10818/12188 [1:42:31<2:51:26, 7.51s/it] 89%|████████▉ | 10819/12188 [1:42:38<2:47:28, 7.34s/it] {'loss': 0.3319, 'grad_norm': 0.7098420379514835, 'learning_rate': 3.277153271635025e-07, 'epoch': 0.89} + 89%|████████▉ | 10819/12188 [1:42:38<2:47:28, 7.34s/it] 89%|████████▉ | 10820/12188 [1:42:45<2:46:05, 7.28s/it] {'loss': 0.2845, 'grad_norm': 0.681547246443237, 'learning_rate': 3.272423715745798e-07, 'epoch': 0.89} + 89%|████████▉ | 10820/12188 [1:42:45<2:46:05, 7.28s/it] 89%|████████▉ | 10821/12188 [1:42:52<2:43:34, 7.18s/it] {'loss': 0.2664, 'grad_norm': 0.7385974405659341, 'learning_rate': 3.2676974596829337e-07, 'epoch': 0.89} + 89%|████████▉ | 10821/12188 [1:42:52<2:43:34, 7.18s/it] 89%|████████▉ | 10822/12188 [1:42:59<2:39:21, 7.00s/it] {'loss': 0.3044, 'grad_norm': 0.7484189541667893, 'learning_rate': 3.262974503780203e-07, 'epoch': 0.89} + 89%|████████▉ | 10822/12188 [1:42:59<2:39:21, 7.00s/it] 89%|████████▉ | 10823/12188 [1:43:06<2:38:36, 6.97s/it] {'loss': 0.3149, 'grad_norm': 0.7259241535319886, 'learning_rate': 3.2582548483711173e-07, 'epoch': 0.89} + 89%|████████▉ | 10823/12188 [1:43:06<2:38:36, 6.97s/it] 89%|████████▉ | 10824/12188 [1:43:13<2:44:32, 7.24s/it] {'loss': 0.2885, 'grad_norm': 0.6641245514347613, 'learning_rate': 3.2535384937889824e-07, 'epoch': 0.89} + 89%|████████▉ | 10824/12188 [1:43:14<2:44:32, 7.24s/it] 89%|████████▉ | 10825/12188 [1:43:21<2:45:05, 7.27s/it] {'loss': 0.3422, 'grad_norm': 0.7989539124930556, 'learning_rate': 3.2488254403668484e-07, 'epoch': 0.89} + 89%|████████▉ | 10825/12188 [1:43:21<2:45:05, 7.27s/it] 89%|████████▉ | 10826/12188 [1:43:29<2:49:07, 7.45s/it] {'loss': 0.2629, 'grad_norm': 0.7227075894439258, 'learning_rate': 3.2441156884375604e-07, 'epoch': 0.89} + 89%|████████▉ | 10826/12188 [1:43:29<2:49:07, 7.45s/it] 89%|████████▉ | 10827/12188 [1:43:37<2:55:17, 7.73s/it] {'loss': 0.3243, 'grad_norm': 0.7780915202268474, 'learning_rate': 3.239409238333702e-07, 'epoch': 0.89} + 89%|████████▉ | 10827/12188 [1:43:37<2:55:17, 7.73s/it] 89%|████████▉ | 10828/12188 [1:43:45<2:56:24, 7.78s/it] {'loss': 0.3161, 'grad_norm': 0.7817101946948053, 'learning_rate': 3.2347060903876235e-07, 'epoch': 0.89} + 89%|████████▉ | 10828/12188 [1:43:45<2:56:24, 7.78s/it] 89%|████████▉ | 10829/12188 [1:43:52<2:54:16, 7.69s/it] {'loss': 0.2923, 'grad_norm': 0.7229107946789612, 'learning_rate': 3.230006244931483e-07, 'epoch': 0.89} + 89%|████████▉ | 10829/12188 [1:43:52<2:54:16, 7.69s/it] 89%|████████▉ | 10830/12188 [1:44:00<2:54:25, 7.71s/it] {'loss': 0.3105, 'grad_norm': 0.6513375806001469, 'learning_rate': 3.225309702297147e-07, 'epoch': 0.89} + 89%|████████▉ | 10830/12188 [1:44:00<2:54:25, 7.71s/it] 89%|████████▉ | 10831/12188 [1:44:07<2:49:21, 7.49s/it] {'loss': 0.2985, 'grad_norm': 0.7888002315441996, 'learning_rate': 3.2206164628162893e-07, 'epoch': 0.89} + 89%|████████▉ | 10831/12188 [1:44:07<2:49:21, 7.49s/it] 89%|████████▉ | 10832/12188 [1:44:14<2:43:48, 7.25s/it] {'loss': 0.3168, 'grad_norm': 0.7340768330967434, 'learning_rate': 3.215926526820351e-07, 'epoch': 0.89} + 89%|████████▉ | 10832/12188 [1:44:14<2:43:48, 7.25s/it] 89%|████████▉ | 10833/12188 [1:44:20<2:38:31, 7.02s/it] {'loss': 0.29, 'grad_norm': 0.7683702079895828, 'learning_rate': 3.211239894640511e-07, 'epoch': 0.89} + 89%|████████▉ | 10833/12188 [1:44:20<2:38:31, 7.02s/it] 89%|████████▉ | 10834/12188 [1:44:28<2:39:41, 7.08s/it] {'loss': 0.2691, 'grad_norm': 0.6202136575031338, 'learning_rate': 3.2065565666077324e-07, 'epoch': 0.89} + 89%|████████▉ | 10834/12188 [1:44:28<2:39:41, 7.08s/it] 89%|████████▉ | 10835/12188 [1:44:35<2:39:34, 7.08s/it] {'loss': 0.3325, 'grad_norm': 0.7388909356314423, 'learning_rate': 3.201876543052762e-07, 'epoch': 0.89} + 89%|████████▉ | 10835/12188 [1:44:35<2:39:34, 7.08s/it] 89%|████████▉ | 10836/12188 [1:44:41<2:35:49, 6.92s/it] {'loss': 0.2956, 'grad_norm': 0.718295075180934, 'learning_rate': 3.197199824306074e-07, 'epoch': 0.89} + 89%|████████▉ | 10836/12188 [1:44:41<2:35:49, 6.92s/it] 89%|████████▉ | 10837/12188 [1:44:48<2:35:07, 6.89s/it] {'loss': 0.2764, 'grad_norm': 0.6917692684898977, 'learning_rate': 3.1925264106979493e-07, 'epoch': 0.89} + 89%|████████▉ | 10837/12188 [1:44:48<2:35:07, 6.89s/it] 89%|████████▉ | 10838/12188 [1:44:55<2:35:11, 6.90s/it] {'loss': 0.2844, 'grad_norm': 0.8300058216577257, 'learning_rate': 3.187856302558395e-07, 'epoch': 0.89} + 89%|████████▉ | 10838/12188 [1:44:55<2:35:11, 6.90s/it] 89%|████████▉ | 10839/12188 [1:45:02<2:36:18, 6.95s/it] {'loss': 0.3091, 'grad_norm': 0.6710197629683972, 'learning_rate': 3.183189500217232e-07, 'epoch': 0.89} + 89%|████████▉ | 10839/12188 [1:45:02<2:36:18, 6.95s/it] 89%|████████▉ | 10840/12188 [1:45:09<2:37:02, 6.99s/it] {'loss': 0.2807, 'grad_norm': 0.6816802181732426, 'learning_rate': 3.178526004004007e-07, 'epoch': 0.89} + 89%|████████▉ | 10840/12188 [1:45:09<2:37:02, 6.99s/it] 89%|████████▉ | 10841/12188 [1:45:17<2:39:55, 7.12s/it] {'loss': 0.3214, 'grad_norm': 0.6796246200064088, 'learning_rate': 3.173865814248045e-07, 'epoch': 0.89} + 89%|████████▉ | 10841/12188 [1:45:17<2:39:55, 7.12s/it] 89%|████████▉ | 10842/12188 [1:45:25<2:45:42, 7.39s/it] {'loss': 0.3168, 'grad_norm': 0.6826435758656089, 'learning_rate': 3.1692089312784556e-07, 'epoch': 0.89} + 89%|████████▉ | 10842/12188 [1:45:25<2:45:42, 7.39s/it] 89%|████████▉ | 10843/12188 [1:45:35<3:04:01, 8.21s/it] {'loss': 0.3163, 'grad_norm': 0.7431158064158394, 'learning_rate': 3.1645553554240815e-07, 'epoch': 0.89} + 89%|████████▉ | 10843/12188 [1:45:35<3:04:01, 8.21s/it] 89%|████████▉ | 10844/12188 [1:45:43<3:02:39, 8.15s/it] {'loss': 0.2861, 'grad_norm': 0.6796578400321, 'learning_rate': 3.159905087013565e-07, 'epoch': 0.89} + 89%|████████▉ | 10844/12188 [1:45:43<3:02:39, 8.15s/it] 89%|████████▉ | 10845/12188 [1:45:50<2:55:52, 7.86s/it] {'loss': 0.2832, 'grad_norm': 0.6371220897235447, 'learning_rate': 3.155258126375299e-07, 'epoch': 0.89} + 89%|████████▉ | 10845/12188 [1:45:50<2:55:52, 7.86s/it] 89%|████████▉ | 10846/12188 [1:45:57<2:48:10, 7.52s/it] {'loss': 0.2754, 'grad_norm': 0.8072115762476729, 'learning_rate': 3.150614473837438e-07, 'epoch': 0.89} + 89%|████████▉ | 10846/12188 [1:45:57<2:48:10, 7.52s/it] 89%|████████▉ | 10847/12188 [1:46:04<2:44:43, 7.37s/it] {'loss': 0.3087, 'grad_norm': 0.7069170150577162, 'learning_rate': 3.1459741297279247e-07, 'epoch': 0.89} + 89%|████████▉ | 10847/12188 [1:46:04<2:44:43, 7.37s/it] 89%|████████▉ | 10848/12188 [1:46:10<2:40:34, 7.19s/it] {'loss': 0.28, 'grad_norm': 0.7477296898236298, 'learning_rate': 3.1413370943744305e-07, 'epoch': 0.89} + 89%|████████▉ | 10848/12188 [1:46:10<2:40:34, 7.19s/it] 89%|████████▉ | 10849/12188 [1:46:17<2:38:41, 7.11s/it] {'loss': 0.279, 'grad_norm': 0.7056635402775728, 'learning_rate': 3.136703368104421e-07, 'epoch': 0.89} + 89%|████████▉ | 10849/12188 [1:46:17<2:38:41, 7.11s/it] 89%|████████▉ | 10850/12188 [1:46:25<2:42:28, 7.29s/it] {'loss': 0.3128, 'grad_norm': 0.7100749965445801, 'learning_rate': 3.1320729512451466e-07, 'epoch': 0.89} + 89%|████████▉ | 10850/12188 [1:46:25<2:42:28, 7.29s/it] 89%|████████▉ | 10851/12188 [1:46:34<2:54:11, 7.82s/it] {'loss': 0.3042, 'grad_norm': 0.702652039833649, 'learning_rate': 3.1274458441235664e-07, 'epoch': 0.89} + 89%|████████▉ | 10851/12188 [1:46:34<2:54:11, 7.82s/it] 89%|████████▉ | 10852/12188 [1:46:41<2:47:26, 7.52s/it] {'loss': 0.2651, 'grad_norm': 0.6515513907012971, 'learning_rate': 3.122822047066465e-07, 'epoch': 0.89} + 89%|████████▉ | 10852/12188 [1:46:41<2:47:26, 7.52s/it] 89%|████████▉ | 10853/12188 [1:46:49<2:49:55, 7.64s/it] {'loss': 0.2706, 'grad_norm': 0.6916139181300489, 'learning_rate': 3.118201560400347e-07, 'epoch': 0.89} + 89%|████████▉ | 10853/12188 [1:46:49<2:49:55, 7.64s/it] 89%|████████▉ | 10854/12188 [1:46:56<2:49:33, 7.63s/it] {'loss': 0.318, 'grad_norm': 0.749606295017447, 'learning_rate': 3.113584384451518e-07, 'epoch': 0.89} + 89%|████████▉ | 10854/12188 [1:46:56<2:49:33, 7.63s/it] 89%|████████▉ | 10855/12188 [1:47:03<2:42:46, 7.33s/it] {'loss': 0.3122, 'grad_norm': 0.7513571833940018, 'learning_rate': 3.108970519546034e-07, 'epoch': 0.89} + 89%|████████▉ | 10855/12188 [1:47:03<2:42:46, 7.33s/it] 89%|████████▉ | 10856/12188 [1:47:11<2:48:33, 7.59s/it] {'loss': 0.298, 'grad_norm': 0.7729236461561649, 'learning_rate': 3.1043599660097125e-07, 'epoch': 0.89} + 89%|████████▉ | 10856/12188 [1:47:11<2:48:33, 7.59s/it] 89%|████████▉ | 10857/12188 [1:47:19<2:47:11, 7.54s/it] {'loss': 0.2722, 'grad_norm': 0.8043280504603794, 'learning_rate': 3.0997527241681425e-07, 'epoch': 0.89} + 89%|████████▉ | 10857/12188 [1:47:19<2:47:11, 7.54s/it] 89%|████████▉ | 10858/12188 [1:47:26<2:42:42, 7.34s/it] {'loss': 0.3459, 'grad_norm': 0.7234542783669548, 'learning_rate': 3.095148794346692e-07, 'epoch': 0.89} + 89%|████████▉ | 10858/12188 [1:47:26<2:42:42, 7.34s/it] 89%|████████▉ | 10859/12188 [1:47:33<2:40:49, 7.26s/it] {'loss': 0.2673, 'grad_norm': 0.6494417818751382, 'learning_rate': 3.090548176870467e-07, 'epoch': 0.89} + 89%|████████▉ | 10859/12188 [1:47:33<2:40:49, 7.26s/it] 89%|████████▉ | 10860/12188 [1:47:39<2:35:38, 7.03s/it] {'loss': 0.2884, 'grad_norm': 0.923083291231742, 'learning_rate': 3.0859508720643693e-07, 'epoch': 0.89} + 89%|████████▉ | 10860/12188 [1:47:39<2:35:38, 7.03s/it] 89%|████████▉ | 10861/12188 [1:47:47<2:38:41, 7.18s/it] {'loss': 0.3125, 'grad_norm': 0.7296758404531482, 'learning_rate': 3.0813568802530445e-07, 'epoch': 0.89} + 89%|████████▉ | 10861/12188 [1:47:47<2:38:41, 7.18s/it] 89%|████████▉ | 10862/12188 [1:47:55<2:44:53, 7.46s/it] {'loss': 0.3145, 'grad_norm': 0.7297117054463833, 'learning_rate': 3.0767662017609167e-07, 'epoch': 0.89} + 89%|████████▉ | 10862/12188 [1:47:55<2:44:53, 7.46s/it] 89%|████████▉ | 10863/12188 [1:48:02<2:41:58, 7.33s/it] {'loss': 0.3232, 'grad_norm': 0.7537340140946273, 'learning_rate': 3.0721788369121765e-07, 'epoch': 0.89} + 89%|████████▉ | 10863/12188 [1:48:02<2:41:58, 7.33s/it] 89%|████████▉ | 10864/12188 [1:48:09<2:39:37, 7.23s/it] {'loss': 0.2958, 'grad_norm': 0.6456330037516474, 'learning_rate': 3.0675947860307643e-07, 'epoch': 0.89} + 89%|████████▉ | 10864/12188 [1:48:09<2:39:37, 7.23s/it] 89%|████████▉ | 10865/12188 [1:48:16<2:37:49, 7.16s/it] {'loss': 0.3221, 'grad_norm': 0.8455813428629453, 'learning_rate': 3.0630140494404103e-07, 'epoch': 0.89} + 89%|████████▉ | 10865/12188 [1:48:16<2:37:49, 7.16s/it] 89%|████████▉ | 10866/12188 [1:48:22<2:32:59, 6.94s/it] {'loss': 0.2828, 'grad_norm': 0.6894925920450609, 'learning_rate': 3.0584366274645947e-07, 'epoch': 0.89} + 89%|████████▉ | 10866/12188 [1:48:22<2:32:59, 6.94s/it] 89%|████████▉ | 10867/12188 [1:48:30<2:37:26, 7.15s/it] {'loss': 0.3264, 'grad_norm': 0.6622760221088576, 'learning_rate': 3.05386252042657e-07, 'epoch': 0.89} + 89%|████████▉ | 10867/12188 [1:48:30<2:37:26, 7.15s/it] 89%|████████▉ | 10868/12188 [1:48:37<2:36:05, 7.10s/it] {'loss': 0.2818, 'grad_norm': 0.7005191868594292, 'learning_rate': 3.0492917286493507e-07, 'epoch': 0.89} + 89%|████████▉ | 10868/12188 [1:48:37<2:36:05, 7.10s/it] 89%|████████▉ | 10869/12188 [1:48:44<2:36:07, 7.10s/it] {'loss': 0.2988, 'grad_norm': 0.7306751557316571, 'learning_rate': 3.044724252455711e-07, 'epoch': 0.89} + 89%|████████▉ | 10869/12188 [1:48:44<2:36:07, 7.10s/it] 89%|████████▉ | 10870/12188 [1:48:51<2:37:46, 7.18s/it] {'loss': 0.3026, 'grad_norm': 0.6551756081781487, 'learning_rate': 3.0401600921681986e-07, 'epoch': 0.89} + 89%|████████▉ | 10870/12188 [1:48:51<2:37:46, 7.18s/it] 89%|████████▉ | 10871/12188 [1:48:59<2:43:54, 7.47s/it] {'loss': 0.2804, 'grad_norm': 0.7067005477370957, 'learning_rate': 3.0355992481091503e-07, 'epoch': 0.89} + 89%|████████▉ | 10871/12188 [1:48:59<2:43:54, 7.47s/it] 89%|████████▉ | 10872/12188 [1:49:07<2:41:14, 7.35s/it] {'loss': 0.3135, 'grad_norm': 0.6572516276315826, 'learning_rate': 3.0310417206006136e-07, 'epoch': 0.89} + 89%|████████▉ | 10872/12188 [1:49:07<2:41:14, 7.35s/it] 89%|████████▉ | 10873/12188 [1:49:14<2:40:49, 7.34s/it] {'loss': 0.2807, 'grad_norm': 0.6504505158131968, 'learning_rate': 3.0264875099644595e-07, 'epoch': 0.89} + 89%|████████▉ | 10873/12188 [1:49:14<2:40:49, 7.34s/it] 89%|████████▉ | 10874/12188 [1:49:21<2:36:55, 7.17s/it] {'loss': 0.3161, 'grad_norm': 0.683768418428627, 'learning_rate': 3.021936616522281e-07, 'epoch': 0.89} + 89%|████████▉ | 10874/12188 [1:49:21<2:36:55, 7.17s/it] 89%|████████▉ | 10875/12188 [1:49:29<2:42:03, 7.41s/it] {'loss': 0.2956, 'grad_norm': 0.7130306072157914, 'learning_rate': 3.017389040595464e-07, 'epoch': 0.89} + 89%|████████▉ | 10875/12188 [1:49:29<2:42:03, 7.41s/it] 89%|████████▉ | 10876/12188 [1:49:35<2:38:47, 7.26s/it] {'loss': 0.2744, 'grad_norm': 0.8021579139877233, 'learning_rate': 3.0128447825051587e-07, 'epoch': 0.89} + 89%|████████▉ | 10876/12188 [1:49:35<2:38:47, 7.26s/it] 89%|████████▉ | 10877/12188 [1:49:43<2:41:03, 7.37s/it] {'loss': 0.2701, 'grad_norm': 0.651657430444367, 'learning_rate': 3.008303842572252e-07, 'epoch': 0.89} + 89%|████████▉ | 10877/12188 [1:49:43<2:41:03, 7.37s/it] 89%|████████▉ | 10878/12188 [1:49:50<2:40:20, 7.34s/it] {'loss': 0.3164, 'grad_norm': 0.6484546190247283, 'learning_rate': 3.0037662211174437e-07, 'epoch': 0.89} + 89%|████████▉ | 10878/12188 [1:49:50<2:40:20, 7.34s/it] 89%|████████▉ | 10879/12188 [1:49:58<2:41:14, 7.39s/it] {'loss': 0.2935, 'grad_norm': 0.814262189076426, 'learning_rate': 2.9992319184611485e-07, 'epoch': 0.89} + 89%|████████▉ | 10879/12188 [1:49:58<2:41:14, 7.39s/it] 89%|████████▉ | 10880/12188 [1:50:05<2:41:13, 7.40s/it] {'loss': 0.3104, 'grad_norm': 0.7715402655729438, 'learning_rate': 2.994700934923589e-07, 'epoch': 0.89} + 89%|████████▉ | 10880/12188 [1:50:05<2:41:13, 7.40s/it] 89%|████████▉ | 10881/12188 [1:50:12<2:38:56, 7.30s/it] {'loss': 0.3122, 'grad_norm': 0.9164684457541452, 'learning_rate': 2.9901732708247256e-07, 'epoch': 0.89} + 89%|████████▉ | 10881/12188 [1:50:12<2:38:56, 7.30s/it] 89%|████████▉ | 10882/12188 [1:50:19<2:36:30, 7.19s/it] {'loss': 0.2945, 'grad_norm': 0.7076797274152618, 'learning_rate': 2.9856489264843025e-07, 'epoch': 0.89} + 89%|████████▉ | 10882/12188 [1:50:19<2:36:30, 7.19s/it] 89%|████████▉ | 10883/12188 [1:50:26<2:31:35, 6.97s/it] {'loss': 0.3228, 'grad_norm': 0.6913351967974697, 'learning_rate': 2.981127902221809e-07, 'epoch': 0.89} + 89%|████████▉ | 10883/12188 [1:50:26<2:31:35, 6.97s/it] 89%|████████▉ | 10884/12188 [1:50:34<2:42:23, 7.47s/it] {'loss': 0.2701, 'grad_norm': 0.7297479767556889, 'learning_rate': 2.976610198356533e-07, 'epoch': 0.89} + 89%|████████▉ | 10884/12188 [1:50:34<2:42:23, 7.47s/it] 89%|████████▉ | 10885/12188 [1:50:41<2:38:56, 7.32s/it] {'loss': 0.2997, 'grad_norm': 0.6976886066877085, 'learning_rate': 2.972095815207487e-07, 'epoch': 0.89} + 89%|████████▉ | 10885/12188 [1:50:41<2:38:56, 7.32s/it] 89%|████████▉ | 10886/12188 [1:50:51<2:52:35, 7.95s/it] {'loss': 0.2832, 'grad_norm': 0.8138496560127182, 'learning_rate': 2.9675847530934887e-07, 'epoch': 0.89} + 89%|████████▉ | 10886/12188 [1:50:51<2:52:35, 7.95s/it]Invalidate trace cache @ step 2: expected module 1, but got module 364 + 89%|████████▉ | 10887/12188 [1:50:56<2:36:41, 7.23s/it] {'loss': 0.6372, 'grad_norm': 0.5769117514713917, 'learning_rate': 2.963077012333082e-07, 'epoch': 0.89} + 89%|████████▉ | 10887/12188 [1:50:56<2:36:41, 7.23s/it] 89%|████████▉ | 10888/12188 [1:51:06<2:53:26, 8.01s/it] {'loss': 0.2919, 'grad_norm': 0.6925629277208385, 'learning_rate': 2.9585725932446075e-07, 'epoch': 0.89} + 89%|████████▉ | 10888/12188 [1:51:06<2:53:26, 8.01s/it] 89%|████████▉ | 10889/12188 [1:51:13<2:48:40, 7.79s/it] {'loss': 0.318, 'grad_norm': 0.676507696219157, 'learning_rate': 2.95407149614616e-07, 'epoch': 0.89} + 89%|████████▉ | 10889/12188 [1:51:13<2:48:40, 7.79s/it] 89%|████████▉ | 10890/12188 [1:51:20<2:43:10, 7.54s/it] {'loss': 0.3093, 'grad_norm': 0.7065558525680767, 'learning_rate': 2.9495737213555977e-07, 'epoch': 0.89} + 89%|████████▉ | 10890/12188 [1:51:20<2:43:10, 7.54s/it] 89%|████████▉ | 10891/12188 [1:51:28<2:44:38, 7.62s/it] {'loss': 0.2826, 'grad_norm': 0.8475060699495368, 'learning_rate': 2.9450792691905537e-07, 'epoch': 0.89} + 89%|████████▉ | 10891/12188 [1:51:28<2:44:38, 7.62s/it] 89%|████████▉ | 10892/12188 [1:51:35<2:39:19, 7.38s/it] {'loss': 0.2845, 'grad_norm': 0.6690612937246283, 'learning_rate': 2.940588139968403e-07, 'epoch': 0.89} + 89%|████████▉ | 10892/12188 [1:51:35<2:39:19, 7.38s/it] 89%|████████▉ | 10893/12188 [1:51:42<2:36:02, 7.23s/it] {'loss': 0.3006, 'grad_norm': 2.205660001416486, 'learning_rate': 2.9361003340063087e-07, 'epoch': 0.89} + 89%|████████▉ | 10893/12188 [1:51:42<2:36:02, 7.23s/it] 89%|████████▉ | 10894/12188 [1:51:49<2:37:07, 7.29s/it] {'loss': 0.2748, 'grad_norm': 0.9023827879085891, 'learning_rate': 2.9316158516212056e-07, 'epoch': 0.89} + 89%|████████▉ | 10894/12188 [1:51:49<2:37:07, 7.29s/it] 89%|████████▉ | 10895/12188 [1:51:57<2:39:41, 7.41s/it] {'loss': 0.3195, 'grad_norm': 0.8342565568165216, 'learning_rate': 2.9271346931297684e-07, 'epoch': 0.89} + 89%|████████▉ | 10895/12188 [1:51:57<2:39:41, 7.41s/it] 89%|████████▉ | 10896/12188 [1:52:05<2:40:38, 7.46s/it] {'loss': 0.3137, 'grad_norm': 0.6998111525184544, 'learning_rate': 2.9226568588484437e-07, 'epoch': 0.89} + 89%|████████▉ | 10896/12188 [1:52:05<2:40:38, 7.46s/it] 89%|████████▉ | 10897/12188 [1:52:13<2:43:49, 7.61s/it] {'loss': 0.2918, 'grad_norm': 0.6423626466166217, 'learning_rate': 2.918182349093468e-07, 'epoch': 0.89} + 89%|████████▉ | 10897/12188 [1:52:13<2:43:49, 7.61s/it] 89%|████████▉ | 10898/12188 [1:52:20<2:41:25, 7.51s/it] {'loss': 0.3087, 'grad_norm': 0.7210907142927755, 'learning_rate': 2.913711164180799e-07, 'epoch': 0.89} + 89%|████████▉ | 10898/12188 [1:52:20<2:41:25, 7.51s/it] 89%|████████▉ | 10899/12188 [1:52:27<2:39:57, 7.45s/it] {'loss': 0.2908, 'grad_norm': 0.642432259392805, 'learning_rate': 2.909243304426207e-07, 'epoch': 0.89} + 89%|████████▉ | 10899/12188 [1:52:27<2:39:57, 7.45s/it] 89%|████████▉ | 10900/12188 [1:52:34<2:36:03, 7.27s/it] {'loss': 0.2667, 'grad_norm': 0.7141665408817929, 'learning_rate': 2.904778770145189e-07, 'epoch': 0.89} + 89%|████████▉ | 10900/12188 [1:52:34<2:36:03, 7.27s/it] 89%|████████▉ | 10901/12188 [1:52:42<2:38:38, 7.40s/it] {'loss': 0.3078, 'grad_norm': 0.7209895640347819, 'learning_rate': 2.9003175616530264e-07, 'epoch': 0.89} + 89%|████████▉ | 10901/12188 [1:52:42<2:38:38, 7.40s/it] 89%|████████▉ | 10902/12188 [1:52:49<2:37:09, 7.33s/it] {'loss': 0.2733, 'grad_norm': 0.7486289720917149, 'learning_rate': 2.895859679264779e-07, 'epoch': 0.89} + 89%|████████▉ | 10902/12188 [1:52:49<2:37:09, 7.33s/it] 89%|████████▉ | 10903/12188 [1:52:56<2:33:40, 7.18s/it] {'loss': 0.3171, 'grad_norm': 0.7739089336241555, 'learning_rate': 2.8914051232952325e-07, 'epoch': 0.89} + 89%|████████▉ | 10903/12188 [1:52:56<2:33:40, 7.18s/it] 89%|████████▉ | 10904/12188 [1:53:03<2:34:48, 7.23s/it] {'loss': 0.2889, 'grad_norm': 0.7738067177352186, 'learning_rate': 2.88695389405898e-07, 'epoch': 0.89} + 89%|████████▉ | 10904/12188 [1:53:03<2:34:48, 7.23s/it] 89%|████████▉ | 10905/12188 [1:53:10<2:30:28, 7.04s/it] {'loss': 0.2793, 'grad_norm': 1.7362854566028287, 'learning_rate': 2.882505991870338e-07, 'epoch': 0.89} + 89%|████████▉ | 10905/12188 [1:53:10<2:30:28, 7.04s/it] 89%|████████▉ | 10906/12188 [1:53:17<2:34:11, 7.22s/it] {'loss': 0.3691, 'grad_norm': 0.74298095180728, 'learning_rate': 2.8780614170434264e-07, 'epoch': 0.89} + 89%|████████▉ | 10906/12188 [1:53:17<2:34:11, 7.22s/it] 89%|████████▉ | 10907/12188 [1:53:24<2:30:52, 7.07s/it] {'loss': 0.2924, 'grad_norm': 0.6595627315675903, 'learning_rate': 2.873620169892116e-07, 'epoch': 0.89} + 89%|████████▉ | 10907/12188 [1:53:24<2:30:52, 7.07s/it] 89%|████████▉ | 10908/12188 [1:53:31<2:29:49, 7.02s/it] {'loss': 0.3197, 'grad_norm': 0.7658306968004694, 'learning_rate': 2.86918225073004e-07, 'epoch': 0.89} + 89%|████████▉ | 10908/12188 [1:53:31<2:29:49, 7.02s/it] 90%|████████▉ | 10909/12188 [1:53:39<2:35:47, 7.31s/it] {'loss': 0.2923, 'grad_norm': 0.6921074793945114, 'learning_rate': 2.8647476598705816e-07, 'epoch': 0.9} + 90%|████████▉ | 10909/12188 [1:53:39<2:35:47, 7.31s/it] 90%|████████▉ | 10910/12188 [1:53:46<2:32:55, 7.18s/it] {'loss': 0.3353, 'grad_norm': 0.9151162271055862, 'learning_rate': 2.860316397626922e-07, 'epoch': 0.9} + 90%|████████▉ | 10910/12188 [1:53:46<2:32:55, 7.18s/it] 90%|████████▉ | 10911/12188 [1:53:53<2:30:42, 7.08s/it] {'loss': 0.2737, 'grad_norm': 0.7508579105971467, 'learning_rate': 2.855888464311973e-07, 'epoch': 0.9} + 90%|████████▉ | 10911/12188 [1:53:53<2:30:42, 7.08s/it] 90%|████████▉ | 10912/12188 [1:53:59<2:26:58, 6.91s/it] {'loss': 0.324, 'grad_norm': 0.7504671463251783, 'learning_rate': 2.851463860238446e-07, 'epoch': 0.9} + 90%|████████▉ | 10912/12188 [1:53:59<2:26:58, 6.91s/it] 90%|████████▉ | 10913/12188 [1:54:06<2:28:31, 6.99s/it] {'loss': 0.3118, 'grad_norm': 0.7243239263991109, 'learning_rate': 2.8470425857187847e-07, 'epoch': 0.9} + 90%|████████▉ | 10913/12188 [1:54:06<2:28:31, 6.99s/it] 90%|████████▉ | 10914/12188 [1:54:16<2:46:02, 7.82s/it] {'loss': 0.2962, 'grad_norm': 0.7150438048171645, 'learning_rate': 2.842624641065228e-07, 'epoch': 0.9} + 90%|████████▉ | 10914/12188 [1:54:16<2:46:02, 7.82s/it] 90%|████████▉ | 10915/12188 [1:54:23<2:39:05, 7.50s/it] {'loss': 0.2932, 'grad_norm': 0.7407985933130281, 'learning_rate': 2.8382100265897437e-07, 'epoch': 0.9} + 90%|████████▉ | 10915/12188 [1:54:23<2:39:05, 7.50s/it] 90%|████████▉ | 10916/12188 [1:54:30<2:39:01, 7.50s/it] {'loss': 0.3219, 'grad_norm': 0.7708787008633821, 'learning_rate': 2.833798742604099e-07, 'epoch': 0.9} + 90%|████████▉ | 10916/12188 [1:54:30<2:39:01, 7.50s/it] 90%|████████▉ | 10917/12188 [1:54:37<2:33:10, 7.23s/it] {'loss': 0.2786, 'grad_norm': 2.4149476541879062, 'learning_rate': 2.829390789419817e-07, 'epoch': 0.9} + 90%|████████▉ | 10917/12188 [1:54:37<2:33:10, 7.23s/it] 90%|████████▉ | 10918/12188 [1:54:44<2:30:44, 7.12s/it] {'loss': 0.3296, 'grad_norm': 0.7934096649680883, 'learning_rate': 2.824986167348165e-07, 'epoch': 0.9} + 90%|████████▉ | 10918/12188 [1:54:44<2:30:44, 7.12s/it] 90%|████████▉ | 10919/12188 [1:54:50<2:27:50, 6.99s/it] {'loss': 0.2902, 'grad_norm': 0.7026113829093992, 'learning_rate': 2.820584876700194e-07, 'epoch': 0.9} + 90%|████████▉ | 10919/12188 [1:54:50<2:27:50, 6.99s/it] 90%|████████▉ | 10920/12188 [1:54:57<2:26:55, 6.95s/it] {'loss': 0.2689, 'grad_norm': 0.647293004548505, 'learning_rate': 2.8161869177867285e-07, 'epoch': 0.9} + 90%|████████▉ | 10920/12188 [1:54:57<2:26:55, 6.95s/it] 90%|████████▉ | 10921/12188 [1:55:04<2:27:46, 7.00s/it] {'loss': 0.3168, 'grad_norm': 0.6738017166603993, 'learning_rate': 2.81179229091833e-07, 'epoch': 0.9} + 90%|████████▉ | 10921/12188 [1:55:04<2:27:46, 7.00s/it] 90%|████████▉ | 10922/12188 [1:55:12<2:30:23, 7.13s/it] {'loss': 0.3056, 'grad_norm': 0.8033554741463365, 'learning_rate': 2.8074009964053574e-07, 'epoch': 0.9} + 90%|████████▉ | 10922/12188 [1:55:12<2:30:23, 7.13s/it] 90%|████████▉ | 10923/12188 [1:55:19<2:27:44, 7.01s/it] {'loss': 0.2768, 'grad_norm': 0.7634524289278235, 'learning_rate': 2.8030130345578997e-07, 'epoch': 0.9} + 90%|████████▉ | 10923/12188 [1:55:19<2:27:44, 7.01s/it] 90%|████████▉ | 10924/12188 [1:55:27<2:34:24, 7.33s/it] {'loss': 0.302, 'grad_norm': 0.6642941821060885, 'learning_rate': 2.798628405685827e-07, 'epoch': 0.9} + 90%|████████▉ | 10924/12188 [1:55:27<2:34:24, 7.33s/it] 90%|████████▉ | 10925/12188 [1:55:34<2:36:17, 7.42s/it] {'loss': 0.3015, 'grad_norm': 0.6712780798478656, 'learning_rate': 2.794247110098791e-07, 'epoch': 0.9} + 90%|████████▉ | 10925/12188 [1:55:34<2:36:17, 7.42s/it] 90%|████████▉ | 10926/12188 [1:55:41<2:33:03, 7.28s/it] {'loss': 0.3048, 'grad_norm': 0.7125069174108606, 'learning_rate': 2.789869148106172e-07, 'epoch': 0.9} + 90%|████████▉ | 10926/12188 [1:55:41<2:33:03, 7.28s/it] 90%|████████▉ | 10927/12188 [1:55:48<2:32:41, 7.27s/it] {'loss': 0.316, 'grad_norm': 0.8065920775372195, 'learning_rate': 2.785494520017157e-07, 'epoch': 0.9} + 90%|████████▉ | 10927/12188 [1:55:48<2:32:41, 7.27s/it] 90%|████████▉ | 10928/12188 [1:55:56<2:31:36, 7.22s/it] {'loss': 0.3054, 'grad_norm': 0.7001826256501802, 'learning_rate': 2.7811232261406527e-07, 'epoch': 0.9} + 90%|████████▉ | 10928/12188 [1:55:56<2:31:36, 7.22s/it] 90%|████████▉ | 10929/12188 [1:56:03<2:34:57, 7.39s/it] {'loss': 0.2996, 'grad_norm': 0.7347243611439116, 'learning_rate': 2.7767552667853634e-07, 'epoch': 0.9} + 90%|████████▉ | 10929/12188 [1:56:03<2:34:57, 7.39s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1348, in _get_item + raise ValueError( +ValueError: Number of image tokens ['data/dialogs/other_screenshot/original/ProfileDetailsDialog_1739921015.458943.png'] does not match number of images None +[Try #0] Failed to fetch sample 1868189 in VC:s3://gui-agent/jedi/images/component_v1_130k/component_v1_130k_extracted/. Exception: Number of image tokens ['data/dialogs/other_screenshot/original/ProfileDetailsDialog_1739921015.458943.png'] does not match number of images None +Problematic sample: {'image': 'data/dialogs/other_screenshot/original/ProfileDetailsDialog_1739921015.458943.png', 'conversations': [], 'image_id': 'data/dialogs/other_screenshot/original/ProfileDetailsDialog_1739921015.458943.png'} + 90%|████████▉ | 10930/12188 [1:56:10<2:30:35, 7.18s/it] {'loss': 0.2927, 'grad_norm': 0.6446090531981381, 'learning_rate': 2.7723906422597537e-07, 'epoch': 0.9} + 90%|████████▉ | 10930/12188 [1:56:10<2:30:35, 7.18s/it] 90%|████████▉ | 10931/12188 [1:56:17<2:26:55, 7.01s/it] {'loss': 0.2815, 'grad_norm': 0.7144332651038321, 'learning_rate': 2.7680293528720314e-07, 'epoch': 0.9} + 90%|████████▉ | 10931/12188 [1:56:17<2:26:55, 7.01s/it] 90%|████████▉ | 10932/12188 [1:56:26<2:44:02, 7.84s/it] {'loss': 0.3065, 'grad_norm': 0.7079309331088491, 'learning_rate': 2.763671398930201e-07, 'epoch': 0.9} + 90%|████████▉ | 10932/12188 [1:56:26<2:44:02, 7.84s/it] 90%|████████▉ | 10933/12188 [1:56:33<2:36:34, 7.49s/it] {'loss': 0.3027, 'grad_norm': 0.6719583319505184, 'learning_rate': 2.7593167807419994e-07, 'epoch': 0.9} + 90%|████████▉ | 10933/12188 [1:56:33<2:36:34, 7.49s/it] 90%|████████▉ | 10934/12188 [1:56:41<2:42:05, 7.76s/it] {'loss': 0.3078, 'grad_norm': 0.7360533531029342, 'learning_rate': 2.754965498614948e-07, 'epoch': 0.9} + 90%|████████▉ | 10934/12188 [1:56:41<2:42:05, 7.76s/it] 90%|████████▉ | 10935/12188 [1:56:50<2:47:53, 8.04s/it] {'loss': 0.2778, 'grad_norm': 0.724447040794507, 'learning_rate': 2.7506175528563385e-07, 'epoch': 0.9} + 90%|████████▉ | 10935/12188 [1:56:50<2:47:53, 8.04s/it] 90%|████████▉ | 10936/12188 [1:56:57<2:39:52, 7.66s/it] {'loss': 0.2748, 'grad_norm': 0.62649123889358, 'learning_rate': 2.746272943773198e-07, 'epoch': 0.9} + 90%|████████▉ | 10936/12188 [1:56:57<2:39:52, 7.66s/it] 90%|████████▉ | 10937/12188 [1:57:05<2:39:38, 7.66s/it] {'loss': 0.3277, 'grad_norm': 0.6895103352013905, 'learning_rate': 2.741931671672343e-07, 'epoch': 0.9} + 90%|████████▉ | 10937/12188 [1:57:05<2:39:38, 7.66s/it] 90%|████████▉ | 10938/12188 [1:57:12<2:39:18, 7.65s/it] {'loss': 0.3073, 'grad_norm': 0.6735100633379151, 'learning_rate': 2.73759373686035e-07, 'epoch': 0.9} + 90%|████████▉ | 10938/12188 [1:57:12<2:39:18, 7.65s/it] 90%|████████▉ | 10939/12188 [1:57:19<2:34:02, 7.40s/it] {'loss': 0.2849, 'grad_norm': 0.7134542516678272, 'learning_rate': 2.7332591396435513e-07, 'epoch': 0.9} + 90%|████████▉ | 10939/12188 [1:57:19<2:34:02, 7.40s/it] 90%|████████▉ | 10940/12188 [1:57:26<2:30:36, 7.24s/it] {'loss': 0.2637, 'grad_norm': 0.7787113215650179, 'learning_rate': 2.7289278803280586e-07, 'epoch': 0.9} + 90%|████████▉ | 10940/12188 [1:57:26<2:30:36, 7.24s/it] 90%|████████▉ | 10941/12188 [1:57:34<2:38:40, 7.63s/it] {'loss': 0.2975, 'grad_norm': 0.7699192986227466, 'learning_rate': 2.724599959219726e-07, 'epoch': 0.9} + 90%|████████▉ | 10941/12188 [1:57:34<2:38:40, 7.63s/it] 90%|████████▉ | 10942/12188 [1:57:41<2:33:33, 7.39s/it] {'loss': 0.3316, 'grad_norm': 0.771421543802181, 'learning_rate': 2.720275376624187e-07, 'epoch': 0.9} + 90%|████████▉ | 10942/12188 [1:57:41<2:33:33, 7.39s/it] 90%|████████▉ | 10943/12188 [1:57:51<2:44:52, 7.95s/it] {'loss': 0.2846, 'grad_norm': 0.7227235619714899, 'learning_rate': 2.7159541328468487e-07, 'epoch': 0.9} + 90%|████████▉ | 10943/12188 [1:57:51<2:44:52, 7.95s/it] 90%|████████▉ | 10944/12188 [1:57:58<2:43:40, 7.89s/it] {'loss': 0.2707, 'grad_norm': 0.701707327276715, 'learning_rate': 2.7116362281928543e-07, 'epoch': 0.9} + 90%|████████▉ | 10944/12188 [1:57:58<2:43:40, 7.89s/it] 90%|████████▉ | 10945/12188 [1:58:05<2:36:11, 7.54s/it] {'loss': 0.2667, 'grad_norm': 0.7226938173283675, 'learning_rate': 2.707321662967144e-07, 'epoch': 0.9} + 90%|████████▉ | 10945/12188 [1:58:05<2:36:11, 7.54s/it] 90%|████████▉ | 10946/12188 [1:58:12<2:30:45, 7.28s/it] {'loss': 0.3228, 'grad_norm': 0.6611351276893066, 'learning_rate': 2.703010437474385e-07, 'epoch': 0.9} + 90%|████████▉ | 10946/12188 [1:58:12<2:30:45, 7.28s/it] 90%|████████▉ | 10947/12188 [1:58:18<2:26:23, 7.08s/it] {'loss': 0.3169, 'grad_norm': 0.7475286167604361, 'learning_rate': 2.698702552019045e-07, 'epoch': 0.9} + 90%|████████▉ | 10947/12188 [1:58:18<2:26:23, 7.08s/it] 90%|████████▉ | 10948/12188 [1:58:25<2:25:26, 7.04s/it] {'loss': 0.2995, 'grad_norm': 0.6542295495301631, 'learning_rate': 2.694398006905341e-07, 'epoch': 0.9} + 90%|████████▉ | 10948/12188 [1:58:25<2:25:26, 7.04s/it] 90%|████████▉ | 10949/12188 [1:58:33<2:29:04, 7.22s/it] {'loss': 0.3349, 'grad_norm': 0.7516680005061922, 'learning_rate': 2.690096802437242e-07, 'epoch': 0.9} + 90%|████████▉ | 10949/12188 [1:58:33<2:29:04, 7.22s/it] 90%|████████▉ | 10950/12188 [1:58:40<2:28:01, 7.17s/it] {'loss': 0.2791, 'grad_norm': 0.7851926248255171, 'learning_rate': 2.685798938918505e-07, 'epoch': 0.9} + 90%|████████▉ | 10950/12188 [1:58:40<2:28:01, 7.17s/it] 90%|████████▉ | 10951/12188 [1:58:47<2:27:46, 7.17s/it] {'loss': 0.3244, 'grad_norm': 0.7053432169495597, 'learning_rate': 2.6815044166526247e-07, 'epoch': 0.9} + 90%|████████▉ | 10951/12188 [1:58:47<2:27:46, 7.17s/it] 90%|████████▉ | 10952/12188 [1:58:54<2:26:11, 7.10s/it] {'loss': 0.2944, 'grad_norm': 0.6376767734444132, 'learning_rate': 2.677213235942877e-07, 'epoch': 0.9} + 90%|████████▉ | 10952/12188 [1:58:54<2:26:11, 7.10s/it] 90%|████████▉ | 10953/12188 [1:59:01<2:26:23, 7.11s/it] {'loss': 0.3169, 'grad_norm': 0.7108949010498005, 'learning_rate': 2.672925397092313e-07, 'epoch': 0.9} + 90%|████████▉ | 10953/12188 [1:59:01<2:26:23, 7.11s/it] 90%|████████▉ | 10954/12188 [1:59:09<2:28:30, 7.22s/it] {'loss': 0.2825, 'grad_norm': 0.6986416690263251, 'learning_rate': 2.668640900403707e-07, 'epoch': 0.9} + 90%|████████▉ | 10954/12188 [1:59:09<2:28:30, 7.22s/it] 90%|████████▉ | 10955/12188 [1:59:16<2:26:35, 7.13s/it] {'loss': 0.2928, 'grad_norm': 0.6474496300534479, 'learning_rate': 2.66435974617964e-07, 'epoch': 0.9} + 90%|████████▉ | 10955/12188 [1:59:16<2:26:35, 7.13s/it] 90%|████████▉ | 10956/12188 [1:59:24<2:36:52, 7.64s/it] {'loss': 0.2945, 'grad_norm': 0.6756780977715422, 'learning_rate': 2.6600819347224416e-07, 'epoch': 0.9} + 90%|████████▉ | 10956/12188 [1:59:24<2:36:52, 7.64s/it] 90%|████████▉ | 10957/12188 [1:59:34<2:51:06, 8.34s/it] {'loss': 0.2742, 'grad_norm': 0.6594106954472303, 'learning_rate': 2.6558074663341926e-07, 'epoch': 0.9} + 90%|████████▉ | 10957/12188 [1:59:34<2:51:06, 8.34s/it] 90%|████████▉ | 10958/12188 [1:59:42<2:45:09, 8.06s/it] {'loss': 0.3332, 'grad_norm': 0.8316726060370224, 'learning_rate': 2.6515363413167626e-07, 'epoch': 0.9} + 90%|████████▉ | 10958/12188 [1:59:42<2:45:09, 8.06s/it] 90%|████████▉ | 10959/12188 [1:59:49<2:39:59, 7.81s/it] {'loss': 0.3304, 'grad_norm': 0.7406249165534987, 'learning_rate': 2.647268559971761e-07, 'epoch': 0.9} + 90%|████████▉ | 10959/12188 [1:59:49<2:39:59, 7.81s/it] 90%|████████▉ | 10960/12188 [1:59:57<2:38:56, 7.77s/it] {'loss': 0.2861, 'grad_norm': 0.6714476557904773, 'learning_rate': 2.643004122600573e-07, 'epoch': 0.9} + 90%|████████▉ | 10960/12188 [1:59:57<2:38:56, 7.77s/it] 90%|████████▉ | 10961/12188 [2:00:04<2:34:19, 7.55s/it] {'loss': 0.2592, 'grad_norm': 0.6820222224041373, 'learning_rate': 2.638743029504359e-07, 'epoch': 0.9} + 90%|████████▉ | 10961/12188 [2:00:04<2:34:19, 7.55s/it] 90%|████████▉ | 10962/12188 [2:00:10<2:28:43, 7.28s/it] {'loss': 0.292, 'grad_norm': 0.8658625017456942, 'learning_rate': 2.6344852809840114e-07, 'epoch': 0.9} + 90%|████████▉ | 10962/12188 [2:00:10<2:28:43, 7.28s/it] 90%|████████▉ | 10963/12188 [2:00:18<2:28:18, 7.26s/it] {'loss': 0.2853, 'grad_norm': 0.6881513902831928, 'learning_rate': 2.630230877340223e-07, 'epoch': 0.9} + 90%|████████▉ | 10963/12188 [2:00:18<2:28:18, 7.26s/it] 90%|████████▉ | 10964/12188 [2:00:24<2:25:34, 7.14s/it] {'loss': 0.2659, 'grad_norm': 0.759072456430159, 'learning_rate': 2.625979818873425e-07, 'epoch': 0.9} + 90%|████████▉ | 10964/12188 [2:00:24<2:25:34, 7.14s/it] 90%|████████▉ | 10965/12188 [2:00:31<2:23:19, 7.03s/it] {'loss': 0.3088, 'grad_norm': 0.7259277008942375, 'learning_rate': 2.621732105883812e-07, 'epoch': 0.9} + 90%|████████▉ | 10965/12188 [2:00:31<2:23:19, 7.03s/it] 90%|████████▉ | 10966/12188 [2:00:38<2:21:46, 6.96s/it] {'loss': 0.2914, 'grad_norm': 0.6871182070206967, 'learning_rate': 2.617487738671365e-07, 'epoch': 0.9} + 90%|████████▉ | 10966/12188 [2:00:38<2:21:46, 6.96s/it] 90%|████████▉ | 10967/12188 [2:00:46<2:27:02, 7.23s/it] {'loss': 0.3164, 'grad_norm': 0.6590745968776163, 'learning_rate': 2.6132467175358003e-07, 'epoch': 0.9} + 90%|████████▉ | 10967/12188 [2:00:46<2:27:02, 7.23s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f3c90aeec50> +[Try #0] Failed to fetch sample 4607839 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f3c90aeec50> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Tabs you've opened'"}, {'from': 'gpt', 'value': '\nclick(x=0.6085, y=0.185)\n'}]} + 90%|████████▉ | 10968/12188 [2:00:53<2:23:18, 7.05s/it] {'loss': 0.3279, 'grad_norm': 0.7841070715738664, 'learning_rate': 2.609009042776628e-07, 'epoch': 0.9} + 90%|████████▉ | 10968/12188 [2:00:53<2:23:18, 7.05s/it] 90%|████████▉ | 10969/12188 [2:01:00<2:24:37, 7.12s/it] {'loss': 0.3056, 'grad_norm': 0.6487785954714603, 'learning_rate': 2.6047747146930924e-07, 'epoch': 0.9} + 90%|████████▉ | 10969/12188 [2:01:00<2:24:37, 7.12s/it] 90%|█████████ | 10970/12188 [2:01:07<2:24:37, 7.12s/it] {'loss': 0.2593, 'grad_norm': 0.6473659161272229, 'learning_rate': 2.6005437335842155e-07, 'epoch': 0.9} + 90%|███��█████ | 10970/12188 [2:01:07<2:24:37, 7.12s/it] 90%|█████████ | 10971/12188 [2:01:15<2:29:55, 7.39s/it] {'loss': 0.2923, 'grad_norm': 0.6755981882315768, 'learning_rate': 2.5963160997487967e-07, 'epoch': 0.9} + 90%|█████████ | 10971/12188 [2:01:15<2:29:55, 7.39s/it] 90%|█████████ | 10972/12188 [2:01:24<2:42:59, 8.04s/it] {'loss': 0.2931, 'grad_norm': 0.7177838404799121, 'learning_rate': 2.59209181348537e-07, 'epoch': 0.9} + 90%|█████████ | 10972/12188 [2:01:25<2:42:59, 8.04s/it] 90%|█████████ | 10973/12188 [2:01:32<2:38:27, 7.83s/it] {'loss': 0.3062, 'grad_norm': 0.754206304384468, 'learning_rate': 2.587870875092252e-07, 'epoch': 0.9} + 90%|█████████ | 10973/12188 [2:01:32<2:38:27, 7.83s/it] 90%|█████████ | 10974/12188 [2:01:39<2:33:07, 7.57s/it] {'loss': 0.3349, 'grad_norm': 0.7032384668202015, 'learning_rate': 2.5836532848675267e-07, 'epoch': 0.9} + 90%|█████████ | 10974/12188 [2:01:39<2:33:07, 7.57s/it] 90%|█████████ | 10975/12188 [2:01:47<2:37:27, 7.79s/it] {'loss': 0.2934, 'grad_norm': 0.6621521152012244, 'learning_rate': 2.5794390431090166e-07, 'epoch': 0.9} + 90%|█████████ | 10975/12188 [2:01:47<2:37:27, 7.79s/it] 90%|█████████ | 10976/12188 [2:01:54<2:33:11, 7.58s/it] {'loss': 0.3363, 'grad_norm': 0.7231851557628248, 'learning_rate': 2.575228150114345e-07, 'epoch': 0.9} + 90%|█████████ | 10976/12188 [2:01:54<2:33:11, 7.58s/it] 90%|█████████ | 10977/12188 [2:02:01<2:29:19, 7.40s/it] {'loss': 0.2837, 'grad_norm': 0.6897300109261915, 'learning_rate': 2.571020606180857e-07, 'epoch': 0.9} + 90%|█████████ | 10977/12188 [2:02:01<2:29:19, 7.40s/it] 90%|█████████ | 10978/12188 [2:02:08<2:27:45, 7.33s/it] {'loss': 0.3332, 'grad_norm': 0.8102525494178103, 'learning_rate': 2.5668164116057046e-07, 'epoch': 0.9} + 90%|█████████ | 10978/12188 [2:02:08<2:27:45, 7.33s/it] 90%|█████████ | 10979/12188 [2:02:15<2:22:35, 7.08s/it] {'loss': 0.2991, 'grad_norm': 0.643120276051315, 'learning_rate': 2.562615566685767e-07, 'epoch': 0.9} + 90%|█████████ | 10979/12188 [2:02:15<2:22:35, 7.08s/it] 90%|█████████ | 10980/12188 [2:02:23<2:31:08, 7.51s/it] {'loss': 0.3097, 'grad_norm': 0.7357569171138059, 'learning_rate': 2.5584180717177e-07, 'epoch': 0.9} + 90%|█████████ | 10980/12188 [2:02:23<2:31:08, 7.51s/it] 90%|█████████ | 10981/12188 [2:02:30<2:27:18, 7.32s/it] {'loss': 0.2992, 'grad_norm': 0.660804791571219, 'learning_rate': 2.5542239269979283e-07, 'epoch': 0.9} + 90%|█████████ | 10981/12188 [2:02:30<2:27:18, 7.32s/it] 90%|█████████ | 10982/12188 [2:02:38<2:29:49, 7.45s/it] {'loss': 0.307, 'grad_norm': 0.7149469169297925, 'learning_rate': 2.5500331328226324e-07, 'epoch': 0.9} + 90%|█████████ | 10982/12188 [2:02:38<2:29:49, 7.45s/it] 90%|█████████ | 10983/12188 [2:02:45<2:27:59, 7.37s/it] {'loss': 0.2695, 'grad_norm': 0.7380529770841104, 'learning_rate': 2.5458456894877637e-07, 'epoch': 0.9} + 90%|█████████ | 10983/12188 [2:02:45<2:27:59, 7.37s/it] 90%|█████████ | 10984/12188 [2:02:52<2:23:16, 7.14s/it] {'loss': 0.2887, 'grad_norm': 0.673349333058996, 'learning_rate': 2.5416615972890414e-07, 'epoch': 0.9} + 90%|█████████ | 10984/12188 [2:02:52<2:23:16, 7.14s/it] 90%|█████████ | 10985/12188 [2:03:01<2:38:23, 7.90s/it] {'loss': 0.2867, 'grad_norm': 0.675525054496975, 'learning_rate': 2.5374808565219175e-07, 'epoch': 0.9} + 90%|█████████ | 10985/12188 [2:03:01<2:38:23, 7.90s/it] 90%|█████████ | 10986/12188 [2:03:08<2:29:54, 7.48s/it] {'loss': 0.2845, 'grad_norm': 0.7528596778034459, 'learning_rate': 2.533303467481646e-07, 'epoch': 0.9} + 90%|█████████ | 10986/12188 [2:03:08<2:29:54, 7.48s/it] 90%|█████████ | 10987/12188 [2:03:15<2:28:41, 7.43s/it] {'loss': 0.3083, 'grad_norm': 0.7320094743479311, 'learning_rate': 2.529129430463223e-07, 'epoch': 0.9} + 90%|█████████ | 10987/12188 [2:03:15<2:28:41, 7.43s/it] 90%|█████████ | 10988/12188 [2:03:25<2:40:56, 8.05s/it] {'loss': 0.2839, 'grad_norm': 0.6617059159259602, 'learning_rate': 2.524958745761408e-07, 'epoch': 0.9} + 90%|█████████ | 10988/12188 [2:03:25<2:40:56, 8.05s/it] 90%|█████████ | 10989/12188 [2:03:31<2:31:54, 7.60s/it] {'loss': 0.3485, 'grad_norm': 0.7034405300692741, 'learning_rate': 2.5207914136707366e-07, 'epoch': 0.9} + 90%|█████████ | 10989/12188 [2:03:31<2:31:54, 7.60s/it] 90%|█████████ | 10990/12188 [2:03:38<2:26:33, 7.34s/it] {'loss': 0.2519, 'grad_norm': 0.8115476318846994, 'learning_rate': 2.516627434485491e-07, 'epoch': 0.9} + 90%|█████████ | 10990/12188 [2:03:38<2:26:33, 7.34s/it] 90%|█████████ | 10991/12188 [2:03:45<2:23:14, 7.18s/it] {'loss': 0.3277, 'grad_norm': 0.7472083797993223, 'learning_rate': 2.5124668084997306e-07, 'epoch': 0.9} + 90%|█████████ | 10991/12188 [2:03:45<2:23:14, 7.18s/it] 90%|█████████ | 10992/12188 [2:03:52<2:21:21, 7.09s/it] {'loss': 0.2863, 'grad_norm': 0.634162211539971, 'learning_rate': 2.50830953600727e-07, 'epoch': 0.9} + 90%|█████████ | 10992/12188 [2:03:52<2:21:21, 7.09s/it] 90%|█████████ | 10993/12188 [2:04:02<2:41:31, 8.11s/it] {'loss': 0.3163, 'grad_norm': 0.6929967429918459, 'learning_rate': 2.50415561730169e-07, 'epoch': 0.9} + 90%|█████████ | 10993/12188 [2:04:02<2:41:31, 8.11s/it] 90%|█████████ | 10994/12188 [2:04:10<2:36:59, 7.89s/it] {'loss': 0.3284, 'grad_norm': 0.7273296070690904, 'learning_rate': 2.5000050526763353e-07, 'epoch': 0.9} + 90%|█████████ | 10994/12188 [2:04:10<2:36:59, 7.89s/it] 90%|█████████ | 10995/12188 [2:04:16<2:29:41, 7.53s/it] {'loss': 0.2737, 'grad_norm': 0.7347387676900763, 'learning_rate': 2.4958578424242984e-07, 'epoch': 0.9} + 90%|█████████ | 10995/12188 [2:04:16<2:29:41, 7.53s/it] 90%|█████████ | 10996/12188 [2:04:23<2:24:59, 7.30s/it] {'loss': 0.2862, 'grad_norm': 0.6833167916048214, 'learning_rate': 2.491713986838462e-07, 'epoch': 0.9} + 90%|█████████ | 10996/12188 [2:04:23<2:24:59, 7.30s/it] 90%|█████████ | 10997/12188 [2:04:30<2:23:14, 7.22s/it] {'loss': 0.268, 'grad_norm': 0.6904592393916806, 'learning_rate': 2.4875734862114644e-07, 'epoch': 0.9} + 90%|█████████ | 10997/12188 [2:04:30<2:23:14, 7.22s/it] 90%|█████████ | 10998/12188 [2:04:37<2:23:25, 7.23s/it] {'loss': 0.293, 'grad_norm': 0.6655998790747366, 'learning_rate': 2.4834363408356823e-07, 'epoch': 0.9} + 90%|█████████ | 10998/12188 [2:04:37<2:23:25, 7.23s/it] 90%|█████████ | 10999/12188 [2:04:45<2:24:26, 7.29s/it] {'loss': 0.2879, 'grad_norm': 0.6612945124174688, 'learning_rate': 2.479302551003293e-07, 'epoch': 0.9} + 90%|█████████ | 10999/12188 [2:04:45<2:24:26, 7.29s/it] 90%|█████████ | 11000/12188 [2:04:52<2:27:00, 7.43s/it] {'loss': 0.2612, 'grad_norm': 0.6554125010626053, 'learning_rate': 2.4751721170062026e-07, 'epoch': 0.9} + 90%|█████████ | 11000/12188 [2:04:52<2:27:00, 7.43s/it]/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/utils/checkpoint.py:87: UserWarning: None of the inputs have requires_grad=True. Gradients will be None + warnings.warn( + 90%|█████████ | 11001/12188 [2:05:18<4:16:38, 12.97s/it] {'loss': 0.3143, 'grad_norm': 0.6827642471314903, 'learning_rate': 2.471045039136105e-07, 'epoch': 0.9} + 90%|█████████ | 11001/12188 [2:05:18<4:16:38, 12.97s/it] 90%|█████████ | 11002/12188 [2:05:25<3:38:50, 11.07s/it] {'loss': 0.3461, 'grad_norm': 0.7557286937006094, 'learning_rate': 2.4669213176844507e-07, 'epoch': 0.9} + 90%|█████████ | 11002/12188 [2:05:25<3:38:50, 11.07s/it] 90%|█████████ | 11003/12188 [2:05:32<3:16:09, 9.93s/it] {'loss': 0.2943, 'grad_norm': 0.7468966507506408, 'learning_rate': 2.4628009529424403e-07, 'epoch': 0.9} + 90%|█████████ | 11003/12188 [2:05:32<3:16:09, 9.93s/it] 90%|█████████ | 11004/12188 [2:05:39<2:58:54, 9.07s/it] {'loss': 0.3373, 'grad_norm': 0.8790912857992612, 'learning_rate': 2.4586839452010567e-07, 'epoch': 0.9} + 90%|█████████ | 11004/12188 [2:05:39<2:58:54, 9.07s/it] 90%|█████████ | 11005/12188 [2:05:47<2:47:37, 8.50s/it] {'loss': 0.3193, 'grad_norm': 0.7002275295801194, 'learning_rate': 2.454570294751024e-07, 'epoch': 0.9} + 90%|█████████ | 11005/12188 [2:05:47<2:47:37, 8.50s/it] 90%|█████████ | 11006/12188 [2:05:54<2:40:53, 8.17s/it] {'loss': 0.2667, 'grad_norm': 0.6456554939500181, 'learning_rate': 2.4504600018828593e-07, 'epoch': 0.9} + 90%|█████████ | 11006/12188 [2:05:54<2:40:53, 8.17s/it] 90%|█████████ | 11007/12188 [2:06:01<2:33:31, 7.80s/it] {'loss': 0.2901, 'grad_norm': 0.731528446589566, 'learning_rate': 2.446353066886814e-07, 'epoch': 0.9} + 90%|█████████ | 11007/12188 [2:06:01<2:33:31, 7.80s/it] 90%|█████████ | 11008/12188 [2:06:08<2:28:30, 7.55s/it] {'loss': 0.2999, 'grad_norm': 0.9811433634275787, 'learning_rate': 2.4422494900529057e-07, 'epoch': 0.9} + 90%|█████████ | 11008/12188 [2:06:08<2:28:30, 7.55s/it] 90%|█████████ | 11009/12188 [2:06:15<2:25:16, 7.39s/it] {'loss': 0.3032, 'grad_norm': 0.6805559774413524, 'learning_rate': 2.438149271670931e-07, 'epoch': 0.9} + 90%|█████████ | 11009/12188 [2:06:15<2:25:16, 7.39s/it] 90%|█████████ | 11010/12188 [2:06:25<2:42:48, 8.29s/it] {'loss': 0.2631, 'grad_norm': 0.680028521604318, 'learning_rate': 2.434052412030452e-07, 'epoch': 0.9} + 90%|█████████ | 11010/12188 [2:06:25<2:42:48, 8.29s/it] 90%|█████████ | 11011/12188 [2:06:32<2:33:59, 7.85s/it] {'loss': 0.2807, 'grad_norm': 0.789879306147228, 'learning_rate': 2.42995891142076e-07, 'epoch': 0.9} + 90%|█████████ | 11011/12188 [2:06:32<2:33:59, 7.85s/it] 90%|█████████ | 11012/12188 [2:06:43<2:50:34, 8.70s/it] {'loss': 0.3111, 'grad_norm': 0.729298992716329, 'learning_rate': 2.4258687701309513e-07, 'epoch': 0.9} + 90%|█████████ | 11012/12188 [2:06:43<2:50:34, 8.70s/it] 90%|█████████ | 11013/12188 [2:06:51<2:46:14, 8.49s/it] {'loss': 0.2978, 'grad_norm': 0.7058843469500824, 'learning_rate': 2.421781988449851e-07, 'epoch': 0.9} + 90%|█████████ | 11013/12188 [2:06:51<2:46:14, 8.49s/it] 90%|█████████ | 11014/12188 [2:06:57<2:34:11, 7.88s/it] {'loss': 0.3063, 'grad_norm': 0.6548825916833977, 'learning_rate': 2.417698566666066e-07, 'epoch': 0.9} + 90%|█████████ | 11014/12188 [2:06:57<2:34:11, 7.88s/it] 90%|█████████ | 11015/12188 [2:07:05<2:33:07, 7.83s/it] {'loss': 0.3136, 'grad_norm': 0.7180887532119914, 'learning_rate': 2.413618505067966e-07, 'epoch': 0.9} + 90%|█████████ | 11015/12188 [2:07:05<2:33:07, 7.83s/it] 90%|█████████ | 11016/12188 [2:07:14<2:40:16, 8.21s/it] {'loss': 0.306, 'grad_norm': 0.7164444638277768, 'learning_rate': 2.409541803943666e-07, 'epoch': 0.9} + 90%|█████████ | 11016/12188 [2:07:14<2:40:16, 8.21s/it] 90%|█████████ | 11017/12188 [2:07:22<2:38:44, 8.13s/it] {'loss': 0.3245, 'grad_norm': 0.667408976201528, 'learning_rate': 2.4054684635810733e-07, 'epoch': 0.9} + 90%|█████████ | 11017/12188 [2:07:22<2:38:44, 8.13s/it] 90%|█████████ | 11018/12188 [2:07:31<2:45:58, 8.51s/it] {'loss': 0.3111, 'grad_norm': 0.7014232562023901, 'learning_rate': 2.401398484267825e-07, 'epoch': 0.9} + 90%|█████████ | 11018/12188 [2:07:31<2:45:58, 8.51s/it] 90%|█████████ | 11019/12188 [2:07:39<2:39:26, 8.18s/it] {'loss': 0.2975, 'grad_norm': 0.7349235824189287, 'learning_rate': 2.397331866291347e-07, 'epoch': 0.9} + 90%|█████████ | 11019/12188 [2:07:39<2:39:26, 8.18s/it] 90%|█████████ | 11020/12188 [2:07:46<2:34:25, 7.93s/it] {'loss': 0.2906, 'grad_norm': 0.6975225704712222, 'learning_rate': 2.3932686099388094e-07, 'epoch': 0.9} + 90%|█████████ | 11020/12188 [2:07:46<2:34:25, 7.93s/it] 90%|█████████ | 11021/12188 [2:07:53<2:27:38, 7.59s/it] {'loss': 0.3075, 'grad_norm': 0.6597430151998963, 'learning_rate': 2.3892087154971597e-07, 'epoch': 0.9} + 90%|█████████ | 11021/12188 [2:07:53<2:27:38, 7.59s/it] 90%|█████████ | 11022/12188 [2:08:00<2:25:13, 7.47s/it] {'loss': 0.316, 'grad_norm': 0.7330018436185091, 'learning_rate': 2.385152183253103e-07, 'epoch': 0.9} + 90%|█████████ | 11022/12188 [2:08:00<2:25:13, 7.47s/it] 90%|█████████ | 11023/12188 [2:08:07<2:20:58, 7.26s/it] {'loss': 0.2899, 'grad_norm': 0.6886464014926973, 'learning_rate': 2.3810990134930878e-07, 'epoch': 0.9} + 90%|█████████ | 11023/12188 [2:08:07<2:20:58, 7.26s/it] 90%|█████████ | 11024/12188 [2:08:14<2:21:11, 7.28s/it] {'loss': 0.2885, 'grad_norm': 0.6814906885770038, 'learning_rate': 2.3770492065033567e-07, 'epoch': 0.9} + 90%|█████████ | 11024/12188 [2:08:14<2:21:11, 7.28s/it] 90%|█████████ | 11025/12188 [2:08:21<2:15:54, 7.01s/it] {'loss': 0.3124, 'grad_norm': 0.7078748064216891, 'learning_rate': 2.3730027625699036e-07, 'epoch': 0.9} + 90%|█████████ | 11025/12188 [2:08:21<2:15:54, 7.01s/it] 90%|█████████ | 11026/12188 [2:08:29<2:20:57, 7.28s/it] {'loss': 0.2893, 'grad_norm': 0.8906785218252475, 'learning_rate': 2.3689596819784723e-07, 'epoch': 0.9} + 90%|█████████ | 11026/12188 [2:08:29<2:20:57, 7.28s/it] 90%|█████████ | 11027/12188 [2:08:35<2:18:26, 7.15s/it] {'loss': 0.2842, 'grad_norm': 0.7153115699715039, 'learning_rate': 2.3649199650145836e-07, 'epoch': 0.9} + 90%|█████████ | 11027/12188 [2:08:35<2:18:26, 7.15s/it] 90%|█████████ | 11028/12188 [2:08:42<2:17:38, 7.12s/it] {'loss': 0.2874, 'grad_norm': 0.6923444947313763, 'learning_rate': 2.3608836119635214e-07, 'epoch': 0.9} + 90%|█████████ | 11028/12188 [2:08:42<2:17:38, 7.12s/it] 90%|█████████ | 11029/12188 [2:08:50<2:22:37, 7.38s/it] {'loss': 0.2985, 'grad_norm': 0.7484780540838287, 'learning_rate': 2.356850623110313e-07, 'epoch': 0.9} + 90%|█████████ | 11029/12188 [2:08:50<2:22:37, 7.38s/it] 90%|█████████ | 11030/12188 [2:08:57<2:18:16, 7.16s/it] {'loss': 0.2691, 'grad_norm': 0.677634770787667, 'learning_rate': 2.3528209987397798e-07, 'epoch': 0.9} + 90%|█████████ | 11030/12188 [2:08:57<2:18:16, 7.16s/it] 91%|█████████ | 11031/12188 [2:09:04<2:16:20, 7.07s/it] {'loss': 0.3005, 'grad_norm': 0.7320747400883704, 'learning_rate': 2.3487947391364673e-07, 'epoch': 0.91} + 91%|█████████ | 11031/12188 [2:09:04<2:16:20, 7.07s/it] 91%|█████████ | 11032/12188 [2:09:12<2:19:47, 7.26s/it] {'loss': 0.2649, 'grad_norm': 0.7604779769066777, 'learning_rate': 2.3447718445847145e-07, 'epoch': 0.91} + 91%|█████████ | 11032/12188 [2:09:12<2:19:47, 7.26s/it] 91%|█████████ | 11033/12188 [2:09:20<2:25:00, 7.53s/it] {'loss': 0.3336, 'grad_norm': 0.7206008328070856, 'learning_rate': 2.3407523153686108e-07, 'epoch': 0.91} + 91%|█████████ | 11033/12188 [2:09:20<2:25:00, 7.53s/it] 91%|█████████ | 11034/12188 [2:09:27<2:25:25, 7.56s/it] {'loss': 0.2938, 'grad_norm': 0.6733096008503553, 'learning_rate': 2.336736151772012e-07, 'epoch': 0.91} + 91%|███████���█ | 11034/12188 [2:09:27<2:25:25, 7.56s/it] 91%|█████████ | 11035/12188 [2:09:34<2:20:44, 7.32s/it] {'loss': 0.357, 'grad_norm': 0.7275701887117747, 'learning_rate': 2.3327233540785255e-07, 'epoch': 0.91} + 91%|█████████ | 11035/12188 [2:09:34<2:20:44, 7.32s/it] 91%|█████████ | 11036/12188 [2:09:41<2:18:40, 7.22s/it] {'loss': 0.2801, 'grad_norm': 0.7616743886187898, 'learning_rate': 2.3287139225715294e-07, 'epoch': 0.91} + 91%|█████████ | 11036/12188 [2:09:41<2:18:40, 7.22s/it] 91%|█████████ | 11037/12188 [2:09:48<2:17:08, 7.15s/it] {'loss': 0.3455, 'grad_norm': 0.8678938570702711, 'learning_rate': 2.3247078575341697e-07, 'epoch': 0.91} + 91%|█████████ | 11037/12188 [2:09:48<2:17:08, 7.15s/it] 91%|█████████ | 11038/12188 [2:09:55<2:15:19, 7.06s/it] {'loss': 0.3228, 'grad_norm': 0.7385256188992667, 'learning_rate': 2.3207051592493478e-07, 'epoch': 0.91} + 91%|█████████ | 11038/12188 [2:09:55<2:15:19, 7.06s/it] 91%|█████████ | 11039/12188 [2:10:02<2:14:15, 7.01s/it] {'loss': 0.3095, 'grad_norm': 0.6777975117126034, 'learning_rate': 2.3167058279997156e-07, 'epoch': 0.91} + 91%|█████████ | 11039/12188 [2:10:02<2:14:15, 7.01s/it] 91%|█████████ | 11040/12188 [2:10:09<2:13:31, 6.98s/it] {'loss': 0.2794, 'grad_norm': 0.7062426537911954, 'learning_rate': 2.312709864067719e-07, 'epoch': 0.91} + 91%|█████████ | 11040/12188 [2:10:09<2:13:31, 6.98s/it] 91%|█████████ | 11041/12188 [2:10:17<2:17:50, 7.21s/it] {'loss': 0.2963, 'grad_norm': 0.7065286781576353, 'learning_rate': 2.3087172677355275e-07, 'epoch': 0.91} + 91%|█████████ | 11041/12188 [2:10:17<2:17:50, 7.21s/it] 91%|█████████ | 11042/12188 [2:10:24<2:17:49, 7.22s/it] {'loss': 0.3296, 'grad_norm': 0.656814159774793, 'learning_rate': 2.3047280392851035e-07, 'epoch': 0.91} + 91%|█████████ | 11042/12188 [2:10:24<2:17:49, 7.22s/it] 91%|█████████ | 11043/12188 [2:10:30<2:14:02, 7.02s/it] {'loss': 0.2982, 'grad_norm': 0.7312474263157764, 'learning_rate': 2.300742178998161e-07, 'epoch': 0.91} + 91%|█████████ | 11043/12188 [2:10:30<2:14:02, 7.02s/it] 91%|█████████ | 11044/12188 [2:10:37<2:12:05, 6.93s/it] {'loss': 0.2385, 'grad_norm': 0.6831301583969049, 'learning_rate': 2.2967596871561694e-07, 'epoch': 0.91} + 91%|█████████ | 11044/12188 [2:10:37<2:12:05, 6.93s/it] 91%|█████████ | 11045/12188 [2:10:44<2:14:32, 7.06s/it] {'loss': 0.2721, 'grad_norm': 0.6207243313635366, 'learning_rate': 2.2927805640403645e-07, 'epoch': 0.91} + 91%|█████████ | 11045/12188 [2:10:44<2:14:32, 7.06s/it] 91%|█████████ | 11046/12188 [2:10:51<2:13:19, 7.00s/it] {'loss': 0.2884, 'grad_norm': 0.6764552470069918, 'learning_rate': 2.2888048099317495e-07, 'epoch': 0.91} + 91%|█████████ | 11046/12188 [2:10:51<2:13:19, 7.00s/it] 91%|█████████ | 11047/12188 [2:10:59<2:19:56, 7.36s/it] {'loss': 0.2958, 'grad_norm': 0.7188902437034579, 'learning_rate': 2.2848324251110942e-07, 'epoch': 0.91} + 91%|█████████ | 11047/12188 [2:10:59<2:19:56, 7.36s/it] 91%|█████████ | 11048/12188 [2:11:07<2:17:56, 7.26s/it] {'loss': 0.3379, 'grad_norm': 0.7215180796920653, 'learning_rate': 2.2808634098589022e-07, 'epoch': 0.91} + 91%|█████████ | 11048/12188 [2:11:07<2:17:56, 7.26s/it] 91%|█████████ | 11049/12188 [2:11:16<2:30:49, 7.94s/it] {'loss': 0.2714, 'grad_norm': 0.7207378734258602, 'learning_rate': 2.2768977644554767e-07, 'epoch': 0.91} + 91%|█████████ | 11049/12188 [2:11:16<2:30:49, 7.94s/it] 91%|█████████ | 11050/12188 [2:11:24<2:28:50, 7.85s/it] {'loss': 0.3101, 'grad_norm': 0.6790691573814155, 'learning_rate': 2.2729354891808498e-07, 'epoch': 0.91} + 91%|█████████ | 11050/12188 [2:11:24<2:28:50, 7.85s/it] 91%|█████████ | 11051/12188 [2:11:31<2:25:59, 7.70s/it] {'loss': 0.2841, 'grad_norm': 0.679103538056945, 'learning_rate': 2.268976584314847e-07, 'epoch': 0.91} + 91%|█████████ | 11051/12188 [2:11:31<2:25:59, 7.70s/it] 91%|█████████ | 11052/12188 [2:11:38<2:24:12, 7.62s/it] {'loss': 0.3024, 'grad_norm': 0.6997253795121451, 'learning_rate': 2.265021050137023e-07, 'epoch': 0.91} + 91%|█████████ | 11052/12188 [2:11:38<2:24:12, 7.62s/it] 91%|█████████ | 11053/12188 [2:11:46<2:20:59, 7.45s/it] {'loss': 0.2956, 'grad_norm': 0.7554713356875713, 'learning_rate': 2.2610688869267318e-07, 'epoch': 0.91} + 91%|█████████ | 11053/12188 [2:11:46<2:20:59, 7.45s/it] 91%|█████████ | 11054/12188 [2:11:52<2:17:51, 7.29s/it] {'loss': 0.2923, 'grad_norm': 0.620640595671181, 'learning_rate': 2.2571200949630446e-07, 'epoch': 0.91} + 91%|█████████ | 11054/12188 [2:11:52<2:17:51, 7.29s/it] 91%|█████████ | 11055/12188 [2:12:00<2:20:45, 7.45s/it] {'loss': 0.3049, 'grad_norm': 0.6869331887422572, 'learning_rate': 2.2531746745248384e-07, 'epoch': 0.91} + 91%|█████████ | 11055/12188 [2:12:00<2:20:45, 7.45s/it] 91%|█████████ | 11056/12188 [2:12:08<2:21:00, 7.47s/it] {'loss': 0.2761, 'grad_norm': 0.6721004226437561, 'learning_rate': 2.2492326258907293e-07, 'epoch': 0.91} + 91%|█████████ | 11056/12188 [2:12:08<2:21:00, 7.47s/it] 91%|█████████ | 11057/12188 [2:12:16<2:26:02, 7.75s/it] {'loss': 0.3268, 'grad_norm': 0.7104628579516478, 'learning_rate': 2.2452939493390892e-07, 'epoch': 0.91} + 91%|█████████ | 11057/12188 [2:12:16<2:26:02, 7.75s/it] 91%|█████████ | 11058/12188 [2:12:24<2:28:20, 7.88s/it] {'loss': 0.2919, 'grad_norm': 0.7463372848870595, 'learning_rate': 2.241358645148062e-07, 'epoch': 0.91} + 91%|█████████ | 11058/12188 [2:12:24<2:28:20, 7.88s/it] 91%|█████████ | 11059/12188 [2:12:31<2:20:49, 7.48s/it] {'loss': 0.2837, 'grad_norm': 0.6377042330128561, 'learning_rate': 2.2374267135955695e-07, 'epoch': 0.91} + 91%|█████████ | 11059/12188 [2:12:31<2:20:49, 7.48s/it] 91%|█████████ | 11060/12188 [2:12:38<2:16:09, 7.24s/it] {'loss': 0.2806, 'grad_norm': 0.7450822765410253, 'learning_rate': 2.2334981549592627e-07, 'epoch': 0.91} + 91%|█████████ | 11060/12188 [2:12:38<2:16:09, 7.24s/it] 91%|█████████ | 11061/12188 [2:12:44<2:13:43, 7.12s/it] {'loss': 0.2909, 'grad_norm': 0.6888527534434731, 'learning_rate': 2.229572969516569e-07, 'epoch': 0.91} + 91%|█████████ | 11061/12188 [2:12:44<2:13:43, 7.12s/it] 91%|█████████ | 11062/12188 [2:12:51<2:11:57, 7.03s/it] {'loss': 0.3515, 'grad_norm': 0.7546822695413938, 'learning_rate': 2.2256511575446837e-07, 'epoch': 0.91} + 91%|█████████ | 11062/12188 [2:12:51<2:11:57, 7.03s/it] 91%|█████████ | 11063/12188 [2:12:58<2:12:56, 7.09s/it] {'loss': 0.3026, 'grad_norm': 0.7642555187381871, 'learning_rate': 2.2217327193205572e-07, 'epoch': 0.91} + 91%|█████████ | 11063/12188 [2:12:59<2:12:56, 7.09s/it] 91%|█████████ | 11064/12188 [2:13:06<2:14:01, 7.15s/it] {'loss': 0.3058, 'grad_norm': 0.7001843141813285, 'learning_rate': 2.217817655120913e-07, 'epoch': 0.91} + 91%|█████████ | 11064/12188 [2:13:06<2:14:01, 7.15s/it] 91%|█████████ | 11065/12188 [2:13:13<2:12:06, 7.06s/it] {'loss': 0.296, 'grad_norm': 0.6505546853187304, 'learning_rate': 2.2139059652222074e-07, 'epoch': 0.91} + 91%|█████████ | 11065/12188 [2:13:13<2:12:06, 7.06s/it] 91%|█████████ | 11066/12188 [2:13:20<2:13:20, 7.13s/it] {'loss': 0.3023, 'grad_norm': 0.695232258276319, 'learning_rate': 2.209997649900697e-07, 'epoch': 0.91} + 91%|█████████ | 11066/12188 [2:13:20<2:13:20, 7.13s/it] 91%|█████████ | 11067/12188 [2:13:28<2:17:55, 7.38s/it] {'loss': 0.2626, 'grad_norm': 0.6762874207250662, 'learning_rate': 2.2060927094323613e-07, 'epoch': 0.91} + 91%|█████████ | 11067/12188 [2:13:28<2:17:55, 7.38s/it] 91%|█████████ | 11068/12188 [2:13:35<2:13:37, 7.16s/it] {'loss': 0.2574, 'grad_norm': 0.6838073555845874, 'learning_rate': 2.2021911440929745e-07, 'epoch': 0.91} + 91%|█████████ | 11068/12188 [2:13:35<2:13:37, 7.16s/it] 91%|█████████ | 11069/12188 [2:13:41<2:11:08, 7.03s/it] {'loss': 0.3177, 'grad_norm': 0.769464222865384, 'learning_rate': 2.1982929541580656e-07, 'epoch': 0.91} + 91%|█████████ | 11069/12188 [2:13:41<2:11:08, 7.03s/it] 91%|█████████ | 11070/12188 [2:13:48<2:09:38, 6.96s/it] {'loss': 0.2959, 'grad_norm': 0.6669133243080018, 'learning_rate': 2.194398139902898e-07, 'epoch': 0.91} + 91%|█████████ | 11070/12188 [2:13:48<2:09:38, 6.96s/it] 91%|█████████ | 11071/12188 [2:13:55<2:08:22, 6.90s/it] {'loss': 0.2952, 'grad_norm': 0.7435874681565928, 'learning_rate': 2.1905067016025404e-07, 'epoch': 0.91} + 91%|█████████ | 11071/12188 [2:13:55<2:08:22, 6.90s/it] 91%|█████████ | 11072/12188 [2:14:02<2:07:09, 6.84s/it] {'loss': 0.2879, 'grad_norm': 0.8619789984554755, 'learning_rate': 2.1866186395317735e-07, 'epoch': 0.91} + 91%|█████████ | 11072/12188 [2:14:02<2:07:09, 6.84s/it] 91%|█████████ | 11073/12188 [2:14:08<2:06:30, 6.81s/it] {'loss': 0.3244, 'grad_norm': 0.749076517045345, 'learning_rate': 2.1827339539651882e-07, 'epoch': 0.91} + 91%|█████████ | 11073/12188 [2:14:08<2:06:30, 6.81s/it] 91%|█████████ | 11074/12188 [2:14:15<2:04:56, 6.73s/it] {'loss': 0.2917, 'grad_norm': 0.9491483260654181, 'learning_rate': 2.1788526451771096e-07, 'epoch': 0.91} + 91%|█████████ | 11074/12188 [2:14:15<2:04:56, 6.73s/it] 91%|█████████ | 11075/12188 [2:14:21<2:04:41, 6.72s/it] {'loss': 0.3045, 'grad_norm': 0.9803789846663369, 'learning_rate': 2.1749747134416242e-07, 'epoch': 0.91} + 91%|█████████ | 11075/12188 [2:14:22<2:04:41, 6.72s/it] 91%|█████████ | 11076/12188 [2:14:31<2:19:09, 7.51s/it] {'loss': 0.3476, 'grad_norm': 0.8793736771510101, 'learning_rate': 2.1711001590325854e-07, 'epoch': 0.91} + 91%|█████████ | 11076/12188 [2:14:31<2:19:09, 7.51s/it] 91%|█████████ | 11077/12188 [2:14:37<2:13:28, 7.21s/it] {'loss': 0.3224, 'grad_norm': 0.6818692551714393, 'learning_rate': 2.1672289822236126e-07, 'epoch': 0.91} + 91%|█████████ | 11077/12188 [2:14:37<2:13:28, 7.21s/it] 91%|█████████ | 11078/12188 [2:14:44<2:12:45, 7.18s/it] {'loss': 0.3198, 'grad_norm': 0.7381175710418979, 'learning_rate': 2.1633611832880762e-07, 'epoch': 0.91} + 91%|█████████ | 11078/12188 [2:14:44<2:12:45, 7.18s/it] 91%|█████████ | 11079/12188 [2:14:52<2:15:01, 7.31s/it] {'loss': 0.2717, 'grad_norm': 0.7275070544549062, 'learning_rate': 2.1594967624991248e-07, 'epoch': 0.91} + 91%|█████████ | 11079/12188 [2:14:52<2:15:01, 7.31s/it] 91%|█████████ | 11080/12188 [2:14:59<2:15:00, 7.31s/it] {'loss': 0.2874, 'grad_norm': 0.7034178219079729, 'learning_rate': 2.1556357201296396e-07, 'epoch': 0.91} + 91%|█████████ | 11080/12188 [2:14:59<2:15:00, 7.31s/it] 91%|█████████ | 11081/12188 [2:15:06<2:12:54, 7.20s/it] {'loss': 0.2634, 'grad_norm': 0.7415395234523341, 'learning_rate': 2.1517780564522973e-07, 'epoch': 0.91} + 91%|█████████ | 11081/12188 [2:15:06<2:12:54, 7.20s/it] 91%|█████████ | 11082/12188 [2:15:13<2:11:40, 7.14s/it] {'loss': 0.3154, 'grad_norm': 0.8250564756336702, 'learning_rate': 2.1479237717395185e-07, 'epoch': 0.91} + 91%|█████████ | 11082/12188 [2:15:13<2:11:40, 7.14s/it] 91%|█████████ | 11083/12188 [2:15:20<2:08:19, 6.97s/it] {'loss': 0.2691, 'grad_norm': 0.8926233860674029, 'learning_rate': 2.1440728662634746e-07, 'epoch': 0.91} + 91%|█████████ | 11083/12188 [2:15:20<2:08:19, 6.97s/it] 91%|█████████ | 11084/12188 [2:15:27<2:06:35, 6.88s/it] {'loss': 0.271, 'grad_norm': 0.7888520918153376, 'learning_rate': 2.1402253402961316e-07, 'epoch': 0.91} + 91%|█████████ | 11084/12188 [2:15:27<2:06:35, 6.88s/it] 91%|█████████ | 11085/12188 [2:15:33<2:04:07, 6.75s/it] {'loss': 0.3031, 'grad_norm': 0.6795870033276029, 'learning_rate': 2.136381194109166e-07, 'epoch': 0.91} + 91%|█████████ | 11085/12188 [2:15:33<2:04:07, 6.75s/it] 91%|█████████ | 11086/12188 [2:15:40<2:04:21, 6.77s/it] {'loss': 0.3078, 'grad_norm': 0.7229759877636404, 'learning_rate': 2.1325404279740725e-07, 'epoch': 0.91} + 91%|█████████ | 11086/12188 [2:15:40<2:04:21, 6.77s/it] 91%|█████████ | 11087/12188 [2:15:47<2:07:50, 6.97s/it] {'loss': 0.2722, 'grad_norm': 0.6204152903257857, 'learning_rate': 2.1287030421620669e-07, 'epoch': 0.91} + 91%|█████████ | 11087/12188 [2:15:47<2:07:50, 6.97s/it] 91%|█████████ | 11088/12188 [2:15:54<2:07:45, 6.97s/it] {'loss': 0.3215, 'grad_norm': 0.834380715220378, 'learning_rate': 2.1248690369441382e-07, 'epoch': 0.91} + 91%|█████████ | 11088/12188 [2:15:54<2:07:45, 6.97s/it] 91%|█████████ | 11089/12188 [2:16:02<2:13:13, 7.27s/it] {'loss': 0.2826, 'grad_norm': 0.7106748160101138, 'learning_rate': 2.1210384125910477e-07, 'epoch': 0.91} + 91%|█████████ | 11089/12188 [2:16:02<2:13:13, 7.27s/it] 91%|█████████ | 11090/12188 [2:16:10<2:16:22, 7.45s/it] {'loss': 0.3073, 'grad_norm': 0.6837669472644113, 'learning_rate': 2.1172111693733012e-07, 'epoch': 0.91} + 91%|█████████ | 11090/12188 [2:16:10<2:16:22, 7.45s/it] 91%|█████████ | 11091/12188 [2:16:17<2:15:00, 7.38s/it] {'loss': 0.2996, 'grad_norm': 0.6886356015241566, 'learning_rate': 2.1133873075611654e-07, 'epoch': 0.91} + 91%|█████████ | 11091/12188 [2:16:17<2:15:00, 7.38s/it] 91%|█████████ | 11092/12188 [2:16:24<2:11:12, 7.18s/it] {'loss': 0.2729, 'grad_norm': 0.6401484253762465, 'learning_rate': 2.1095668274246862e-07, 'epoch': 0.91} + 91%|█████████ | 11092/12188 [2:16:24<2:11:12, 7.18s/it] 91%|█████████ | 11093/12188 [2:16:31<2:09:20, 7.09s/it] {'loss': 0.3207, 'grad_norm': 0.8195798450428998, 'learning_rate': 2.1057497292336525e-07, 'epoch': 0.91} + 91%|█████████ | 11093/12188 [2:16:31<2:09:20, 7.09s/it] 91%|█████████ | 11094/12188 [2:16:39<2:14:03, 7.35s/it] {'loss': 0.3305, 'grad_norm': 0.6754858434660501, 'learning_rate': 2.1019360132576272e-07, 'epoch': 0.91} + 91%|█████████ | 11094/12188 [2:16:39<2:14:03, 7.35s/it] 91%|█████████ | 11095/12188 [2:16:46<2:12:27, 7.27s/it] {'loss': 0.3174, 'grad_norm': 0.7285044958021905, 'learning_rate': 2.0981256797659277e-07, 'epoch': 0.91} + 91%|█████████ | 11095/12188 [2:16:46<2:12:27, 7.27s/it] 91%|█████████ | 11096/12188 [2:16:54<2:14:42, 7.40s/it] {'loss': 0.3355, 'grad_norm': 0.7024232288939957, 'learning_rate': 2.094318729027628e-07, 'epoch': 0.91} + 91%|█████████ | 11096/12188 [2:16:54<2:14:42, 7.40s/it] 91%|█████████ | 11097/12188 [2:17:01<2:11:42, 7.24s/it] {'loss': 0.2677, 'grad_norm': 0.6709320014957093, 'learning_rate': 2.0905151613115792e-07, 'epoch': 0.91} + 91%|█████████ | 11097/12188 [2:17:01<2:11:42, 7.24s/it] 91%|█████████ | 11098/12188 [2:17:08<2:10:43, 7.20s/it] {'loss': 0.3227, 'grad_norm': 0.7682577333566538, 'learning_rate': 2.0867149768863726e-07, 'epoch': 0.91} + 91%|█████████ | 11098/12188 [2:17:08<2:10:43, 7.20s/it] 91%|█████████ | 11099/12188 [2:17:14<2:08:39, 7.09s/it] {'loss': 0.2942, 'grad_norm': 0.6904565389905962, 'learning_rate': 2.0829181760203766e-07, 'epoch': 0.91} + 91%|█████████ | 11099/12188 [2:17:14<2:08:39, 7.09s/it] 91%|█████████ | 11100/12188 [2:17:21<2:07:05, 7.01s/it] {'loss': 0.3365, 'grad_norm': 0.6777989209832223, 'learning_rate': 2.0791247589817155e-07, 'epoch': 0.91} + 91%|█████████ | 11100/12188 [2:17:21<2:07:05, 7.01s/it] 91%|█████████ | 11101/12188 [2:17:29<2:12:48, 7.33s/it] {'loss': 0.3023, 'grad_norm': 0.7489944894787629, 'learning_rate': 2.0753347260382695e-07, 'epoch': 0.91} + 91%|█████████ | 11101/12188 [2:17:29<2:12:48, 7.33s/it] 91%|█████████ | 11102/12188 [2:17:37<2:14:26, 7.43s/it] {'loss': 0.2795, 'grad_norm': 0.7119519748181744, 'learning_rate': 2.0715480774576967e-07, 'epoch': 0.91} + 91%|█████████ | 11102/12188 [2:17:37<2:14:26, 7.43s/it] 91%|█████████ | 11103/12188 [2:17:44<2:13:54, 7.41s/it] {'loss': 0.3342, 'grad_norm': 0.7961949330112428, 'learning_rate': 2.0677648135073948e-07, 'epoch': 0.91} + 91%|█████████ | 11103/12188 [2:17:44<2:13:54, 7.41s/it] 91%|█████████ | 11104/12188 [2:17:51<2:11:53, 7.30s/it] {'loss': 0.3085, 'grad_norm': 0.7252754528515838, 'learning_rate': 2.063984934454527e-07, 'epoch': 0.91} + 91%|█████████ | 11104/12188 [2:17:51<2:11:53, 7.30s/it] 91%|█████████ | 11105/12188 [2:17:58<2:09:57, 7.20s/it] {'loss': 0.2987, 'grad_norm': 0.6866730771107611, 'learning_rate': 2.0602084405660306e-07, 'epoch': 0.91} + 91%|█████████ | 11105/12188 [2:17:58<2:09:57, 7.20s/it] 91%|█████████ | 11106/12188 [2:18:06<2:12:23, 7.34s/it] {'loss': 0.3069, 'grad_norm': 0.7342602177383414, 'learning_rate': 2.0564353321085918e-07, 'epoch': 0.91} + 91%|█████████ | 11106/12188 [2:18:06<2:12:23, 7.34s/it] 91%|█████████ | 11107/12188 [2:18:14<2:13:08, 7.39s/it] {'loss': 0.3024, 'grad_norm': 0.7518639062834843, 'learning_rate': 2.052665609348664e-07, 'epoch': 0.91} + 91%|█████████ | 11107/12188 [2:18:14<2:13:08, 7.39s/it] 91%|█████████ | 11108/12188 [2:18:21<2:11:41, 7.32s/it] {'loss': 0.2597, 'grad_norm': 0.970134493159293, 'learning_rate': 2.048899272552457e-07, 'epoch': 0.91} + 91%|█████████ | 11108/12188 [2:18:21<2:11:41, 7.32s/it] 91%|█████████ | 11109/12188 [2:18:28<2:09:54, 7.22s/it] {'loss': 0.3123, 'grad_norm': 0.6978716343629118, 'learning_rate': 2.045136321985941e-07, 'epoch': 0.91} + 91%|█████████ | 11109/12188 [2:18:28<2:09:54, 7.22s/it] 91%|█████████ | 11110/12188 [2:18:35<2:08:13, 7.14s/it] {'loss': 0.2606, 'grad_norm': 0.7029732719268498, 'learning_rate': 2.0413767579148536e-07, 'epoch': 0.91} + 91%|█████████ | 11110/12188 [2:18:35<2:08:13, 7.14s/it] 91%|█████████ | 11111/12188 [2:18:42<2:09:37, 7.22s/it] {'loss': 0.2955, 'grad_norm': 0.7285063351306876, 'learning_rate': 2.0376205806046878e-07, 'epoch': 0.91} + 91%|█████████ | 11111/12188 [2:18:42<2:09:37, 7.22s/it] 91%|█████████ | 11112/12188 [2:18:49<2:05:50, 7.02s/it] {'loss': 0.3333, 'grad_norm': 0.6650475437114827, 'learning_rate': 2.0338677903206982e-07, 'epoch': 0.91} + 91%|█████████ | 11112/12188 [2:18:49<2:05:50, 7.02s/it] 91%|█████████ | 11113/12188 [2:18:55<2:04:07, 6.93s/it] {'loss': 0.3353, 'grad_norm': 0.723333718233821, 'learning_rate': 2.0301183873279116e-07, 'epoch': 0.91} + 91%|█████████ | 11113/12188 [2:18:55<2:04:07, 6.93s/it] 91%|█████████ | 11114/12188 [2:19:05<2:16:19, 7.62s/it] {'loss': 0.2931, 'grad_norm': 0.6853342377203856, 'learning_rate': 2.0263723718910831e-07, 'epoch': 0.91} + 91%|█████████ | 11114/12188 [2:19:05<2:16:19, 7.62s/it] 91%|█████████ | 11115/12188 [2:19:11<2:11:24, 7.35s/it] {'loss': 0.336, 'grad_norm': 0.7455015178684545, 'learning_rate': 2.0226297442747733e-07, 'epoch': 0.91} + 91%|█████████ | 11115/12188 [2:19:11<2:11:24, 7.35s/it] 91%|█████████ | 11116/12188 [2:19:19<2:11:13, 7.34s/it] {'loss': 0.2782, 'grad_norm': 0.6649012261901025, 'learning_rate': 2.0188905047432594e-07, 'epoch': 0.91} + 91%|█████████ | 11116/12188 [2:19:19<2:11:13, 7.34s/it] 91%|█████████ | 11117/12188 [2:19:25<2:07:15, 7.13s/it] {'loss': 0.3323, 'grad_norm': 0.775523884439381, 'learning_rate': 2.0151546535606193e-07, 'epoch': 0.91} + 91%|█████████ | 11117/12188 [2:19:25<2:07:15, 7.13s/it] 91%|█████████ | 11118/12188 [2:19:32<2:06:31, 7.09s/it] {'loss': 0.2898, 'grad_norm': 0.7157299044295505, 'learning_rate': 2.0114221909906584e-07, 'epoch': 0.91} + 91%|█████████ | 11118/12188 [2:19:32<2:06:31, 7.09s/it] 91%|█████████ | 11119/12188 [2:19:42<2:18:15, 7.76s/it] {'loss': 0.286, 'grad_norm': 0.7799069351173589, 'learning_rate': 2.0076931172969605e-07, 'epoch': 0.91} + 91%|█████████ | 11119/12188 [2:19:42<2:18:15, 7.76s/it] 91%|█████████ | 11120/12188 [2:19:48<2:12:40, 7.45s/it] {'loss': 0.2625, 'grad_norm': 0.6763613731449268, 'learning_rate': 2.003967432742876e-07, 'epoch': 0.91} + 91%|█████████ | 11120/12188 [2:19:48<2:12:40, 7.45s/it] 91%|█████████ | 11121/12188 [2:19:57<2:19:37, 7.85s/it] {'loss': 0.3278, 'grad_norm': 0.6977166352717672, 'learning_rate': 2.0002451375914944e-07, 'epoch': 0.91} + 91%|█████████ | 11121/12188 [2:19:57<2:19:37, 7.85s/it] 91%|█████████▏| 11122/12188 [2:20:05<2:18:23, 7.79s/it] {'loss': 0.304, 'grad_norm': 0.7450823115613817, 'learning_rate': 1.9965262321056832e-07, 'epoch': 0.91} + 91%|█████████▏| 11122/12188 [2:20:05<2:18:23, 7.79s/it] 91%|█████████▏| 11123/12188 [2:20:11<2:12:20, 7.46s/it] {'loss': 0.2957, 'grad_norm': 0.7542868835870907, 'learning_rate': 1.9928107165480715e-07, 'epoch': 0.91} + 91%|█████████▏| 11123/12188 [2:20:11<2:12:20, 7.46s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f1506adf830> +[Try #0] Failed to fetch sample 4385311 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f1506adf830> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'HyperKitty'"}, {'from': 'gpt', 'value': '\nclick(x=0.4145, y=0.2765)\n'}]} + 91%|█████████▏| 11124/12188 [2:20:19<2:11:40, 7.42s/it] {'loss': 0.2837, 'grad_norm': 0.6840817808394947, 'learning_rate': 1.9890985911810322e-07, 'epoch': 0.91} + 91%|█████████▏| 11124/12188 [2:20:19<2:11:40, 7.42s/it] 91%|█████████▏| 11125/12188 [2:20:26<2:09:51, 7.33s/it] {'loss': 0.2793, 'grad_norm': 0.695368038896673, 'learning_rate': 1.9853898562667174e-07, 'epoch': 0.91} + 91%|█████████▏| 11125/12188 [2:20:26<2:09:51, 7.33s/it] 91%|█████████▏| 11126/12188 [2:20:33<2:07:01, 7.18s/it] {'loss': 0.2889, 'grad_norm': 0.6797097208024142, 'learning_rate': 1.981684512067028e-07, 'epoch': 0.91} + 91%|█████████▏| 11126/12188 [2:20:33<2:07:01, 7.18s/it] 91%|█████████▏| 11127/12188 [2:20:39<2:04:09, 7.02s/it] {'loss': 0.3163, 'grad_norm': 0.7147370595104895, 'learning_rate': 1.9779825588436276e-07, 'epoch': 0.91} + 91%|█████████▏| 11127/12188 [2:20:39<2:04:09, 7.02s/it] 91%|█████████▏| 11128/12188 [2:20:46<2:03:23, 6.98s/it] {'loss': 0.2833, 'grad_norm': 0.8306061124823119, 'learning_rate': 1.9742839968579562e-07, 'epoch': 0.91} + 91%|█████████▏| 11128/12188 [2:20:46<2:03:23, 6.98s/it] 91%|█████████▏| 11129/12188 [2:20:53<2:04:30, 7.05s/it] {'loss': 0.2796, 'grad_norm': 0.6950524871749059, 'learning_rate': 1.9705888263711837e-07, 'epoch': 0.91} + 91%|█████████▏| 11129/12188 [2:20:53<2:04:30, 7.05s/it] 91%|█████████▏| 11130/12188 [2:21:00<2:02:23, 6.94s/it] {'loss': 0.3196, 'grad_norm': 0.7393135869965219, 'learning_rate': 1.9668970476442617e-07, 'epoch': 0.91} + 91%|█████████▏| 11130/12188 [2:21:00<2:02:23, 6.94s/it] 91%|█████████▏| 11131/12188 [2:21:08<2:04:47, 7.08s/it] {'loss': 0.2826, 'grad_norm': 0.8927498135643415, 'learning_rate': 1.9632086609379041e-07, 'epoch': 0.91} + 91%|█████████▏| 11131/12188 [2:21:08<2:04:47, 7.08s/it] 91%|█████████▏| 11132/12188 [2:21:15<2:04:32, 7.08s/it] {'loss': 0.3363, 'grad_norm': 0.7527046776329254, 'learning_rate': 1.9595236665125694e-07, 'epoch': 0.91} + 91%|█████████▏| 11132/12188 [2:21:15<2:04:32, 7.08s/it] 91%|█████████▏| 11133/12188 [2:21:23<2:10:41, 7.43s/it] {'loss': 0.2847, 'grad_norm': 0.7376303772514891, 'learning_rate': 1.9558420646284937e-07, 'epoch': 0.91} + 91%|█████████▏| 11133/12188 [2:21:23<2:10:41, 7.43s/it] 91%|█████████▏| 11134/12188 [2:21:31<2:13:22, 7.59s/it] {'loss': 0.3149, 'grad_norm': 0.676718759763863, 'learning_rate': 1.952163855545658e-07, 'epoch': 0.91} + 91%|█████████▏| 11134/12188 [2:21:31<2:13:22, 7.59s/it] 91%|█████████▏| 11135/12188 [2:21:38<2:08:25, 7.32s/it] {'loss': 0.3077, 'grad_norm': 0.7119208273686152, 'learning_rate': 1.9484890395238155e-07, 'epoch': 0.91} + 91%|█████████▏| 11135/12188 [2:21:38<2:08:25, 7.32s/it] 91%|█████████▏| 11136/12188 [2:21:44<2:06:09, 7.20s/it] {'loss': 0.2926, 'grad_norm': 0.6774810680756618, 'learning_rate': 1.9448176168224863e-07, 'epoch': 0.91} + 91%|█████████▏| 11136/12188 [2:21:44<2:06:09, 7.20s/it] 91%|█████████▏| 11137/12188 [2:21:51<2:03:45, 7.07s/it] {'loss': 0.3273, 'grad_norm': 0.7447500409600003, 'learning_rate': 1.941149587700919e-07, 'epoch': 0.91} + 91%|█████████▏| 11137/12188 [2:21:51<2:03:45, 7.07s/it] 91%|█████████▏| 11138/12188 [2:21:58<2:01:37, 6.95s/it] {'loss': 0.3061, 'grad_norm': 0.7167195288234308, 'learning_rate': 1.9374849524181617e-07, 'epoch': 0.91} + 91%|█████████▏| 11138/12188 [2:21:58<2:01:37, 6.95s/it] 91%|█████████▏| 11139/12188 [2:22:05<2:00:58, 6.92s/it] {'loss': 0.2878, 'grad_norm': 0.7092772957830009, 'learning_rate': 1.933823711232996e-07, 'epoch': 0.91} + 91%|█████████▏| 11139/12188 [2:22:05<2:00:58, 6.92s/it] 91%|█████████▏| 11140/12188 [2:22:11<1:59:50, 6.86s/it] {'loss': 0.2736, 'grad_norm': 0.7194896830999246, 'learning_rate': 1.9301658644039712e-07, 'epoch': 0.91} + 91%|█████████▏| 11140/12188 [2:22:11<1:59:50, 6.86s/it] 91%|█████████▏| 11141/12188 [2:22:18<1:59:17, 6.84s/it] {'loss': 0.2947, 'grad_norm': 0.703603051507818, 'learning_rate': 1.9265114121894135e-07, 'epoch': 0.91} + 91%|█████████▏| 11141/12188 [2:22:18<1:59:17, 6.84s/it] 91%|█████████▏| 11142/12188 [2:22:25<1:59:51, 6.88s/it] {'loss': 0.3119, 'grad_norm': 1.4468639400429186, 'learning_rate': 1.922860354847378e-07, 'epoch': 0.91} + 91%|█████████▏| 11142/12188 [2:22:25<1:59:51, 6.88s/it] 91%|█████████▏| 11143/12188 [2:22:33<2:03:38, 7.10s/it] {'loss': 0.3029, 'grad_norm': 0.7134245069913129, 'learning_rate': 1.919212692635708e-07, 'epoch': 0.91} + 91%|█████████▏| 11143/12188 [2:22:33<2:03:38, 7.10s/it] 91%|█████████▏| 11144/12188 [2:22:40<2:05:06, 7.19s/it] {'loss': 0.2869, 'grad_norm': 0.7349955357199525, 'learning_rate': 1.915568425811981e-07, 'epoch': 0.91} + 91%|█████████▏| 11144/12188 [2:22:40<2:05:06, 7.19s/it] 91%|█████████▏| 11145/12188 [2:22:47<2:03:56, 7.13s/it] {'loss': 0.2787, 'grad_norm': 0.7231556579470485, 'learning_rate': 1.9119275546335637e-07, 'epoch': 0.91} + 91%|█████████▏| 11145/12188 [2:22:47<2:03:56, 7.13s/it] 91%|█████████▏| 11146/12188 [2:22:54<2:01:20, 6.99s/it] {'loss': 0.2697, 'grad_norm': 0.7576014310371597, 'learning_rate': 1.9082900793575665e-07, 'epoch': 0.91} + 91%|█████████▏| 11146/12188 [2:22:54<2:01:20, 6.99s/it] 91%|█████████▏| 11147/12188 [2:23:01<1:59:33, 6.89s/it] {'loss': 0.2781, 'grad_norm': 0.8473891219955648, 'learning_rate': 1.904656000240851e-07, 'epoch': 0.91} + 91%|█████████▏| 11147/12188 [2:23:01<1:59:33, 6.89s/it] 91%|█████████▏| 11148/12188 [2:23:07<1:58:44, 6.85s/it] {'loss': 0.3182, 'grad_norm': 0.7192205420627756, 'learning_rate': 1.901025317540056e-07, 'epoch': 0.91} + 91%|█████████▏| 11148/12188 [2:23:07<1:58:44, 6.85s/it] 91%|█████████▏| 11149/12188 [2:23:15<2:01:02, 6.99s/it] {'loss': 0.3329, 'grad_norm': 0.661067147450264, 'learning_rate': 1.897398031511588e-07, 'epoch': 0.91} + 91%|█████████▏| 11149/12188 [2:23:15<2:01:02, 6.99s/it] 91%|█████████▏| 11150/12188 [2:23:22<2:03:57, 7.16s/it] {'loss': 0.2967, 'grad_norm': 0.7853203536427892, 'learning_rate': 1.8937741424115751e-07, 'epoch': 0.91} + 91%|█████████▏| 11150/12188 [2:23:22<2:03:57, 7.16s/it] 91%|█████████▏| 11151/12188 [2:23:30<2:08:11, 7.42s/it] {'loss': 0.2932, 'grad_norm': 0.6513387243766028, 'learning_rate': 1.8901536504959516e-07, 'epoch': 0.91} + 91%|█████████▏| 11151/12188 [2:23:30<2:08:11, 7.42s/it] 91%|█████████▏| 11152/12188 [2:23:37<2:04:35, 7.22s/it] {'loss': 0.319, 'grad_norm': 0.6525128381817374, 'learning_rate': 1.8865365560203797e-07, 'epoch': 0.91} + 91%|█████████▏| 11152/12188 [2:23:37<2:04:35, 7.22s/it] 92%|█████████▏| 11153/12188 [2:23:45<2:06:52, 7.35s/it] {'loss': 0.2905, 'grad_norm': 0.695553320883936, 'learning_rate': 1.8829228592402938e-07, 'epoch': 0.92} + 92%|█████████▏| 11153/12188 [2:23:45<2:06:52, 7.35s/it] 92%|█████████▏| 11154/12188 [2:23:52<2:05:29, 7.28s/it] {'loss': 0.2911, 'grad_norm': 0.7027485287830284, 'learning_rate': 1.879312560410901e-07, 'epoch': 0.92} + 92%|█████████▏| 11154/12188 [2:23:52<2:05:29, 7.28s/it] 92%|█████████▏| 11155/12188 [2:23:58<2:02:20, 7.11s/it] {'loss': 0.2976, 'grad_norm': 0.7567046734487369, 'learning_rate': 1.8757056597871305e-07, 'epoch': 0.92} + 92%|█████████▏| 11155/12188 [2:23:58<2:02:20, 7.11s/it] 92%|█████████▏| 11156/12188 [2:24:05<2:00:54, 7.03s/it] {'loss': 0.3646, 'grad_norm': 0.7587485587761763, 'learning_rate': 1.8721021576237175e-07, 'epoch': 0.92} + 92%|█████████▏| 11156/12188 [2:24:05<2:00:54, 7.03s/it] 92%|█████████▏| 11157/12188 [2:24:13<2:02:18, 7.12s/it] {'loss': 0.2707, 'grad_norm': 0.7350556545531196, 'learning_rate': 1.8685020541751197e-07, 'epoch': 0.92} + 92%|█████████▏| 11157/12188 [2:24:13<2:02:18, 7.12s/it] 92%|█████████▏| 11158/12188 [2:24:19<2:00:15, 7.01s/it] {'loss': 0.2765, 'grad_norm': 0.7241041951530534, 'learning_rate': 1.8649053496955838e-07, 'epoch': 0.92} + 92%|█████████▏| 11158/12188 [2:24:19<2:00:15, 7.01s/it] 92%|█████████▏| 11159/12188 [2:24:28<2:10:14, 7.59s/it] {'loss': 0.3138, 'grad_norm': 0.6937274255549696, 'learning_rate': 1.8613120444390952e-07, 'epoch': 0.92} + 92%|█████████▏| 11159/12188 [2:24:28<2:10:14, 7.59s/it] 92%|█████████▏| 11160/12188 [2:24:37<2:18:13, 8.07s/it] {'loss': 0.3671, 'grad_norm': 0.7081392242013417, 'learning_rate': 1.857722138659407e-07, 'epoch': 0.92} + 92%|█████████▏| 11160/12188 [2:24:37<2:18:13, 8.07s/it] 92%|█████████▏| 11161/12188 [2:24:44<2:12:08, 7.72s/it] {'loss': 0.2864, 'grad_norm': 0.7402130029167545, 'learning_rate': 1.8541356326100436e-07, 'epoch': 0.92} + 92%|█████████▏| 11161/12188 [2:24:44<2:12:08, 7.72s/it] 92%|█████████▏| 11162/12188 [2:24:51<2:07:58, 7.48s/it] {'loss': 0.3088, 'grad_norm': 0.6677508670945193, 'learning_rate': 1.850552526544258e-07, 'epoch': 0.92} + 92%|█████████▏| 11162/12188 [2:24:51<2:07:58, 7.48s/it] 92%|█████████▏| 11163/12188 [2:24:59<2:08:08, 7.50s/it] {'loss': 0.279, 'grad_norm': 0.7145950461878426, 'learning_rate': 1.8469728207150982e-07, 'epoch': 0.92} + 92%|█████████▏| 11163/12188 [2:24:59<2:08:08, 7.50s/it] 92%|█████████▏| 11164/12188 [2:25:06<2:03:50, 7.26s/it] {'loss': 0.2917, 'grad_norm': 0.7906255228422269, 'learning_rate': 1.8433965153753563e-07, 'epoch': 0.92} + 92%|█████████▏| 11164/12188 [2:25:06<2:03:50, 7.26s/it] 92%|█████████▏| 11165/12188 [2:25:12<2:00:59, 7.10s/it] {'loss': 0.3242, 'grad_norm': 0.7495380582489882, 'learning_rate': 1.8398236107775802e-07, 'epoch': 0.92} + 92%|█████████▏| 11165/12188 [2:25:12<2:00:59, 7.10s/it] 92%|█████████▏| 11166/12188 [2:25:19<1:58:24, 6.95s/it] {'loss': 0.3039, 'grad_norm': 0.6879911408149713, 'learning_rate': 1.8362541071740846e-07, 'epoch': 0.92} + 92%|█████████▏| 11166/12188 [2:25:19<1:58:24, 6.95s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 90, in pil_loader + return img.convert("RGB") + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 993, in convert + self.load() + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/ImageFile.py", line 319, in load + raise _get_oserror(err_code, encoder=False) +OSError: broken data stream when reading image file +[Try #0] Failed to fetch sample 6014818 in VC:s3://gui-agent/data_20250623/windows_augment/images. Exception: broken data stream when reading image file +Problematic sample: {'image': 'autocad/20250509_125727_1/images/before_screenshot_1_id_121_internvl_position_crop_1_grounding_instructions_point_o_paste.png', 'conversations': [{'from': 'human', 'value': "\nOnly respond with the coordinates: The 'CUSTOMIZE' text is located in the left side of the application window, within the Tool Palettes section. It's positioned at the top of what appears to be a customization panel, directly above the helper text that reads 'Press F1 for more help'."}, {'from': 'gpt', 'value': "The 'CUSTOMIZE' text is located in the left side of the application window, within the Tool Palettes section. It's positioned at the top of what appears to be a customization panel, directly above the helper text that reads 'Press F1 for more help'.[[495, 550, 515, 557]]"}], 'width': 3600, 'height': 2338} + 92%|█████████▏| 11167/12188 [2:25:28<2:07:24, 7.49s/it] {'loss': 0.3019, 'grad_norm': 0.7103654008774304, 'learning_rate': 1.832688004816946e-07, 'epoch': 0.92} + 92%|█████████▏| 11167/12188 [2:25:28<2:07:24, 7.49s/it] 92%|█████████▏| 11168/12188 [2:25:34<2:04:10, 7.30s/it] {'loss': 0.2645, 'grad_norm': 0.678277683561966, 'learning_rate': 1.8291253039579905e-07, 'epoch': 0.92} + 92%|█████████▏| 11168/12188 [2:25:34<2:04:10, 7.30s/it] 92%|█████████▏| 11169/12188 [2:25:41<2:00:48, 7.11s/it] {'loss': 0.2977, 'grad_norm': 0.8063713636776473, 'learning_rate': 1.8255660048488223e-07, 'epoch': 0.92} + 92%|█████████▏| 11169/12188 [2:25:41<2:00:48, 7.11s/it] 92%|█████████▏| 11170/12188 [2:25:48<1:59:30, 7.04s/it] {'loss': 0.335, 'grad_norm': 0.7037583876766896, 'learning_rate': 1.8220101077407738e-07, 'epoch': 0.92} + 92%|█████████▏| 11170/12188 [2:25:48<1:59:30, 7.04s/it] 92%|█████████▏| 11171/12188 [2:25:56<2:06:33, 7.47s/it] {'loss': 0.2865, 'grad_norm': 0.6839094025221226, 'learning_rate': 1.8184576128849773e-07, 'epoch': 0.92} + 92%|█████████▏| 11171/12188 [2:25:56<2:06:33, 7.47s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1348, in _get_item + raise ValueError( +ValueError: Number of image tokens ['63eeb12752a6426abeb129b8049d5bddstep20.png', '63eeb12752a6426abeb129b8049d5bddstep21.png'] does not match number of images None +[Try #0] Failed to fetch sample 1057917 in VC:s3://gui/aguvis/aguvis-stage2/amex/images. Exception: Number of image tokens ['63eeb12752a6426abeb129b8049d5bddstep20.png', '63eeb12752a6426abeb129b8049d5bddstep21.png'] does not match number of images None +Problematic sample: {'image': ['63eeb12752a6426abeb129b8049d5bddstep20.png', '63eeb12752a6426abeb129b8049d5bddstep21.png'], 'conversations': [{'from': 'human', 'value': "\nPlease generate the next move according to the UI screenshot, the task and previous operations.\n\nTask:\nI want to book a hotel in london, prize should be less than $600, guest rating is 8+, 4 star rating, breakfast included\n\nPrevious operations:\nStep 1: Tap on the Chrome app to start searching for hotels in London.\nStep 2: Type 'Chrome' in the search bar to open the Chrome browser.\nStep 3: Tap on the Hotels.com app icon to begin searching for a hotel.\nStep 4: Wait for the Hotels.com app to finish loading to proceed with the hotel search.\nStep 5: Tap on the 'Going to' field to enter London as the destination.\nStep 6: Tap on 'London' from Recent Searches to select it as the destination for the hotel search.\nStep 7: Tap on 'London, England, United Kingdom' to select it as the destination for the hotel search.\nStep 8: Tap on the 'Search' button to view available hotels in London.\nStep 9: Tap on the filter icon or option to apply filters to the hotel search results.\nStep 10: Swipe up on the screen to reveal more filter options, such as guest rating and hotel stars.\nStep 11: Swipe up to reveal more filter options, specifically guest ratings and hotel stars.\nStep 12: Tap on the 4-star rating box to select it.\nStep 13: Tap 'Done' to confirm the current filter selections and proceed.\nStep 14: Swipe up on the screen to locate and select the 'breakfast included' filter option.\nStep 15: Tap on 'Breakfast included' to select it.\nStep 16: Tap on the 'Done' button to apply the filters and view available hotel options.\nStep 17: Tap on the topmost hotel card to view its details.\nStep 18: Tap on the 'Select a room' button to check available room options and their prices.\nStep 19: Tap on 'Select and customize' to proceed with the room selection and customization options.\nStep 20: Tap on 'Breakfast buffet' to select it, then tap on 'Reserve' to book the room."}, {'from': 'gpt', 'value': "\nWait for the loading to complete to confirm the booking status.\n\n\nterminate(status='success')\n"}]} + 92%|█████████▏| 11172/12188 [2:26:03<2:02:11, 7.22s/it] {'loss': 0.2674, 'grad_norm': 0.6853664088856297, 'learning_rate': 1.814908520532288e-07, 'epoch': 0.92} + 92%|█████████▏| 11172/12188 [2:26:03<2:02:11, 7.22s/it] 92%|█████████▏| 11173/12188 [2:26:11<2:05:33, 7.42s/it] {'loss': 0.2724, 'grad_norm': 0.919663930371456, 'learning_rate': 1.8113628309333497e-07, 'epoch': 0.92} + 92%|█████████▏| 11173/12188 [2:26:11<2:05:33, 7.42s/it] 92%|█████████▏| 11174/12188 [2:26:19<2:06:51, 7.51s/it] {'loss': 0.292, 'grad_norm': 0.6973767004791466, 'learning_rate': 1.8078205443385456e-07, 'epoch': 0.92} + 92%|█████████▏| 11174/12188 [2:26:19<2:06:51, 7.51s/it] 92%|█████████▏| 11175/12188 [2:26:25<2:02:00, 7.23s/it] {'loss': 0.2724, 'grad_norm': 0.6902443277844865, 'learning_rate': 1.804281660998025e-07, 'epoch': 0.92} + 92%|█████████▏| 11175/12188 [2:26:25<2:02:00, 7.23s/it] 92%|█████████▏| 11176/12188 [2:26:33<2:02:19, 7.25s/it] {'loss': 0.3598, 'grad_norm': 0.7743158890876478, 'learning_rate': 1.8007461811617e-07, 'epoch': 0.92} + 92%|█████████▏| 11176/12188 [2:26:33<2:02:19, 7.25s/it] 92%|█████████▏| 11177/12188 [2:26:40<2:01:19, 7.20s/it] {'loss': 0.2979, 'grad_norm': 0.7823753759782084, 'learning_rate': 1.797214105079248e-07, 'epoch': 0.92} + 92%|█████████▏| 11177/12188 [2:26:40<2:01:19, 7.20s/it] 92%|█████████▏| 11178/12188 [2:26:48<2:07:47, 7.59s/it] {'loss': 0.3303, 'grad_norm': 0.6910155049462557, 'learning_rate': 1.793685433000081e-07, 'epoch': 0.92} + 92%|█████████▏| 11178/12188 [2:26:48<2:07:47, 7.59s/it] 92%|█████████▏| 11179/12188 [2:26:56<2:07:14, 7.57s/it] {'loss': 0.3329, 'grad_norm': 0.7490677081842697, 'learning_rate': 1.7901601651734101e-07, 'epoch': 0.92} + 92%|█████████▏| 11179/12188 [2:26:56<2:07:14, 7.57s/it] 92%|█████████▏| 11180/12188 [2:27:03<2:03:42, 7.36s/it] {'loss': 0.2887, 'grad_norm': 0.6911657575381769, 'learning_rate': 1.7866383018481704e-07, 'epoch': 0.92} + 92%|█████████▏| 11180/12188 [2:27:03<2:03:42, 7.36s/it] 92%|█████████▏| 11181/12188 [2:27:13<2:17:31, 8.19s/it] {'loss': 0.2872, 'grad_norm': 0.7180753478793787, 'learning_rate': 1.7831198432730678e-07, 'epoch': 0.92} + 92%|█████████▏| 11181/12188 [2:27:13<2:17:31, 8.19s/it] 92%|█████████▏| 11182/12188 [2:27:20<2:13:37, 7.97s/it] {'loss': 0.2747, 'grad_norm': 0.6991361064207425, 'learning_rate': 1.7796047896965817e-07, 'epoch': 0.92} + 92%|█████████▏| 11182/12188 [2:27:20<2:13:37, 7.97s/it] 92%|█████████▏| 11183/12188 [2:27:27<2:08:31, 7.67s/it] {'loss': 0.2972, 'grad_norm': 0.7282019283774276, 'learning_rate': 1.7760931413669303e-07, 'epoch': 0.92} + 92%|█████████▏| 11183/12188 [2:27:27<2:08:31, 7.67s/it] 92%|█████████▏| 11184/12188 [2:27:37<2:17:59, 8.25s/it] {'loss': 0.3128, 'grad_norm': 0.6880995927073441, 'learning_rate': 1.7725848985321037e-07, 'epoch': 0.92} + 92%|█████████▏| 11184/12188 [2:27:37<2:17:59, 8.25s/it] 92%|█████████▏| 11185/12188 [2:27:44<2:11:52, 7.89s/it] {'loss': 0.2882, 'grad_norm': 0.6938822884177609, 'learning_rate': 1.7690800614398483e-07, 'epoch': 0.92} + 92%|█████████▏| 11185/12188 [2:27:44<2:11:52, 7.89s/it] 92%|█████████▏| 11186/12188 [2:27:50<2:05:12, 7.50s/it] {'loss': 0.3187, 'grad_norm': 0.699894706379759, 'learning_rate': 1.7655786303376775e-07, 'epoch': 0.92} + 92%|█████████▏| 11186/12188 [2:27:50<2:05:12, 7.50s/it] 92%|█████████▏| 11187/12188 [2:27:57<2:01:33, 7.29s/it] {'loss': 0.2764, 'grad_norm': 0.6584841868936128, 'learning_rate': 1.7620806054728434e-07, 'epoch': 0.92} + 92%|█████████▏| 11187/12188 [2:27:57<2:01:33, 7.29s/it] 92%|█████████▏| 11188/12188 [2:28:04<1:57:53, 7.07s/it] {'loss': 0.2946, 'grad_norm': 0.8457625232826453, 'learning_rate': 1.758585987092376e-07, 'epoch': 0.92} + 92%|█████████▏| 11188/12188 [2:28:04<1:57:53, 7.07s/it] 92%|█████████▏| 11189/12188 [2:28:11<2:00:13, 7.22s/it] {'loss': 0.288, 'grad_norm': 0.7099583373947796, 'learning_rate': 1.7550947754430615e-07, 'epoch': 0.92} + 92%|█████████▏| 11189/12188 [2:28:11<2:00:13, 7.22s/it] 92%|█████████▏| 11190/12188 [2:28:18<1:56:43, 7.02s/it] {'loss': 0.316, 'grad_norm': 0.7245110432782931, 'learning_rate': 1.751606970771441e-07, 'epoch': 0.92} + 92%|█████████▏| 11190/12188 [2:28:18<1:56:43, 7.02s/it] 92%|█████████▏| 11191/12188 [2:28:25<1:56:22, 7.00s/it] {'loss': 0.272, 'grad_norm': 0.7162213542276701, 'learning_rate': 1.7481225733238184e-07, 'epoch': 0.92} + 92%|█████████▏| 11191/12188 [2:28:25<1:56:22, 7.00s/it] 92%|█████████▏| 11192/12188 [2:28:32<1:58:24, 7.13s/it] {'loss': 0.2622, 'grad_norm': 0.642065479275801, 'learning_rate': 1.7446415833462625e-07, 'epoch': 0.92} + 92%|█████████▏| 11192/12188 [2:28:32<1:58:24, 7.13s/it] 92%|█████████▏| 11193/12188 [2:28:39<1:56:23, 7.02s/it] {'loss': 0.3011, 'grad_norm': 0.7023512178609783, 'learning_rate': 1.7411640010845832e-07, 'epoch': 0.92} + 92%|█████████▏| 11193/12188 [2:28:39<1:56:23, 7.02s/it] 92%|█████████▏| 11194/12188 [2:28:46<1:55:51, 6.99s/it] {'loss': 0.2792, 'grad_norm': 0.6896769047969132, 'learning_rate': 1.7376898267843722e-07, 'epoch': 0.92} + 92%|█████████▏| 11194/12188 [2:28:46<1:55:51, 6.99s/it] 92%|█████████▏| 11195/12188 [2:28:53<1:55:01, 6.95s/it] {'loss': 0.2929, 'grad_norm': 0.7294860734149823, 'learning_rate': 1.734219060690967e-07, 'epoch': 0.92} + 92%|█████████▏| 11195/12188 [2:28:53<1:55:01, 6.95s/it] 92%|█████████▏| 11196/12188 [2:28:59<1:53:15, 6.85s/it] {'loss': 0.3224, 'grad_norm': 0.6772652787202853, 'learning_rate': 1.730751703049466e-07, 'epoch': 0.92} + 92%|█████████▏| 11196/12188 [2:28:59<1:53:15, 6.85s/it] 92%|█████████▏| 11197/12188 [2:29:06<1:53:12, 6.85s/it] {'loss': 0.3272, 'grad_norm': 0.7081183964910114, 'learning_rate': 1.7272877541047284e-07, 'epoch': 0.92} + 92%|█████████▏| 11197/12188 [2:29:06<1:53:12, 6.85s/it] 92%|█████████▏| 11198/12188 [2:29:14<1:55:21, 6.99s/it] {'loss': 0.2734, 'grad_norm': 0.9669476647030637, 'learning_rate': 1.7238272141013702e-07, 'epoch': 0.92} + 92%|█████████▏| 11198/12188 [2:29:14<1:55:21, 6.99s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 899, in process_image_unified + visual_processed = processor.preprocess(image, return_tensors="pt") + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 432, in preprocess + patches, image_grid_thw = self._preprocess( + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 252, in _preprocess + resized_height, resized_width = smart_resize( + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 69, in smart_resize + raise ValueError(f"height:{height} and width:{width} must be larger than factor:{factor}") +ValueError: height:21 and width:135 must be larger than factor:28 +[Try #0] Failed to fetch sample 2206265 in VC:s3://gui-agent/jedi/images/figma400k/figma400k_extracted/. Exception: height:21 and width:135 must be larger than factor:28 +Problematic sample: {'image': 'a4b739659e7c325d58fab0a3e55e135c875dc9a485e2b2af942dbafdf4662908.png', 'conversations': [{'from': 'human', 'value': "\nThe Date picker trigger button icon's intended function:\nActs as a date picker trigger that opens a calendar dropdown when clicked, allowing users to select dates visually rather than typing them manually. It's part of the browser's default date input functionality."}, {'from': 'gpt', 'value': '[[674, 238, 756, 762]]'}, {'from': 'human', 'value': "The spatial layout of this Date picker trigger button icon:\nLocated at the absolute right edge of a date input field, aligned vertically in the center. The triangle appears immediately adjacent to another UI control in the same date field.\n\nRegarding the Date picker trigger button icon's functionality:\nActs as a date picker trigger that opens a calendar dropdown when clicked, allowing users to select dates visually rather than typing them manually. It's part of the browser's default date input functionality."}, {'from': 'gpt', 'value': '[[674, 238, 756, 762]]'}, {'from': 'human', 'value': 'Location and arrangement of this Date picker trigger button icon:\nLocated at the absolute right edge of a date input field, aligned vertically in the center. The triangle appears immediately adjacent to another UI control in the same date field.'}, {'from': 'gpt', 'value': '[[674, 238, 756, 762]]'}, {'from': 'human', 'value': "The Button (Spinner Button)'s visual characteristics are as follows:\nA small downward-pointing arrow button that's part of a number input control. The arrow has a dark color and appears in a square container with border styling.\n\nPositional details of the Button (Spinner Button):\nThis button is positioned on the right side of a date input field, located in a row with two identical date input controls. Each date field has a dd/mm/yyyy format placeholder text."}, {'from': 'gpt', 'value': 'x1, y1, x2, y2: [[785, 190, 852, 857]]'}, {'from': 'human', 'value': "The visual appearance of this Button (Spinner Button) is as follows:\n\nVisual Description: A small downward-pointing arrow button that's part of a number input control. The arrow has a dark color and appears in a square container with border styling."}, {'from': 'gpt', 'value': 'x1, y1, x2, y2: [[785, 190, 852, 857]]'}, {'from': 'human', 'value': "This Button (Spinner Button) serves the following purpose:\nWhen clicked, this button decreases the value in the associated date input field. It's part of the browser's default date picker UI controls that allow users to adjust date values without typing."}, {'from': 'gpt', 'value': 'x1, y1, x2, y2: [[785, 190, 852, 857]]'}, {'from': 'human', 'value': "The User Profile Component's appearance can be described as:\nA compact user profile display showing the username 'NotTerry' accompanied by a small circular avatar with a cyan/blue profile picture\n\nThe User Profile Component's placement in the interface:\nLocated in the live chat section on the right side of the interface, integrated into the message stream, appearing as a message sender's identification"}, {'from': 'gpt', 'value': '[[785, 190, 852, 857]]'}, {'from': 'human', 'value': "The User Profile Component's appearance can be described as:\nA compact user profile display showing the username 'NotTerry' accompanied by a small circular avatar with a cyan/blue profile picture\n\nFunctional capabilities of the User Profile Component:\nRepresents a user's identity and serves as a header for their chat messages in the live chat interface"}, {'from': 'gpt', 'value': '[[785, 190, 852, 857]]'}, {'from': 'human', 'value': "The visual attributes of this User Profile Component are:\nA compact user profile display showing the username 'NotTerry' accompanied by a small circular avatar with a cyan/blue profile picture\n\nThis User Profile Component is positioned as follows:\nLocated in the live chat section on the right side of the interface, integrated into the message stream, appearing as a message sender's identification\n\nUsage and purpose of this User Profile Component:\nRepresents a user's identity and serves as a header for their chat messages in the live chat interface"}, {'from': 'gpt', 'value': '[[785, 190, 852, 857]]'}, {'from': 'human', 'value': "Here's a detailed visual description of the Navigation Button:\nA navigation icon featuring three small squares arranged in a grid pattern, located within a dark navy interface. The icon appears to use a faintly glowing blue color against the dark background.\n\nThis Navigation Button allows users to:\nA navigation button that likely serves as a 'MAIN' menu control, allowing users to access primary navigation options or return to the main view of the application."}, {'from': 'gpt', 'value': '[[0, 0, 1000, 1000]]'}, {'from': 'human', 'value': "The position of this Navigation Button can be described as:\nThe icon is positioned in the upper-left side of the interface, just below the RustyLoot logo. It's the first item in what appears to be a vertical navigation menu with other gaming-related options.\n\nThis Navigation Button serves the following purpose:\nA navigation button that likely serves as a 'MAIN' menu control, allowing users to access primary navigation options or return to the main view of the application."}, {'from': 'gpt', 'value': '[[0, 0, 1000, 1000]]'}, {'from': 'human', 'value': "The Navigation Button's appearance can be described as:\nA navigation icon featuring three small squares arranged in a grid pattern, located within a dark navy interface. The icon appears to use a faintly glowing blue color against the dark background.\n\nPositional details of the Navigation Button:\nThe icon is positioned in the upper-left side of the interface, just below the RustyLoot logo. It's the first item in what appears to be a vertical navigation menu with other gaming-related options."}, {'from': 'gpt', 'value': '[[0, 0, 1000, 1000]]'}]} + 92%|█████████▏| 11199/12188 [2:29:20<1:54:21, 6.94s/it] {'loss': 0.3238, 'grad_norm': 0.6827886991068891, 'learning_rate': 1.7203700832837844e-07, 'epoch': 0.92} + 92%|█████████▏| 11199/12188 [2:29:20<1:54:21, 6.94s/it] 92%|█████████▏| 11200/12188 [2:29:28<1:55:15, 7.00s/it] {'loss': 0.2907, 'grad_norm': 0.6634725489969111, 'learning_rate': 1.7169163618960817e-07, 'epoch': 0.92} + 92%|█████████▏| 11200/12188 [2:29:28<1:55:15, 7.00s/it] 92%|█████████▏| 11201/12188 [2:29:34<1:53:55, 6.93s/it] {'loss': 0.3019, 'grad_norm': 0.6939149727767926, 'learning_rate': 1.713466050182183e-07, 'epoch': 0.92} + 92%|█████████▏| 11201/12188 [2:29:34<1:53:55, 6.93s/it] 92%|█████████▏| 11202/12188 [2:29:41<1:55:04, 7.00s/it] {'loss': 0.2993, 'grad_norm': 0.733538278874451, 'learning_rate': 1.7100191483857275e-07, 'epoch': 0.92} + 92%|█████████▏| 11202/12188 [2:29:41<1:55:04, 7.00s/it] 92%|█████████▏| 11203/12188 [2:29:49<1:55:18, 7.02s/it] {'loss': 0.3121, 'grad_norm': 0.7422676914599128, 'learning_rate': 1.7065756567501424e-07, 'epoch': 0.92} + 92%|█████████▏| 11203/12188 [2:29:49<1:55:18, 7.02s/it] 92%|█████████▏| 11204/12188 [2:29:56<1:57:18, 7.15s/it] {'loss': 0.2916, 'grad_norm': 0.6806040577027234, 'learning_rate': 1.7031355755185886e-07, 'epoch': 0.92} + 92%|█████████▏| 11204/12188 [2:29:56<1:57:18, 7.15s/it]Invalidate trace cache @ step 2: expected module 1, but got module 364 + 92%|█████████▏| 11205/12188 [2:30:02<1:52:11, 6.85s/it] {'loss': 0.7059, 'grad_norm': 0.5780712019719737, 'learning_rate': 1.6996989049340106e-07, 'epoch': 0.92} + 92%|█████████▏| 11205/12188 [2:30:02<1:52:11, 6.85s/it] 92%|█████████▏| 11206/12188 [2:30:13<2:09:58, 7.94s/it] {'loss': 0.3045, 'grad_norm': 0.7336586090005255, 'learning_rate': 1.6962656452390925e-07, 'epoch': 0.92} + 92%|█████████▏| 11206/12188 [2:30:13<2:09:58, 7.94s/it] 92%|█████████▏| 11207/12188 [2:30:19<2:04:07, 7.59s/it] {'loss': 0.3171, 'grad_norm': 0.7828880966564624, 'learning_rate': 1.6928357966762843e-07, 'epoch': 0.92} + 92%|█████████▏| 11207/12188 [2:30:19<2:04:07, 7.59s/it] 92%|█████████▏| 11208/12188 [2:30:26<2:00:35, 7.38s/it] {'loss': 0.2875, 'grad_norm': 0.7138179703539664, 'learning_rate': 1.689409359487809e-07, 'epoch': 0.92} + 92%|█████████▏| 11208/12188 [2:30:26<2:00:35, 7.38s/it] 92%|█████████▏| 11209/12188 [2:30:34<1:59:37, 7.33s/it] {'loss': 0.3046, 'grad_norm': 0.7141288173484236, 'learning_rate': 1.6859863339156235e-07, 'epoch': 0.92} + 92%|█████████▏| 11209/12188 [2:30:34<1:59:37, 7.33s/it] 92%|█████████▏| 11210/12188 [2:30:40<1:57:13, 7.19s/it] {'loss': 0.2719, 'grad_norm': 0.7476760644741419, 'learning_rate': 1.6825667202014617e-07, 'epoch': 0.92} + 92%|█████████▏| 11210/12188 [2:30:40<1:57:13, 7.19s/it] 92%|█████████▏| 11211/12188 [2:30:47<1:55:34, 7.10s/it] {'loss': 0.2827, 'grad_norm': 0.7103407860902603, 'learning_rate': 1.6791505185868085e-07, 'epoch': 0.92} + 92%|█████████▏| 11211/12188 [2:30:47<1:55:34, 7.10s/it] 92%|█████████▏| 11212/12188 [2:30:54<1:55:39, 7.11s/it] {'loss': 0.287, 'grad_norm': 0.6713732085251517, 'learning_rate': 1.675737729312915e-07, 'epoch': 0.92} + 92%|█████████▏| 11212/12188 [2:30:54<1:55:39, 7.11s/it] 92%|█████████▏| 11213/12188 [2:31:01<1:53:50, 7.01s/it] {'loss': 0.305, 'grad_norm': 0.8735051713076202, 'learning_rate': 1.6723283526207833e-07, 'epoch': 0.92} + 92%|█████████▏| 11213/12188 [2:31:01<1:53:50, 7.01s/it] 92%|█████████▏| 11214/12188 [2:31:08<1:52:50, 6.95s/it] {'loss': 0.2898, 'grad_norm': 0.690459381900014, 'learning_rate': 1.6689223887511818e-07, 'epoch': 0.92} + 92%|█████████▏| 11214/12188 [2:31:08<1:52:50, 6.95s/it] 92%|█████████▏| 11215/12188 [2:31:15<1:52:24, 6.93s/it] {'loss': 0.314, 'grad_norm': 0.6929339987239735, 'learning_rate': 1.6655198379446347e-07, 'epoch': 0.92} + 92%|█████████▏| 11215/12188 [2:31:15<1:52:24, 6.93s/it] 92%|█████████▏| 11216/12188 [2:31:22<1:52:54, 6.97s/it] {'loss': 0.3033, 'grad_norm': 0.7182650896337903, 'learning_rate': 1.6621207004414108e-07, 'epoch': 0.92} + 92%|█████████▏| 11216/12188 [2:31:22<1:52:54, 6.97s/it] 92%|█████████▏| 11217/12188 [2:31:28<1:50:41, 6.84s/it] {'loss': 0.2824, 'grad_norm': 0.7094500458623321, 'learning_rate': 1.6587249764815628e-07, 'epoch': 0.92} + 92%|█████████▏| 11217/12188 [2:31:28<1:50:41, 6.84s/it] 92%|█████████▏| 11218/12188 [2:31:37<1:56:29, 7.21s/it] {'loss': 0.3021, 'grad_norm': 0.8217182234567441, 'learning_rate': 1.6553326663048986e-07, 'epoch': 0.92} + 92%|█████████▏| 11218/12188 [2:31:37<1:56:29, 7.21s/it] 92%|█████████▏| 11219/12188 [2:31:45<2:00:46, 7.48s/it] {'loss': 0.3131, 'grad_norm': 0.7601518052569586, 'learning_rate': 1.6519437701509654e-07, 'epoch': 0.92} + 92%|█████████▏| 11219/12188 [2:31:45<2:00:46, 7.48s/it] 92%|█████████▏| 11220/12188 [2:31:53<2:06:05, 7.82s/it] {'loss': 0.2751, 'grad_norm': 0.6964379314814756, 'learning_rate': 1.6485582882590833e-07, 'epoch': 0.92} + 92%|█████████▏| 11220/12188 [2:31:53<2:06:05, 7.82s/it] 92%|█████████▏| 11221/12188 [2:32:00<2:02:26, 7.60s/it] {'loss': 0.3184, 'grad_norm': 0.7040331854864861, 'learning_rate': 1.6451762208683387e-07, 'epoch': 0.92} + 92%|█████████▏| 11221/12188 [2:32:00<2:02:26, 7.60s/it] 92%|█████████▏| 11222/12188 [2:32:08<2:02:46, 7.63s/it] {'loss': 0.2785, 'grad_norm': 0.7075172966903762, 'learning_rate': 1.6417975682175568e-07, 'epoch': 0.92} + 92%|█████████▏| 11222/12188 [2:32:08<2:02:46, 7.63s/it] 92%|█████████▏| 11223/12188 [2:32:17<2:07:38, 7.94s/it] {'loss': 0.3002, 'grad_norm': 0.6516023402574832, 'learning_rate': 1.6384223305453417e-07, 'epoch': 0.92} + 92%|█████████▏| 11223/12188 [2:32:17<2:07:38, 7.94s/it] 92%|█████████▏| 11224/12188 [2:32:24<2:04:39, 7.76s/it] {'loss': 0.2886, 'grad_norm': 0.6986550134440181, 'learning_rate': 1.6350505080900358e-07, 'epoch': 0.92} + 92%|█████████▏| 11224/12188 [2:32:24<2:04:39, 7.76s/it] 92%|█████████▏| 11225/12188 [2:32:31<2:00:21, 7.50s/it] {'loss': 0.3369, 'grad_norm': 0.846348481466271, 'learning_rate': 1.63168210108976e-07, 'epoch': 0.92} + 92%|█████████▏| 11225/12188 [2:32:31<2:00:21, 7.50s/it] 92%|█████████▏| 11226/12188 [2:32:38<1:57:49, 7.35s/it] {'loss': 0.3035, 'grad_norm': 0.8204047457825776, 'learning_rate': 1.6283171097823903e-07, 'epoch': 0.92} + 92%|█████████▏| 11226/12188 [2:32:38<1:57:49, 7.35s/it][2025-08-18 13:30:52,183] [WARNING] [stage3.py:2118:step] 1 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time + 92%|█████████▏| 11227/12188 [2:32:51<2:23:31, 8.96s/it] {'loss': 0.3065, 'grad_norm': 0.7175185040630233, 'learning_rate': 1.6249555344055533e-07, 'epoch': 0.92} + 92%|█████████▏| 11227/12188 [2:32:51<2:23:31, 8.96s/it] 92%|█████████▏| 11228/12188 [2:32:58<2:17:41, 8.61s/it] {'loss': 0.3196, 'grad_norm': 0.6873672435857192, 'learning_rate': 1.6215973751966262e-07, 'epoch': 0.92} + 92%|█████████▏| 11228/12188 [2:32:58<2:17:41, 8.61s/it] 92%|█████████▏| 11229/12188 [2:33:05<2:09:03, 8.07s/it] {'loss': 0.2789, 'grad_norm': 0.6951233149666178, 'learning_rate': 1.6182426323927747e-07, 'epoch': 0.92} + 92%|█████████▏| 11229/12188 [2:33:05<2:09:03, 8.07s/it] 92%|█████████▏| 11230/12188 [2:33:13<2:06:53, 7.95s/it] {'loss': 0.2934, 'grad_norm': 0.6751643978880865, 'learning_rate': 1.6148913062308923e-07, 'epoch': 0.92} + 92%|█████████▏| 11230/12188 [2:33:13<2:06:53, 7.95s/it] 92%|█████████▏| 11231/12188 [2:33:23<2:16:52, 8.58s/it] {'loss': 0.2521, 'grad_norm': 0.6524874753389012, 'learning_rate': 1.6115433969476567e-07, 'epoch': 0.92} + 92%|█████████▏| 11231/12188 [2:33:23<2:16:52, 8.58s/it] 92%|█████████▏| 11232/12188 [2:33:30<2:08:34, 8.07s/it] {'loss': 0.2431, 'grad_norm': 0.659918330448364, 'learning_rate': 1.6081989047794732e-07, 'epoch': 0.92} + 92%|█████████▏| 11232/12188 [2:33:30<2:08:34, 8.07s/it] 92%|█████████▏| 11233/12188 [2:33:37<2:05:53, 7.91s/it] {'loss': 0.2976, 'grad_norm': 0.749968799927241, 'learning_rate': 1.6048578299625472e-07, 'epoch': 0.92} + 92%|█████████▏| 11233/12188 [2:33:37<2:05:53, 7.91s/it] 92%|█████████▏| 11234/12188 [2:33:46<2:10:50, 8.23s/it] {'loss': 0.2774, 'grad_norm': 0.6312709816459744, 'learning_rate': 1.6015201727328012e-07, 'epoch': 0.92} + 92%|█████████▏| 11234/12188 [2:33:46<2:10:50, 8.23s/it] 92%|█████████▏| 11235/12188 [2:33:53<2:02:55, 7.74s/it] {'loss': 0.2825, 'grad_norm': 0.7178652243148463, 'learning_rate': 1.598185933325941e-07, 'epoch': 0.92} + 92%|█████████▏| 11235/12188 [2:33:53<2:02:55, 7.74s/it] 92%|█████████▏| 11236/12188 [2:34:00<2:00:33, 7.60s/it] {'loss': 0.3564, 'grad_norm': 0.7730057535915842, 'learning_rate': 1.5948551119774336e-07, 'epoch': 0.92} + 92%|█████████▏| 11236/12188 [2:34:00<2:00:33, 7.60s/it] 92%|█████████▏| 11237/12188 [2:34:07<1:57:04, 7.39s/it] {'loss': 0.2788, 'grad_norm': 0.747992303136655, 'learning_rate': 1.59152770892248e-07, 'epoch': 0.92} + 92%|█████████▏| 11237/12188 [2:34:07<1:57:04, 7.39s/it] 92%|█████████▏| 11238/12188 [2:34:14<1:56:23, 7.35s/it] {'loss': 0.3258, 'grad_norm': 0.6720862277115186, 'learning_rate': 1.5882037243960702e-07, 'epoch': 0.92} + 92%|█████████▏| 11238/12188 [2:34:14<1:56:23, 7.35s/it] 92%|█████████▏| 11239/12188 [2:34:22<1:57:08, 7.41s/it] {'loss': 0.3093, 'grad_norm': 0.7307359498729276, 'learning_rate': 1.5848831586329382e-07, 'epoch': 0.92} + 92%|█████████▏| 11239/12188 [2:34:22<1:57:08, 7.41s/it] 92%|█████████▏| 11240/12188 [2:34:29<1:53:51, 7.21s/it] {'loss': 0.315, 'grad_norm': 0.7021502522932467, 'learning_rate': 1.581566011867569e-07, 'epoch': 0.92} + 92%|��████████▏| 11240/12188 [2:34:29<1:53:51, 7.21s/it] 92%|█████████▏| 11241/12188 [2:34:36<1:52:20, 7.12s/it] {'loss': 0.2816, 'grad_norm': 0.7291904990770542, 'learning_rate': 1.5782522843342197e-07, 'epoch': 0.92} + 92%|█████████▏| 11241/12188 [2:34:36<1:52:20, 7.12s/it] 92%|█████████▏| 11242/12188 [2:34:43<1:53:07, 7.17s/it] {'loss': 0.2799, 'grad_norm': 0.72056831800177, 'learning_rate': 1.574941976266897e-07, 'epoch': 0.92} + 92%|█████████▏| 11242/12188 [2:34:43<1:53:07, 7.17s/it] 92%|█████████▏| 11243/12188 [2:34:50<1:50:37, 7.02s/it] {'loss': 0.3105, 'grad_norm': 0.6981449962243652, 'learning_rate': 1.5716350878993703e-07, 'epoch': 0.92} + 92%|█████████▏| 11243/12188 [2:34:50<1:50:37, 7.02s/it] 92%|█████████▏| 11244/12188 [2:34:57<1:51:56, 7.11s/it] {'loss': 0.2716, 'grad_norm': 0.7050030845972024, 'learning_rate': 1.5683316194651743e-07, 'epoch': 0.92} + 92%|█████████▏| 11244/12188 [2:34:57<1:51:56, 7.11s/it] 92%|█████████▏| 11245/12188 [2:35:04<1:50:09, 7.01s/it] {'loss': 0.3315, 'grad_norm': 0.7380278111527551, 'learning_rate': 1.5650315711975783e-07, 'epoch': 0.92} + 92%|█████████▏| 11245/12188 [2:35:04<1:50:09, 7.01s/it] 92%|█████████▏| 11246/12188 [2:35:10<1:48:54, 6.94s/it] {'loss': 0.3598, 'grad_norm': 0.7094033582704029, 'learning_rate': 1.5617349433296402e-07, 'epoch': 0.92} + 92%|█████████▏| 11246/12188 [2:35:10<1:48:54, 6.94s/it] 92%|█████████▏| 11247/12188 [2:35:18<1:51:44, 7.13s/it] {'loss': 0.3518, 'grad_norm': 0.7179580684837842, 'learning_rate': 1.5584417360941572e-07, 'epoch': 0.92} + 92%|█████████▏| 11247/12188 [2:35:18<1:51:44, 7.13s/it] 92%|█████████▏| 11248/12188 [2:35:25<1:53:21, 7.24s/it] {'loss': 0.3027, 'grad_norm': 0.6986789283056178, 'learning_rate': 1.5551519497236877e-07, 'epoch': 0.92} + 92%|█████████▏| 11248/12188 [2:35:25<1:53:21, 7.24s/it] 92%|█████████▏| 11249/12188 [2:35:32<1:49:57, 7.03s/it] {'loss': 0.2898, 'grad_norm': 0.6838520553562739, 'learning_rate': 1.5518655844505627e-07, 'epoch': 0.92} + 92%|█████████▏| 11249/12188 [2:35:32<1:49:57, 7.03s/it] 92%|█████████▏| 11250/12188 [2:35:42<2:03:09, 7.88s/it] {'loss': 0.2957, 'grad_norm': 0.7121197389719113, 'learning_rate': 1.548582640506846e-07, 'epoch': 0.92} + 92%|█████████▏| 11250/12188 [2:35:42<2:03:09, 7.88s/it] 92%|█████████▏| 11251/12188 [2:35:49<2:01:29, 7.78s/it] {'loss': 0.3022, 'grad_norm': 0.628023405164576, 'learning_rate': 1.5453031181243804e-07, 'epoch': 0.92} + 92%|█████████▏| 11251/12188 [2:35:49<2:01:29, 7.78s/it] 92%|█████████▏| 11252/12188 [2:35:56<1:54:52, 7.36s/it] {'loss': 0.2766, 'grad_norm': 1.0832103841968717, 'learning_rate': 1.5420270175347696e-07, 'epoch': 0.92} + 92%|█████████▏| 11252/12188 [2:35:56<1:54:52, 7.36s/it] 92%|█████████▏| 11253/12188 [2:36:03<1:54:42, 7.36s/it] {'loss': 0.2785, 'grad_norm': 0.7590988546781902, 'learning_rate': 1.538754338969345e-07, 'epoch': 0.92} + 92%|█████████▏| 11253/12188 [2:36:03<1:54:42, 7.36s/it] 92%|█████████▏| 11254/12188 [2:36:10<1:52:43, 7.24s/it] {'loss': 0.2793, 'grad_norm': 0.7078263548532155, 'learning_rate': 1.5354850826592383e-07, 'epoch': 0.92} + 92%|█████████▏| 11254/12188 [2:36:10<1:52:43, 7.24s/it] 92%|█████████▏| 11255/12188 [2:36:17<1:49:05, 7.02s/it] {'loss': 0.2635, 'grad_norm': 0.7216400447731017, 'learning_rate': 1.532219248835315e-07, 'epoch': 0.92} + 92%|█████████▏| 11255/12188 [2:36:17<1:49:05, 7.02s/it] 92%|█████████▏| 11256/12188 [2:36:23<1:47:05, 6.89s/it] {'loss': 0.2924, 'grad_norm': 0.6946167317049176, 'learning_rate': 1.5289568377281905e-07, 'epoch': 0.92} + 92%|█████████▏| 11256/12188 [2:36:23<1:47:05, 6.89s/it] 92%|█████████▏| 11257/12188 [2:36:30<1:47:12, 6.91s/it] {'loss': 0.3224, 'grad_norm': 0.6779292187917355, 'learning_rate': 1.525697849568264e-07, 'epoch': 0.92} + 92%|█████████▏| 11257/12188 [2:36:30<1:47:12, 6.91s/it] 92%|█████████▏| 11258/12188 [2:36:38<1:50:49, 7.15s/it] {'loss': 0.3125, 'grad_norm': 0.6856174586239226, 'learning_rate': 1.5224422845856678e-07, 'epoch': 0.92} + 92%|█████████▏| 11258/12188 [2:36:38<1:50:49, 7.15s/it] 92%|█████████▏| 11259/12188 [2:36:48<2:03:54, 8.00s/it] {'loss': 0.2656, 'grad_norm': 0.6738536224866295, 'learning_rate': 1.5191901430103185e-07, 'epoch': 0.92} + 92%|█████████▏| 11259/12188 [2:36:48<2:03:54, 8.00s/it] 92%|█████████▏| 11260/12188 [2:36:55<2:00:12, 7.77s/it] {'loss': 0.3423, 'grad_norm': 0.7683674773179275, 'learning_rate': 1.5159414250718706e-07, 'epoch': 0.92} + 92%|█████████▏| 11260/12188 [2:36:55<2:00:12, 7.77s/it] 92%|█████████▏| 11261/12188 [2:37:02<1:55:29, 7.48s/it] {'loss': 0.2939, 'grad_norm': 0.7292299860785303, 'learning_rate': 1.5126961309997355e-07, 'epoch': 0.92} + 92%|█████████▏| 11261/12188 [2:37:02<1:55:29, 7.48s/it] 92%|█████████▏| 11262/12188 [2:37:08<1:51:15, 7.21s/it] {'loss': 0.2898, 'grad_norm': 1.0298944830488077, 'learning_rate': 1.5094542610231133e-07, 'epoch': 0.92} + 92%|█████████▏| 11262/12188 [2:37:08<1:51:15, 7.21s/it] 92%|█████████▏| 11263/12188 [2:37:16<1:50:50, 7.19s/it] {'loss': 0.318, 'grad_norm': 0.6529395781996743, 'learning_rate': 1.506215815370915e-07, 'epoch': 0.92} + 92%|█████████▏| 11263/12188 [2:37:16<1:50:50, 7.19s/it] 92%|█████████▏| 11264/12188 [2:37:23<1:50:24, 7.17s/it] {'loss': 0.2859, 'grad_norm': 0.802965452328457, 'learning_rate': 1.5029807942718466e-07, 'epoch': 0.92} + 92%|█████████▏| 11264/12188 [2:37:23<1:50:24, 7.17s/it] 92%|█████████▏| 11265/12188 [2:37:30<1:49:29, 7.12s/it] {'loss': 0.3221, 'grad_norm': 0.7465025949547018, 'learning_rate': 1.4997491979543587e-07, 'epoch': 0.92} + 92%|█████████▏| 11265/12188 [2:37:30<1:49:29, 7.12s/it] 92%|█████████▏| 11266/12188 [2:37:36<1:47:28, 6.99s/it] {'loss': 0.3195, 'grad_norm': 0.7188419083298134, 'learning_rate': 1.496521026646658e-07, 'epoch': 0.92} + 92%|█████████▏| 11266/12188 [2:37:36<1:47:28, 6.99s/it] 92%|█████████▏| 11267/12188 [2:37:43<1:46:12, 6.92s/it] {'loss': 0.2977, 'grad_norm': 0.7917251894568961, 'learning_rate': 1.4932962805767225e-07, 'epoch': 0.92} + 92%|█████████▏| 11267/12188 [2:37:43<1:46:12, 6.92s/it] 92%|█████████▏| 11268/12188 [2:37:50<1:46:17, 6.93s/it] {'loss': 0.2873, 'grad_norm': 0.7572032526828749, 'learning_rate': 1.4900749599722653e-07, 'epoch': 0.92} + 92%|█████████▏| 11268/12188 [2:37:50<1:46:17, 6.93s/it] 92%|█████████▏| 11269/12188 [2:37:57<1:47:20, 7.01s/it] {'loss': 0.2741, 'grad_norm': 0.7197928337015733, 'learning_rate': 1.4868570650607816e-07, 'epoch': 0.92} + 92%|█████████▏| 11269/12188 [2:37:57<1:47:20, 7.01s/it] 92%|█████████▏| 11270/12188 [2:38:04<1:45:36, 6.90s/it] {'loss': 0.2909, 'grad_norm': 0.6927579975986298, 'learning_rate': 1.4836425960695123e-07, 'epoch': 0.92} + 92%|█████████▏| 11270/12188 [2:38:04<1:45:36, 6.90s/it] 92%|█████████▏| 11271/12188 [2:38:12<1:48:30, 7.10s/it] {'loss': 0.3262, 'grad_norm': 0.7125494849992684, 'learning_rate': 1.4804315532254475e-07, 'epoch': 0.92} + 92%|█████████▏| 11271/12188 [2:38:12<1:48:30, 7.10s/it] 92%|█████████▏| 11272/12188 [2:38:19<1:47:58, 7.07s/it] {'loss': 0.2944, 'grad_norm': 1.3746964400576367, 'learning_rate': 1.4772239367553564e-07, 'epoch': 0.92} + 92%|█████████▏| 11272/12188 [2:38:19<1:47:58, 7.07s/it] 92%|█████████▏| 11273/12188 [2:38:25<1:46:30, 6.98s/it] {'loss': 0.2947, 'grad_norm': 0.6957650839102615, 'learning_rate': 1.474019746885752e-07, 'epoch': 0.92} + 92%|█████████▏| 11273/12188 [2:38:25<1:46:30, 6.98s/it] 93%|█████████▎| 11274/12188 [2:38:32<1:46:36, 7.00s/it] {'loss': 0.3077, 'grad_norm': 0.6845335140098815, 'learning_rate': 1.4708189838429087e-07, 'epoch': 0.92} + 93%|█████████▎| 11274/12188 [2:38:32<1:46:36, 7.00s/it] 93%|█████████▎| 11275/12188 [2:38:39<1:43:37, 6.81s/it] {'loss': 0.3072, 'grad_norm': 0.7828182380917426, 'learning_rate': 1.4676216478528683e-07, 'epoch': 0.93} + 93%|█████████▎| 11275/12188 [2:38:39<1:43:37, 6.81s/it] 93%|█████████▎| 11276/12188 [2:38:46<1:45:17, 6.93s/it] {'loss': 0.2986, 'grad_norm': 0.9204560858714774, 'learning_rate': 1.4644277391414107e-07, 'epoch': 0.93} + 93%|█████████▎| 11276/12188 [2:38:46<1:45:17, 6.93s/it] 93%|█████████▎| 11277/12188 [2:38:53<1:44:32, 6.89s/it] {'loss': 0.2791, 'grad_norm': 0.672105590845439, 'learning_rate': 1.4612372579340893e-07, 'epoch': 0.93} + 93%|█████████▎| 11277/12188 [2:38:53<1:44:32, 6.89s/it] 93%|█████████▎| 11278/12188 [2:39:00<1:44:07, 6.87s/it] {'loss': 0.2895, 'grad_norm': 0.6414297847175648, 'learning_rate': 1.4580502044562016e-07, 'epoch': 0.93} + 93%|█████████▎| 11278/12188 [2:39:00<1:44:07, 6.87s/it] 93%|█████████▎| 11279/12188 [2:39:07<1:45:36, 6.97s/it] {'loss': 0.3122, 'grad_norm': 0.7939280076706571, 'learning_rate': 1.454866578932823e-07, 'epoch': 0.93} + 93%|█████████▎| 11279/12188 [2:39:07<1:45:36, 6.97s/it] 93%|█████████▎| 11280/12188 [2:39:14<1:47:52, 7.13s/it] {'loss': 0.2828, 'grad_norm': 0.6990257368229809, 'learning_rate': 1.4516863815887795e-07, 'epoch': 0.93} + 93%|█████████▎| 11280/12188 [2:39:14<1:47:52, 7.13s/it] 93%|█████████▎| 11281/12188 [2:39:21<1:45:54, 7.01s/it] {'loss': 0.3093, 'grad_norm': 0.7020860322405502, 'learning_rate': 1.4485096126486355e-07, 'epoch': 0.93} + 93%|█████████▎| 11281/12188 [2:39:21<1:45:54, 7.01s/it] 93%|█████████▎| 11282/12188 [2:39:28<1:46:59, 7.08s/it] {'loss': 0.271, 'grad_norm': 0.680619909466316, 'learning_rate': 1.4453362723367449e-07, 'epoch': 0.93} + 93%|█████████▎| 11282/12188 [2:39:28<1:46:59, 7.08s/it] 93%|█████████▎| 11283/12188 [2:39:35<1:44:57, 6.96s/it] {'loss': 0.3128, 'grad_norm': 0.7368264140373766, 'learning_rate': 1.4421663608772009e-07, 'epoch': 0.93} + 93%|█████████▎| 11283/12188 [2:39:35<1:44:57, 6.96s/it] 93%|█████████▎| 11284/12188 [2:39:44<1:56:41, 7.75s/it] {'loss': 0.2573, 'grad_norm': 0.7279520784199934, 'learning_rate': 1.4389998784938465e-07, 'epoch': 0.93} + 93%|█████████▎| 11284/12188 [2:39:44<1:56:41, 7.75s/it] 93%|█████████▎| 11285/12188 [2:39:52<1:56:07, 7.72s/it] {'loss': 0.2856, 'grad_norm': 0.7101492595077764, 'learning_rate': 1.4358368254103084e-07, 'epoch': 0.93} + 93%|█████████▎| 11285/12188 [2:39:52<1:56:07, 7.72s/it] 93%|█████████▎| 11286/12188 [2:39:59<1:50:15, 7.33s/it] {'loss': 0.3511, 'grad_norm': 0.7490153697532634, 'learning_rate': 1.4326772018499414e-07, 'epoch': 0.93} + 93%|█████████▎| 11286/12188 [2:39:59<1:50:15, 7.33s/it] 93%|█████████▎| 11287/12188 [2:40:09<2:03:48, 8.24s/it] {'loss': 0.2839, 'grad_norm': 0.7018214439604121, 'learning_rate': 1.4295210080358835e-07, 'epoch': 0.93} + 93%|█████████▎| 11287/12188 [2:40:09<2:03:48, 8.24s/it] 93%|█████████▎| 11288/12188 [2:40:16<1:57:03, 7.80s/it] {'loss': 0.2705, 'grad_norm': 0.7088565374260156, 'learning_rate': 1.4263682441910233e-07, 'epoch': 0.93} + 93%|█████████▎| 11288/12188 [2:40:16<1:57:03, 7.80s/it] 93%|█████████▎| 11289/12188 [2:40:23<1:54:32, 7.64s/it] {'loss': 0.3492, 'grad_norm': 0.7076885984526969, 'learning_rate': 1.4232189105379935e-07, 'epoch': 0.93} + 93%|█████████▎| 11289/12188 [2:40:23<1:54:32, 7.64s/it] 93%|█████████▎| 11290/12188 [2:40:30<1:50:20, 7.37s/it] {'loss': 0.2777, 'grad_norm': 0.6629864691810556, 'learning_rate': 1.4200730072991996e-07, 'epoch': 0.93} + 93%|█████████▎| 11290/12188 [2:40:30<1:50:20, 7.37s/it] 93%|█████████▎| 11291/12188 [2:40:37<1:51:02, 7.43s/it] {'loss': 0.3231, 'grad_norm': 0.7442051457595176, 'learning_rate': 1.4169305346968033e-07, 'epoch': 0.93} + 93%|█████████▎| 11291/12188 [2:40:37<1:51:02, 7.43s/it] 93%|█████████▎| 11292/12188 [2:40:45<1:50:54, 7.43s/it] {'loss': 0.2546, 'grad_norm': 0.6818417066236159, 'learning_rate': 1.4137914929527097e-07, 'epoch': 0.93} + 93%|█████████▎| 11292/12188 [2:40:45<1:50:54, 7.43s/it] 93%|█████████▎| 11293/12188 [2:40:52<1:48:09, 7.25s/it] {'loss': 0.2503, 'grad_norm': 0.636313605806986, 'learning_rate': 1.410655882288614e-07, 'epoch': 0.93} + 93%|█████████▎| 11293/12188 [2:40:52<1:48:09, 7.25s/it] 93%|█████████▎| 11294/12188 [2:40:59<1:48:03, 7.25s/it] {'loss': 0.3092, 'grad_norm': 0.7601996582458497, 'learning_rate': 1.4075237029259227e-07, 'epoch': 0.93} + 93%|█████████▎| 11294/12188 [2:40:59<1:48:03, 7.25s/it] 93%|█████████▎| 11295/12188 [2:41:06<1:47:22, 7.21s/it] {'loss': 0.2873, 'grad_norm': 0.6839123202993102, 'learning_rate': 1.404394955085847e-07, 'epoch': 0.93} + 93%|█████████▎| 11295/12188 [2:41:06<1:47:22, 7.21s/it] 93%|█████████▎| 11296/12188 [2:41:14<1:51:17, 7.49s/it] {'loss': 0.2775, 'grad_norm': 0.7009476336276218, 'learning_rate': 1.401269638989322e-07, 'epoch': 0.93} + 93%|█████████▎| 11296/12188 [2:41:14<1:51:17, 7.49s/it] 93%|█████████▎| 11297/12188 [2:41:21<1:48:01, 7.27s/it] {'loss': 0.2721, 'grad_norm': 0.6141613079041155, 'learning_rate': 1.3981477548570544e-07, 'epoch': 0.93} + 93%|█████████▎| 11297/12188 [2:41:21<1:48:01, 7.27s/it] 93%|█████████▎| 11298/12188 [2:41:29<1:50:56, 7.48s/it] {'loss': 0.3045, 'grad_norm': 0.7465150215285211, 'learning_rate': 1.395029302909512e-07, 'epoch': 0.93} + 93%|█████████▎| 11298/12188 [2:41:29<1:50:56, 7.48s/it] 93%|█████████▎| 11299/12188 [2:41:35<1:46:45, 7.20s/it] {'loss': 0.2615, 'grad_norm': 0.7317787921275658, 'learning_rate': 1.391914283366902e-07, 'epoch': 0.93} + 93%|█████████▎| 11299/12188 [2:41:35<1:46:45, 7.20s/it] 93%|█████████▎| 11300/12188 [2:41:42<1:44:44, 7.08s/it] {'loss': 0.2653, 'grad_norm': 0.6856968791298728, 'learning_rate': 1.3888026964492208e-07, 'epoch': 0.93} + 93%|█████████▎| 11300/12188 [2:41:42<1:44:44, 7.08s/it] 93%|█████████▎| 11301/12188 [2:41:49<1:45:30, 7.14s/it] {'loss': 0.3164, 'grad_norm': 0.7045228699090543, 'learning_rate': 1.3856945423761813e-07, 'epoch': 0.93} + 93%|█████████▎| 11301/12188 [2:41:49<1:45:30, 7.14s/it] 93%|█████████▎| 11302/12188 [2:41:57<1:47:35, 7.29s/it] {'loss': 0.3273, 'grad_norm': 0.790638855471004, 'learning_rate': 1.3825898213672918e-07, 'epoch': 0.93} + 93%|█████████▎| 11302/12188 [2:41:57<1:47:35, 7.29s/it] 93%|█████████▎| 11303/12188 [2:42:04<1:47:33, 7.29s/it] {'loss': 0.3108, 'grad_norm': 0.6328617515799009, 'learning_rate': 1.379488533641804e-07, 'epoch': 0.93} + 93%|█████████▎| 11303/12188 [2:42:04<1:47:33, 7.29s/it] 93%|█████████▎| 11304/12188 [2:42:11<1:45:01, 7.13s/it] {'loss': 0.2939, 'grad_norm': 0.6748686620653734, 'learning_rate': 1.376390679418721e-07, 'epoch': 0.93} + 93%|█████████▎| 11304/12188 [2:42:11<1:45:01, 7.13s/it]Invalidate trace cache @ step 2: expected module 1, but got module 364 + 93%|█████████▎| 11305/12188 [2:42:17<1:38:13, 6.67s/it] {'loss': 0.6326, 'grad_norm': 0.5941015981458683, 'learning_rate': 1.373296258916801e-07, 'epoch': 0.93} + 93%|█████████▎| 11305/12188 [2:42:17<1:38:13, 6.67s/it] 93%|█████████▎| 11306/12188 [2:42:25<1:45:19, 7.17s/it] {'loss': 0.2561, 'grad_norm': 0.6921429813202267, 'learning_rate': 1.3702052723545856e-07, 'epoch': 0.93} + 93%|█████████▎| 11306/12188 [2:42:25<1:45:19, 7.17s/it] 93%|█████████▎| 11307/12188 [2:42:33<1:48:39, 7.40s/it] {'loss': 0.2984, 'grad_norm': 0.6750367777492761, 'learning_rate': 1.3671177199503338e-07, 'epoch': 0.93} + 93%|█████████▎| 11307/12188 [2:42:33<1:48:39, 7.40s/it]Invalidate trace cache @ step 2: expected module 1, but got module 364 + 93%|█████████▎| 11308/12188 [2:42:39<1:41:53, 6.95s/it] {'loss': 0.659, 'grad_norm': 0.6413041222352377, 'learning_rate': 1.364033601922099e-07, 'epoch': 0.93} + 93%|█████████▎| 11308/12188 [2:42:39<1:41:53, 6.95s/it] 93%|█████████▎| 11309/12188 [2:42:46<1:42:52, 7.02s/it] {'loss': 0.2986, 'grad_norm': 0.6838643220441148, 'learning_rate': 1.360952918487668e-07, 'epoch': 0.93} + 93%|█████████▎| 11309/12188 [2:42:46<1:42:52, 7.02s/it]Invalidate trace cache @ step 2: expected module 1, but got module 364 + 93%|█████████▎| 11310/12188 [2:42:52<1:37:36, 6.67s/it] {'loss': 0.6251, 'grad_norm': 0.5589323959064033, 'learning_rate': 1.3578756698646056e-07, 'epoch': 0.93} + 93%|█████████▎| 11310/12188 [2:42:52<1:37:36, 6.67s/it] 93%|█████████▎| 11311/12188 [2:42:59<1:38:44, 6.76s/it] {'loss': 0.3301, 'grad_norm': 0.8389283740413721, 'learning_rate': 1.3548018562702104e-07, 'epoch': 0.93} + 93%|█████████▎| 11311/12188 [2:42:59<1:38:44, 6.76s/it] 93%|█████████▎| 11312/12188 [2:43:06<1:39:29, 6.81s/it] {'loss': 0.332, 'grad_norm': 0.6868274065964147, 'learning_rate': 1.3517314779215473e-07, 'epoch': 0.93} + 93%|█████████▎| 11312/12188 [2:43:06<1:39:29, 6.81s/it] 93%|█████████▎| 11313/12188 [2:43:13<1:41:33, 6.96s/it] {'loss': 0.2837, 'grad_norm': 0.7211425969332999, 'learning_rate': 1.3486645350354599e-07, 'epoch': 0.93} + 93%|█████████▎| 11313/12188 [2:43:13<1:41:33, 6.96s/it] 93%|█████████▎| 11314/12188 [2:43:20<1:43:10, 7.08s/it] {'loss': 0.251, 'grad_norm': 0.7044976190954186, 'learning_rate': 1.345601027828508e-07, 'epoch': 0.93} + 93%|█████████▎| 11314/12188 [2:43:20<1:43:10, 7.08s/it] 93%|█████████▎| 11315/12188 [2:43:27<1:41:38, 6.99s/it] {'loss': 0.2974, 'grad_norm': 0.6949885223795877, 'learning_rate': 1.342540956517041e-07, 'epoch': 0.93} + 93%|█████████▎| 11315/12188 [2:43:27<1:41:38, 6.99s/it] 93%|█████████▎| 11316/12188 [2:43:34<1:41:23, 6.98s/it] {'loss': 0.2906, 'grad_norm': 0.6732604517291711, 'learning_rate': 1.3394843213171638e-07, 'epoch': 0.93} + 93%|█████████▎| 11316/12188 [2:43:34<1:41:23, 6.98s/it] 93%|█████████▎| 11317/12188 [2:43:42<1:43:01, 7.10s/it] {'loss': 0.3141, 'grad_norm': 0.6847160622196143, 'learning_rate': 1.33643112244472e-07, 'epoch': 0.93} + 93%|█████████▎| 11317/12188 [2:43:42<1:43:01, 7.10s/it] 93%|█████████▎| 11318/12188 [2:43:49<1:44:41, 7.22s/it] {'loss': 0.2923, 'grad_norm': 0.7261649275644493, 'learning_rate': 1.3333813601153322e-07, 'epoch': 0.93} + 93%|█████████▎| 11318/12188 [2:43:49<1:44:41, 7.22s/it] 93%|█████████▎| 11319/12188 [2:43:56<1:43:40, 7.16s/it] {'loss': 0.2966, 'grad_norm': 0.6650380406200468, 'learning_rate': 1.330335034544361e-07, 'epoch': 0.93} + 93%|█████████▎| 11319/12188 [2:43:56<1:43:40, 7.16s/it] 93%|█████████▎| 11320/12188 [2:44:04<1:44:46, 7.24s/it] {'loss': 0.2755, 'grad_norm': 0.7041378636604472, 'learning_rate': 1.3272921459469345e-07, 'epoch': 0.93} + 93%|█████████▎| 11320/12188 [2:44:04<1:44:46, 7.24s/it] 93%|█████████▎| 11321/12188 [2:44:10<1:43:02, 7.13s/it] {'loss': 0.2977, 'grad_norm': 0.7143249755790838, 'learning_rate': 1.3242526945379363e-07, 'epoch': 0.93} + 93%|█████████▎| 11321/12188 [2:44:10<1:43:02, 7.13s/it] 93%|█████████▎| 11322/12188 [2:44:20<1:55:11, 7.98s/it] {'loss': 0.2892, 'grad_norm': 0.6981224193967194, 'learning_rate': 1.3212166805320114e-07, 'epoch': 0.93} + 93%|█████████▎| 11322/12188 [2:44:20<1:55:11, 7.98s/it] 93%|█████████▎| 11323/12188 [2:44:31<2:05:17, 8.69s/it] {'loss': 0.3152, 'grad_norm': 0.6930833058424749, 'learning_rate': 1.3181841041435605e-07, 'epoch': 0.93} + 93%|█████████▎| 11323/12188 [2:44:31<2:05:17, 8.69s/it] 93%|█████████▎| 11324/12188 [2:44:38<1:58:01, 8.20s/it] {'loss': 0.3207, 'grad_norm': 0.7221053615693147, 'learning_rate': 1.315154965586729e-07, 'epoch': 0.93} + 93%|█████████▎| 11324/12188 [2:44:38<1:58:01, 8.20s/it] 93%|█████████▎| 11325/12188 [2:44:45<1:53:24, 7.89s/it] {'loss': 0.3049, 'grad_norm': 0.7109661205774788, 'learning_rate': 1.3121292650754402e-07, 'epoch': 0.93} + 93%|█████████▎| 11325/12188 [2:44:45<1:53:24, 7.89s/it] 93%|█████████▎| 11326/12188 [2:44:52<1:47:41, 7.50s/it] {'loss': 0.3151, 'grad_norm': 0.6949056039308639, 'learning_rate': 1.3091070028233622e-07, 'epoch': 0.93} + 93%|█████████▎| 11326/12188 [2:44:52<1:47:41, 7.50s/it] 93%|█████████▎| 11327/12188 [2:44:58<1:45:23, 7.34s/it] {'loss': 0.3002, 'grad_norm': 0.6941905273293811, 'learning_rate': 1.306088179043913e-07, 'epoch': 0.93} + 93%|█████████▎| 11327/12188 [2:44:59<1:45:23, 7.34s/it] 93%|█████████▎| 11328/12188 [2:45:05<1:42:06, 7.12s/it] {'loss': 0.3039, 'grad_norm': 0.6836598605125512, 'learning_rate': 1.303072793950283e-07, 'epoch': 0.93} + 93%|█████████▎| 11328/12188 [2:45:05<1:42:06, 7.12s/it] 93%|█████████▎| 11329/12188 [2:45:15<1:52:49, 7.88s/it] {'loss': 0.3167, 'grad_norm': 0.7600689144142867, 'learning_rate': 1.3000608477554188e-07, 'epoch': 0.93} + 93%|█████████▎| 11329/12188 [2:45:15<1:52:49, 7.88s/it] 93%|█████████▎| 11330/12188 [2:45:23<1:53:59, 7.97s/it] {'loss': 0.2849, 'grad_norm': 0.7585052886304504, 'learning_rate': 1.2970523406720114e-07, 'epoch': 0.93} + 93%|█████████▎| 11330/12188 [2:45:23<1:53:59, 7.97s/it] 93%|█████████▎| 11331/12188 [2:45:29<1:47:32, 7.53s/it] {'loss': 0.2643, 'grad_norm': 0.6698011475470186, 'learning_rate': 1.2940472729125241e-07, 'epoch': 0.93} + 93%|█████████▎| 11331/12188 [2:45:29<1:47:32, 7.53s/it] 93%|█████████▎| 11332/12188 [2:45:38<1:50:30, 7.75s/it] {'loss': 0.3188, 'grad_norm': 0.673581164724142, 'learning_rate': 1.291045644689165e-07, 'epoch': 0.93} + 93%|█████████▎| 11332/12188 [2:45:38<1:50:30, 7.75s/it] 93%|█████████▎| 11333/12188 [2:45:44<1:45:51, 7.43s/it] {'loss': 0.3464, 'grad_norm': 0.7806936010675306, 'learning_rate': 1.2880474562139033e-07, 'epoch': 0.93} + 93%|█████████▎| 11333/12188 [2:45:44<1:45:51, 7.43s/it] 93%|█████████▎| 11334/12188 [2:45:52<1:45:14, 7.39s/it] {'loss': 0.2549, 'grad_norm': 0.6739885711115707, 'learning_rate': 1.28505270769847e-07, 'epoch': 0.93} + 93%|█████████▎| 11334/12188 [2:45:52<1:45:14, 7.39s/it] 93%|█████████▎| 11335/12188 [2:45:58<1:41:04, 7.11s/it] {'loss': 0.2629, 'grad_norm': 0.7003562725094278, 'learning_rate': 1.2820613993543452e-07, 'epoch': 0.93} + 93%|█████████▎| 11335/12188 [2:45:58<1:41:04, 7.11s/it] 93%|█████████▎| 11336/12188 [2:46:08<1:52:12, 7.90s/it] {'loss': 0.2976, 'grad_norm': 0.7631966357521495, 'learning_rate': 1.2790735313927825e-07, 'epoch': 0.93} + 93%|█████████▎| 11336/12188 [2:46:08<1:52:12, 7.90s/it] 93%|█████████▎| 11337/12188 [2:46:16<1:51:57, 7.89s/it] {'loss': 0.2568, 'grad_norm': 0.6800373789383901, 'learning_rate': 1.2760891040247637e-07, 'epoch': 0.93} + 93%|█████████▎| 11337/12188 [2:46:16<1:51:57, 7.89s/it] 93%|█████████▎| 11338/12188 [2:46:22<1:46:48, 7.54s/it] {'loss': 0.2692, 'grad_norm': 1.6113733767600635, 'learning_rate': 1.2731081174610526e-07, 'epoch': 0.93} + 93%|█████████▎| 11338/12188 [2:46:22<1:46:48, 7.54s/it] 93%|█████████▎| 11339/12188 [2:46:30<1:45:07, 7.43s/it] {'loss': 0.3428, 'grad_norm': 0.707282805431762, 'learning_rate': 1.270130571912165e-07, 'epoch': 0.93} + 93%|█████████▎| 11339/12188 [2:46:30<1:45:07, 7.43s/it] 93%|█████████▎| 11340/12188 [2:46:36<1:41:31, 7.18s/it] {'loss': 0.309, 'grad_norm': 0.6564622608464757, 'learning_rate': 1.2671564675883595e-07, 'epoch': 0.93} + 93%|█████████▎| 11340/12188 [2:46:36<1:41:31, 7.18s/it] 93%|█████████▎| 11341/12188 [2:46:44<1:42:00, 7.23s/it] {'loss': 0.2602, 'grad_norm': 0.6791310117114143, 'learning_rate': 1.2641858046996636e-07, 'epoch': 0.93} + 93%|█████████▎| 11341/12188 [2:46:44<1:42:00, 7.23s/it] 93%|█████████▎| 11342/12188 [2:46:50<1:39:59, 7.09s/it] {'loss': 0.3109, 'grad_norm': 0.7527641645214167, 'learning_rate': 1.261218583455881e-07, 'epoch': 0.93} + 93%|█████████▎| 11342/12188 [2:46:50<1:39:59, 7.09s/it] 93%|█████████▎| 11343/12188 [2:46:57<1:38:50, 7.02s/it] {'loss': 0.3072, 'grad_norm': 0.6868359930979714, 'learning_rate': 1.2582548040665276e-07, 'epoch': 0.93} + 93%|█████████▎| 11343/12188 [2:46:57<1:38:50, 7.02s/it] 93%|█████████▎| 11344/12188 [2:47:07<1:50:12, 7.84s/it] {'loss': 0.271, 'grad_norm': 0.6076745054600249, 'learning_rate': 1.2552944667409139e-07, 'epoch': 0.93} + 93%|█████████▎| 11344/12188 [2:47:07<1:50:12, 7.84s/it] 93%|█████████▎| 11345/12188 [2:47:15<1:52:00, 7.97s/it] {'loss': 0.301, 'grad_norm': 0.7163281338945134, 'learning_rate': 1.2523375716880836e-07, 'epoch': 0.93} + 93%|█████████▎| 11345/12188 [2:47:15<1:52:00, 7.97s/it] 93%|█████████▎| 11346/12188 [2:47:22<1:47:31, 7.66s/it] {'loss': 0.2717, 'grad_norm': 0.7391176797439462, 'learning_rate': 1.2493841191168587e-07, 'epoch': 0.93} + 93%|█████████▎| 11346/12188 [2:47:22<1:47:31, 7.66s/it] 93%|█████████▎| 11347/12188 [2:47:30<1:48:11, 7.72s/it] {'loss': 0.303, 'grad_norm': 0.7846302363280865, 'learning_rate': 1.2464341092358113e-07, 'epoch': 0.93} + 93%|█████████▎| 11347/12188 [2:47:30<1:48:11, 7.72s/it] 93%|█████████▎| 11348/12188 [2:47:37<1:44:44, 7.48s/it] {'loss': 0.3119, 'grad_norm': 0.7594341624361188, 'learning_rate': 1.2434875422532467e-07, 'epoch': 0.93} + 93%|█████████▎| 11348/12188 [2:47:37<1:44:44, 7.48s/it] 93%|█████████▎| 11349/12188 [2:47:44<1:42:17, 7.32s/it] {'loss': 0.3078, 'grad_norm': 0.7038000402288146, 'learning_rate': 1.2405444183772654e-07, 'epoch': 0.93} + 93%|█████████▎| 11349/12188 [2:47:44<1:42:17, 7.32s/it] 93%|█████████▎| 11350/12188 [2:47:51<1:42:07, 7.31s/it] {'loss': 0.2545, 'grad_norm': 0.6855759649424332, 'learning_rate': 1.2376047378156952e-07, 'epoch': 0.93} + 93%|█████████▎| 11350/12188 [2:47:51<1:42:07, 7.31s/it] 93%|█████████▎| 11351/12188 [2:47:58<1:40:12, 7.18s/it] {'loss': 0.2987, 'grad_norm': 0.6814069309972353, 'learning_rate': 1.2346685007761428e-07, 'epoch': 0.93} + 93%|█████████▎| 11351/12188 [2:47:58<1:40:12, 7.18s/it] 93%|█████████▎| 11352/12188 [2:48:05<1:38:05, 7.04s/it] {'loss': 0.3108, 'grad_norm': 0.655526912081561, 'learning_rate': 1.2317357074659476e-07, 'epoch': 0.93} + 93%|█████████▎| 11352/12188 [2:48:05<1:38:05, 7.04s/it] 93%|█████████▎| 11353/12188 [2:48:12<1:37:14, 6.99s/it] {'loss': 0.2639, 'grad_norm': 0.8699570882456708, 'learning_rate': 1.2288063580922272e-07, 'epoch': 0.93} + 93%|█████████▎| 11353/12188 [2:48:12<1:37:14, 6.99s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f32486045e0> +[Try #0] Failed to fetch sample 4411617 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f32486045e0> +Problematic sample: {'image': '20240827_145511_before_screenshot_sub3.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Q2790: 100%'"}, {'from': 'gpt', 'value': '\nclick(x=0.8995, y=0.972)\n'}]} + 93%|█████████▎| 11354/12188 [2:48:19<1:39:30, 7.16s/it] {'loss': 0.3003, 'grad_norm': 0.7119757122995868, 'learning_rate': 1.2258804528618495e-07, 'epoch': 0.93} + 93%|█████████▎| 11354/12188 [2:48:19<1:39:30, 7.16s/it] 93%|█████████▎| 11355/12188 [2:48:26<1:38:16, 7.08s/it] {'loss': 0.2714, 'grad_norm': 0.7621756272455273, 'learning_rate': 1.2229579919814217e-07, 'epoch': 0.93} + 93%|█████████▎| 11355/12188 [2:48:26<1:38:16, 7.08s/it] 93%|█████████▎| 11356/12188 [2:48:33<1:38:47, 7.12s/it] {'loss': 0.2875, 'grad_norm': 0.6939199309257095, 'learning_rate': 1.2200389756573395e-07, 'epoch': 0.93} + 93%|█████████▎| 11356/12188 [2:48:33<1:38:47, 7.12s/it] 93%|█████████▎| 11357/12188 [2:48:41<1:40:01, 7.22s/it] {'loss': 0.2403, 'grad_norm': 0.729784320617588, 'learning_rate': 1.2171234040957324e-07, 'epoch': 0.93} + 93%|█████████▎| 11357/12188 [2:48:41<1:40:01, 7.22s/it] 93%|█████████▎| 11358/12188 [2:48:48<1:38:43, 7.14s/it] {'loss': 0.3396, 'grad_norm': 0.734907345661316, 'learning_rate': 1.2142112775024972e-07, 'epoch': 0.93} + 93%|█████████▎| 11358/12188 [2:48:48<1:38:43, 7.14s/it] 93%|█████████▎| 11359/12188 [2:48:54<1:35:48, 6.93s/it] {'loss': 0.3044, 'grad_norm': 0.7034837253532163, 'learning_rate': 1.2113025960832748e-07, 'epoch': 0.93} + 93%|█████████▎| 11359/12188 [2:48:54<1:35:48, 6.93s/it] 93%|█████████▎| 11360/12188 [2:49:01<1:35:12, 6.90s/it] {'loss': 0.2928, 'grad_norm': 0.7228845870567022, 'learning_rate': 1.2083973600434894e-07, 'epoch': 0.93} + 93%|█████████▎| 11360/12188 [2:49:01<1:35:12, 6.90s/it] 93%|█████████▎| 11361/12188 [2:49:08<1:35:09, 6.90s/it] {'loss': 0.2889, 'grad_norm': 0.6886639220026388, 'learning_rate': 1.205495569588283e-07, 'epoch': 0.93} + 93%|█████████▎| 11361/12188 [2:49:08<1:35:09, 6.90s/it]W0818 14:01:58.340000 128930 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 14:01:58.340000 128930 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:01:58.340000 128930 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 14:01:58.340000 128930 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:01:58.336000 76611 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 14:01:58.336000 76611 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:01:58.336000 76611 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 14:01:58.336000 76611 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:01:58.991000 15944 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 14:01:58.991000 15944 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:01:58.991000 15944 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 14:01:58.991000 15944 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:01:59.200000 122561 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 14:01:59.200000 122561 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:01:59.200000 122561 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 14:01:59.200000 122561 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:02:09.146000 129728 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 14:02:09.146000 129728 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:02:09.146000 129728 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 14:02:09.146000 129728 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:02:17.397000 129871 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 14:02:17.397000 129871 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:02:17.397000 129871 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 14:02:17.397000 129871 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:02:18.111000 122698 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 14:02:18.111000 122698 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:02:18.111000 122698 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 14:02:18.111000 122698 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:02:18.230000 16841 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 14:02:18.230000 16841 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:02:18.230000 16841 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 14:02:18.230000 16841 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:02:18.352000 11071 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 14:02:18.352000 11071 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:02:18.352000 11071 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 14:02:18.352000 11071 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:02:18.386000 9690 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 14:02:18.386000 9690 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:02:18.386000 9690 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 14:02:18.386000 9690 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:02:18.562000 123468 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 14:02:18.562000 123468 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:02:18.562000 123468 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 14:02:18.562000 123468 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:02:18.724000 13613 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 14:02:18.724000 13613 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:02:18.724000 13613 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 14:02:18.724000 13613 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:02:20.020000 111761 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 14:02:20.020000 111761 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:02:20.020000 111761 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 14:02:20.020000 111761 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:05:14.241000 21126 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 14:05:14.241000 21126 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:05:14.241000 21126 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 14:05:14.241000 21126 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:05:14.830000 20595 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 14:05:14.830000 20595 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:05:14.830000 20595 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 14:05:14.830000 20595 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:05:15.112000 26884 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 14:05:15.112000 26884 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:05:15.112000 26884 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 14:05:15.112000 26884 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:05:16.215000 124493 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 14:05:16.215000 124493 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:05:16.215000 124493 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 14:05:16.215000 124493 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:05:22.081000 86295 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 14:05:22.081000 86295 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:05:22.081000 86295 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 14:05:22.081000 86295 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:05:23.457000 103211 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 14:05:23.457000 103211 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:05:23.457000 103211 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 14:05:23.457000 103211 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +[2025-08-18 14:05:46,818] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:05:46,818] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:05:46,819] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:05:46,833] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:05:46,833] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:05:46,834] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:05:46,834] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:05:46,834] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:05:46,834] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:05:46,834] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:05:52,656] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:05:52,656] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:05:52,656] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:05:52,656] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:05:52,656] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:05:52,656] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:05:52,656] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:05:52,656] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:05:52,660] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:05:52,661] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:05:52,661] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:05:52,661] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:05:52,663] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:05:52,665] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:05:52,665] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:05:52,665] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:05:53,152] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:05:53,224] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:05:53,259] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:05:53,267] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:05:53,270] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:05:53,275] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:05:53,276] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:05:53,277] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:05:53,279] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:05:53,430] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:05:53,443] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:05:53,445] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:05:53,446] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:05:53,449] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:05:53,451] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:05:53,453] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:05:54,787] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:05:54,813] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:05:54,823] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:05:54,824] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:05:54,837] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:05:54,829] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:05:54,838] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:05:54,834] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:05:54,840] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:05:54,844] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:05:54,836] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:05:54,836] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:05:54,836] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:05:54,849] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:05:54,850] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:05:54,850] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:05:54,850] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:05:54,852] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:05:54,852] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:05:54,852] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:05:54,850] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:05:54,850] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:05:54,850] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:05:54,850] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:05:54,850] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:05:54,850] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:05:54,850] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:05:54,849] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:05:54,849] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:05:54,849] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:05:54,850] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:05:54,850] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:05:54,850] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:06:01,388] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:06:01,389] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:06:01,394] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:06:01,394] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:06:01,395] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:06:01,386] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:06:01,388] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:06:01,388] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:06:01,394] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:06:01,394] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:06:01,395] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:06:01,398] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:06:01,399] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:06:01,404] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:06:01,405] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:06:01,397] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:06:01,411] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:06:01,664] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:06:01,664] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:06:01,664] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:06:01,664] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:06:01,664] [INFO] [comm.py:683:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl +[2025-08-18 14:06:01,665] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:06:01,665] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:06:01,666] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:06:01,667] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:06:01,834] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to moYou are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:06:01,873] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:06:01,946] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:06:01,953] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:06:01,952] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:06:01,953] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:06:01,956] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:06:01,957] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:06:01,961] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. + on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +ce None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:06:01,971[2025-08-18 14:06:01,972] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:06:01,973] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +r initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:06:01,989] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:06:02,001] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:06:02,002] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:06:02,003] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:06:02,005] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:06:02,006] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:06:02,007] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:06:02,172] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:06:02,297] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:06:02,305] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:06:02,306] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:06:02,310] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:06:02,312] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:06:02,313] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:06:02,314] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:06:05,073] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:06:05,074] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:06:05,074] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:06:05,074] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:06:05,074] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:06:05,075] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:06:05,075] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:06:05,075] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:06:13,697] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:06:13,698] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:06:13,698] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:06:13,699] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:06:13,699] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:06:13,699] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:06:13,699] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:06:13,700] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:06:14,151] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:06:14,267] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:06:14,267] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:06:14,268] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:06:14,269] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:06:14,272] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:06:14,273] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:06:14,273] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. + Loading checkpoint shards: 0%| | 0/2 [00:00 before Client(conf_path) +Rank 0: --> after Client(conf_path) +Rank 0: Loading datasets: /mnt/petrelfs/liuzhaoyang/workspace/GUIAgent/internvl_chat/data/internvl_meta/meta/meta_250816_1.json +Rank 0: Loading guienv +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl with all sampling strategy +Rank 0: Loaded 327972 samples from VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl +Rank 0: Loading omniact +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl with all sampling strategy +Rank 0: Loaded 6720 samples from VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl +Rank 0: Loading ricoig16k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16133 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl +Rank 0: Loading ricosca +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl with all sampling strategy +Rank 0: Loaded 173212 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl +Rank 0: Loading seeclick +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl with all sampling strategy +Rank 0: Loaded 271121 samples from VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl +Rank 0: Loading ui_refexp +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl with all sampling strategy +Rank 0: Loaded 15624 samples from VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl +Rank 0: Loading webui350k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 57389 samples from VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl +Rank 0: Loading widget_captioning +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl with all sampling strategy +Rank 0: Loaded 101426 samples from VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl +Rank 0: Loading aitw-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl +Rank 0: Loading aitw-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl +Rank 0: Loading aitw-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl +Rank 0: Loading amex-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl +Rank 0: Loading amex-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl +Rank 0: Loading amex-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl +Rank 0: Loading android_control +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 149428 samples from VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl +Rank 0: Loading coat +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl with all sampling strategy +Rank 0: Loaded 11833 samples from VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl +Rank 0: Loading guiact-web-multi-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl +Rank 0: Loading guiact-web-multi-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl +Rank 0: Loading guiact-web-multi-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl +Rank 0: Loading guiact-web-single +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl with all sampling strategy +Rank 0: Loaded 67396 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl +Rank 0: Loading guide +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl with all sampling strategy +Rank 0: Loaded 13544 samples from VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl +Rank 0: Loading gui-odyssey-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl +Rank 0: Loading gui-odyssey-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl +Rank 0: Loading gui-odyssey-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl +Rank 0: Loading mind2web-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l1_202507011.jsonl +Rank 0: Loading mind2web-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l2_202507011.jsonl +Rank 0: Loading mind2web-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l3_202507011.jsonl +Rank 0: Loading miniwob-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l1_202507011.jsonl +Rank 0: Loading miniwob-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l2_202507011.jsonl +Rank 0: Loading miniwob-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l3_202507011.jsonl +Rank 0: Loading aguvis_android_control-v2 +Rank 0: Skipping aguvis_android_control-v2 due to repeat_time=0 +Rank 0: Loading aguvis_coat-v2 +Rank 0: Skipping aguvis_coat-v2 due to repeat_time=0 +Rank 0: Loading aguvis_docvqa_grounding +Rank 0: Skipping aguvis_docvqa_grounding due to repeat_time=0 +Rank 0: Loading aguvis_guiact-web-multi +Rank 0: Skipping aguvis_guiact-web-multi due to repeat_time=0 +Rank 0: Loading aguvis_guiact-web-single-v2 +Rank 0: Skipping aguvis_guiact-web-single-v2 due to repeat_time=0 +Rank 0: Loading aguvis_guide_si_10k-v2 +Rank 0: Skipping aguvis_guide_si_10k-v2 due to repeat_time=0 +Rank 0: Loading aguvis_guienv +Rank 0: Skipping aguvis_guienv due to repeat_time=0 +Rank 0: Loading aguvis_mind2web_train_v1.0.1 +Rank 0: Skipping aguvis_mind2web_train_v1.0.1 due to repeat_time=0 +Rank 0: Loading aguvis_omniact +Rank 0: Skipping aguvis_omniact due to repeat_time=0 +Rank 0: Loading aguvis_osatlas_ui_tars_cleaned +Rank 0: Skipping aguvis_osatlas_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading aguvis_ricoig16k +Rank 0: Skipping aguvis_ricoig16k due to repeat_time=0 +Rank 0: Loading aguvis_ricosca +Rank 0: Skipping aguvis_ricosca due to repeat_time=0 +Rank 0: Loading aguvis_seeclick_mi_ui_tars_cleaned +Rank 0: Skipping aguvis_seeclick_mi_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading aguvis_seeclick_ui_tars_cleaned_fixed +Rank 0: Skipping aguvis_seeclick_ui_tars_cleaned_fixed due to repeat_time=0 +Rank 0: Loading aguvis_ui_refexp +Rank 0: Skipping aguvis_ui_refexp due to repeat_time=0 +Rank 0: Loading aguvis_webui350k +Rank 0: Skipping aguvis_webui350k due to repeat_time=0 +Rank 0: Loading aguvis_widget_captioning +Rank 0: Skipping aguvis_widget_captioning due to repeat_time=0 +Rank 0: Loading icon_caption_icon_v0222_description +Rank 0: Skipping icon_caption_icon_v0222_description due to repeat_time=0 +Rank 0: Loading icon_grounding_icon_v0222_grounding +Rank 0: Skipping icon_grounding_icon_v0222_grounding due to repeat_time=0 +Rank 0: Loading refusal_component_final_1.5m +Rank 0: Skipping refusal_component_final_1.5m due to repeat_time=0 +Rank 0: Loading refusal_component_library_snap_icon_data_grounding +Rank 0: Skipping refusal_component_library_snap_icon_data_grounding due to repeat_time=0 +Rank 0: Loading refusal_component_v1_130k +Rank 0: Skipping refusal_component_v1_130k due to repeat_time=0 +Rank 0: Loading refusal_guienv +Rank 0: Skipping refusal_guienv due to repeat_time=0 +Rank 0: Loading refusal_icon_v0222_grounding +Rank 0: Skipping refusal_icon_v0222_grounding due to repeat_time=0 +Rank 0: Loading refusal_osatlas_ui_tars_cleaned +Rank 0: Skipping refusal_osatlas_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading refusal_ricosca +Rank 0: Skipping refusal_ricosca due to repeat_time=0 +Rank 0: Loading refusal_seeclick_mi_ui_tars_cleaned +Rank 0: Skipping refusal_seeclick_mi_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading refusal_seeclick_ui_tars_cleaned_fixed +Rank 0: Skipping refusal_seeclick_ui_tars_cleaned_fixed due to repeat_time=0 +Rank 0: Loading refusal_training_data_icon_grounded_merged +Rank 0: Skipping refusal_training_data_icon_grounded_merged due to repeat_time=0 +Rank 0: Loading component_generated_component_final_1.5m_cleaned_split +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_final_1.5m_cleaned_split_20250713.jsonl with random:10% sampling strategy +Rank 0: Loaded 3987 samples from VC:s3://gui-agent/jedi/annotations_250713/component_final_1.5m_cleaned_split_20250713.jsonl +Rank 0: Loading component_generated_component_library_snap_icon_data_description +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_description_conversations_20250713.jsonl with random:50% sampling strategy +Rank 0: Loaded 11061 samples from VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_description_conversations_20250713.jsonl +Rank 0: Loading component_generated_component_library_snap_icon_data_grounding +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_grounding_conversations_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 4424 samples from VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_grounding_conversations_20250713.jsonl +Rank 0: Loading component_generated_component_v1_130k +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_v1_130k_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 26376 samples from VC:s3://gui-agent/jedi/annotations_250713/component_v1_130k_20250713.jsonl +Rank 0: Loading component_rule-based_doc_data_new +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/doc_data_new_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 3153 samples from VC:s3://gui-agent/jedi/annotations_250713/doc_data_new_20250713.jsonl +Rank 0: Loading component_rule-based_doc_scroll_data_new +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/doc_scroll_data_new_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 603 samples from VC:s3://gui-agent/jedi/annotations_250713/doc_scroll_data_new_20250713.jsonl +Rank 0: Loading component_rule-based_ethercalc_v1 +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/ethercalc_v1_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 2012 samples from VC:s3://gui-agent/jedi/annotations_250713/ethercalc_v1_20250713.jsonl +Rank 0: Loading component_rule-based_slide_v1_17k +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/slide_v1_17k_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 2363 samples from VC:s3://gui-agent/jedi/annotations_250713/slide_v1_17k_20250713.jsonl +Rank 0: Loading icon_caption_ios_app_data +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/ios_app_data_conversations-images_pure_color_background_20250713.jsonl with all sampling strategy +Rank 0: Loaded 49498 samples from VC:s3://gui-agent/jedi/annotations_250713/ios_app_data_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_caption_mac_app_data +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/mac_app_data_conversations-images_pure_color_background_20250713.jsonl with all sampling strategy +Rank 0: Loaded 18083 samples from VC:s3://gui-agent/jedi/annotations_250713/mac_app_data_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_caption_training_data_icon +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_pure_color_background_20250713.jsonl with random:50% sampling strategy +Rank 0: Loaded 75874 samples from VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_grounding_training_data_icon_grounded_merged +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_grounded_merged_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 5466 samples from VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_grounded_merged_20250713.jsonl +Rank 0: Loading layout_layout200k_training_data_qwen25 +Rank 0: Skipping layout_layout200k_training_data_qwen25 due to repeat_time=0 +Rank 0: Loading layout_layout200k_grounding_training_data_qwen25 +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/layout200k_grounding_training_data_qwen25_20250713.jsonl with random:10% sampling strategy +W0818 14:25:43.661000 117154 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 14:25:43.661000 117154 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:25:43.661000 117154 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 14:25:43.661000 117154 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:25:46.840000 63586 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 14:25:46.840000 63586 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:25:46.840000 63586 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 14:25:46.840000 63586 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:25:49.564000 2433 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 14:25:49.564000 2433 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:25:49.564000 2433 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 14:25:49.564000 2433 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:25:52.068000 75510 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 14:25:52.068000 75510 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:25:52.068000 75510 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 14:25:52.068000 75510 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:25:52.710000 6463 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 14:25:52.710000 6463 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:25:52.710000 6463 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 14:25:52.710000 6463 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:25:52.824000 85539 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 14:25:52.824000 85539 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:25:52.824000 85539 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 14:25:52.824000 85539 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:25:53.210000 62917 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 14:25:53.210000 62917 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:25:53.210000 62917 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 14:25:53.210000 62917 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:25:53.721000 62079 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 14:25:53.721000 62079 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:25:53.721000 62079 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 14:25:53.721000 62079 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +[2025-08-18 14:26:10,049] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:26:10,050] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:26:10,050] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:26:10,050] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:26:10,054] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:26:10,054] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:26:10,054] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:26:10,054] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:26:13,866] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:26:13,867] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:26:13,867] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:26:13,867] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:26:13,867] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:26:13,868] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:26:13,868] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:26:13,869] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:26:14,338] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:26:14,429] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:26:14,446] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:26:14,447] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:26:14,450] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:26:14,450] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:26:14,454] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:26:14,454] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:26:21,497] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:26:21,498] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:26:21,498] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:26:21,499] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:26:21,502] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:26:21,502] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:26:21,502] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:26:21,502] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:26:28,596] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:26:28,596] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:26:28,596] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:26:28,597] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:26:28,597] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:26:28,597] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:26:28,597] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:26:28,597] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:26:29,094] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:26:29,221] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:26:29,225] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:26:29,227] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:26:29,229] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:26:29,230] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:26:29,232] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:26:29,232] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:26:29,348] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:26:29,348] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:26:29,357] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:26:29,358] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:26:29,359] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:26:29,367] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:26:29,367] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:26:29,367] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +W0818 14:39:35.680000 38602 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 14:39:35.680000 38602 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:39:35.680000 38602 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 14:39:35.680000 38602 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +*** +W0818 14:39:35.683000 104001 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 14:39:35.683000 104001 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:39:35.683000 104001 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 14:39:35.683000 104001 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:39:35.888000 27993 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 14:39:35.888000 27993 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:39:35.888000 27993 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 14:39:35.888000 27993 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:39:35.947000 92797 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 14:39:35.947000 92797 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:39:35.947000 92797 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 14:39:35.947000 92797 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:39:36.425000 118298 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 14:39:36.425000 118298 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:39:36.425000 118298 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 14:39:36.425000 118298 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:39:36.783000 74285 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 14:39:36.783000 74285 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:39:36.783000 74285 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 14:39:36.783000 74285 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:55:00.334000 79917 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 14:55:00.334000 79917 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:55:00.334000 79917 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 14:55:00.334000 79917 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:55:00.324000 123263 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 14:55:00.324000 123263 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:55:00.324000 123263 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 14:55:00.324000 123263 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:55:01.528000 11477 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 14:55:01.528000 11477 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:55:01.528000 11477 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 14:55:01.528000 11477 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:55:04.918000 34397 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 14:55:04.918000 34397 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:55:04.918000 34397 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 14:55:04.918000 34397 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:55:08.826000 30172 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 14:55:08.826000 30172 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:55:08.826000 30172 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 14:55:08.826000 30172 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +[2025-08-18 14:55:25,986] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:55:25,986] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:55:25,986] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:55:25,987] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:55:25,988] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:55:25,988] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:55:25,988] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:55:25,988] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:55:26,010] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:55:26,010] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:55:26,010] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:55:26,011] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:55:26,011] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:55:26,011] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:55:26,011] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:55:26,012] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:55:26,013] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:55:26,017] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:55:26,014] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:55:26,015] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:55:26,017] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:55:26,017] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:55:26,017] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:55:26,017] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:55:26,021] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:55:26,021] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:55:26,021] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:55:26,021] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:55:26,021] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:55:30,230] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:55:30,230] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:55:30,224] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:55:30,230] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:55:30,230] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:55:30,230] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:55:30,231] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:55:30,232] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:55:30,223] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:55:30,229] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:55:30,229] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:55:30,229] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:55:30,229] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:55:30,229] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:55:30,229] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:55:30,223] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:55:30,223] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:55:30,224] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:55:30,224] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:55:30,224] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:55:30,224] [INFO] [comm.py:652:init_distributed] cdb=None +ing TorchBackend in DeepSpeed with backend nccl +[2025-08-18 14:55:30,225] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:55:30,225] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:55:30,228] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:55:30,684] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:55:30,717] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:55:30,738] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:55:30,789] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:55:30,792] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:55:30,793] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:55:30,798] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:55:30,799] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:55:30,800] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 + on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:55:30,797] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:55:30,798] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:55:30,798] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:55:30,838] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:55:30,851] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:55:30,852] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:55:30,852] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:55:30,856] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:55:30,859] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:55:30,865] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:55:30,866] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:55:30,868] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:55:30,869] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:55:30,870] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:55:30,870] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +W0818 14:58:01.288000 87739 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 14:58:01.288000 87739 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:58:01.288000 87739 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 14:58:01.288000 87739 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:58:01.460000 87736 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 14:58:01.460000 87736 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:58:01.460000 87736 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 14:58:01.460000 87736 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:58:01.722000 18922 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 14:58:01.722000 18922 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:58:01.722000 18922 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 14:58:01.722000 18922 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:58:01.749000 1285 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 14:58:01.749000 1285 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:58:01.749000 1285 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 14:58:01.749000 1285 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:58:02.368000 18690 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 14:58:02.368000 18690 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:58:02.368000 18690 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 14:58:02.368000 18690 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:58:04.134000 37616 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 14:58:04.134000 37616 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:58:04.134000 37616 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 14:58:04.134000 37616 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:58:05.510000 22437 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 14:58:05.510000 22437 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 14:58:05.510000 22437 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 14:58:05.510000 22437 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +[2025-08-18 14:58:21,928] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:21,944] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:21,944] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:21,944] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:21,956] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:21,961] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:21,961] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:21,969] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:21,974] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:21,976] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:21,978] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:21,978] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:21,978] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:22,029] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:22,030] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:22,030] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:22,030] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:22,030] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:22,032] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:22,032] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:22,032] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:23,011] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:23,025] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:23,047] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:23,047] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:23,052] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:23,052] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:23,061] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:23,061] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:25,098] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:25,122] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:25,123] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:25,125] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:25,125] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:25,126] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:25,126] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:25,126] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:25,350] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:25,359] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:25,395] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:58:25,395] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:58:25,395] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:58:25,395] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:58:25,396] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:58:25,396] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:58:25,396] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:58:25,390] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:58:25,397] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:58:25,391] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:58:25,391] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:58:25,392] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:58:25,392] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:58:25,393] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:58:25,408] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:25,409] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:25,409] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:25,409] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:25,410] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:25,410] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:25,826] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:58:25,840] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:58:25,871] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:58:25,935] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:58:25,946] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:58:25,948] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:58:25,949] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:58:25,952] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:58:25,952] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:58:25,954] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:58:25,967] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:58:25,967] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:58:25,971] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:58:25,972] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:58:25,972] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:58:25,975] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:58:25,975] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:58:26,024] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:58:26,024] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:58:26,026] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:58:26,027] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:58:26,027] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:58:26,028] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:58:26,028] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:58:26,603] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:58:26,603] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:58:26,603] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:58:26,606] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:58:26,606] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:58:26,607] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:58:26,609] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:58:26,612] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:58:27,305] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:58:27,785] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:27,786] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:27,787] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:27,787] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:27,788] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:27,788] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:27,788] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:27,919] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:29,076] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:58:29,077] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:58:29,082] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:58:29,082] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:58:29,084] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:58:29,084] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:58:29,093] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:58:30,357] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:58:30,358] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:58:30,358] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:58:30,359] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:58:30,360] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:58:30,365] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:58:30,371] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:58:30,386] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:58:30,941] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:58:31,142] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:58:31,145] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:58:31,147] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:58:31,151] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:58:31,159] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:58:31,160] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:58:31,165] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:58:31,641] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:58:31,641] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:58:31,641] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:58:31,641] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:58:31,642] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:58:31,642] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:58:31,642] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:58:31,643] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:58:32,128] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:58:32,262] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:58:32,264] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:58:32,265] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:58:32,268] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:58:32,268] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:58:32,270] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:58:32,270] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:58:35,146] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:58:35,146] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:58:35,146] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:58:35,146] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:58:35,146] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:58:35,146] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:58:35,146] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:58:35,147] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:58:35,640] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:58:35,755] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:58:35,774] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:58:35,780] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:58:35,782] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:58:35,784] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:58:35,785] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:58:35,785] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:58:47,416] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:47,456] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:47,489] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:47,491] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:47,492] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:47,492] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:47,494] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:47,494] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 14:58:56,735] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:58:56,736] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:58:56,736] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:58:56,737] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:58:56,737] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:58:56,737] [INFO] [comm.py:683:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl +[2025-08-18 14:58:56,738] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:58:56,742] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:58:56,743] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 14:58:57,485] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:58:57,628] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:58:57,651] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:58:57,681] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:58:57,690] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:58:57,694] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 14:58:57,699] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 14:58:57,702] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. + Loading checkpoint shards: 0%| | 0/2 [00:00 before Client(conf_path) +Rank 0: --> after Client(conf_path) +Rank 0: Loading datasets: /mnt/petrelfs/liuzhaoyang/workspace/GUIAgent/internvl_chat/data/internvl_meta/meta/meta_250816_1.json +Rank 0: Loading guienv +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl with all sampling strategy +Rank 0: Loaded 327972 samples from VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl +Rank 0: Loading omniact +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl with all sampling strategy +Rank 0: Loaded 6720 samples from VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl +Rank 0: Loading ricoig16k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16133 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl +Rank 0: Loading ricosca +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl with all sampling strategy +Rank 0: Loaded 173212 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl +Rank 0: Loading seeclick +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl with all sampling strategy +Rank 0: Loaded 271121 samples from VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl +Rank 0: Loading ui_refexp +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl with all sampling strategy +Rank 0: Loaded 15624 samples from VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl +Rank 0: Loading webui350k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 57389 samples from VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl +Rank 0: Loading widget_captioning +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl with all sampling strategy +Rank 0: Loaded 101426 samples from VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl +Rank 0: Loading aitw-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl +Rank 0: Loading aitw-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl +Rank 0: Loading aitw-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl +Rank 0: Loading amex-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl +Rank 0: Loading amex-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl +Rank 0: Loading amex-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl +Rank 0: Loading android_control +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 149428 samples from VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl +Rank 0: Loading coat +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl with all sampling strategy +Rank 0: Loaded 11833 samples from VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl +Rank 0: Loading guiact-web-multi-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl +Rank 0: Loading guiact-web-multi-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl +Rank 0: Loading guiact-web-multi-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl +Rank 0: Loading guiact-web-single +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl with all sampling strategy +Rank 0: Loaded 67396 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl +Rank 0: Loading guide +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl with all sampling strategy +Rank 0: Loaded 13544 samples from VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl +Rank 0: Loading gui-odyssey-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl +Rank 0: Loading gui-odyssey-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl +Rank 0: Loading gui-odyssey-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl +Rank 0: Loading mind2web-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l1_202507011.jsonl +Rank 0: Loading mind2web-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l2_202507011.jsonl +Rank 0: Loading mind2web-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l3_202507011.jsonl +Rank 0: Loading miniwob-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l1_202507011.jsonl +Rank 0: Loading miniwob-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l2_202507011.jsonl +Rank 0: Loading miniwob-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l3_202507011.jsonl +Rank 0: Loading aguvis_android_control-v2 +Rank 0: Skipping aguvis_android_control-v2 due to repeat_time=0 +Rank 0: Loading aguvis_coat-v2 +Rank 0: Skipping aguvis_coat-v2 due to repeat_time=0 +Rank 0: Loading aguvis_docvqa_grounding +Rank 0: Skipping aguvis_docvqa_grounding due to repeat_time=0 +Rank 0: Loading aguvis_guiact-web-multi +Rank 0: Skipping aguvis_guiact-web-multi due to repeat_time=0 +Rank 0: Loading aguvis_guiact-web-single-v2 +Rank 0: Skipping aguvis_guiact-web-single-v2 due to repeat_time=0 +Rank 0: Loading aguvis_guide_si_10k-v2 +Rank 0: Skipping aguvis_guide_si_10k-v2 due to repeat_time=0 +Rank 0: Loading aguvis_guienv +Rank 0: Skipping aguvis_guienv due to repeat_time=0 +Rank 0: Loading aguvis_mind2web_train_v1.0.1 +Rank 0: Skipping aguvis_mind2web_train_v1.0.1 due to repeat_time=0 +Rank 0: Loading aguvis_omniact +Rank 0: Skipping aguvis_omniact due to repeat_time=0 +Rank 0: Loading aguvis_osatlas_ui_tars_cleaned +Rank 0: Skipping aguvis_osatlas_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading aguvis_ricoig16k +Rank 0: Skipping aguvis_ricoig16k due to repeat_time=0 +Rank 0: Loading aguvis_ricosca +Rank 0: Skipping aguvis_ricosca due to repeat_time=0 +Rank 0: Loading aguvis_seeclick_mi_ui_tars_cleaned +Rank 0: Skipping aguvis_seeclick_mi_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading aguvis_seeclick_ui_tars_cleaned_fixed +Rank 0: Skipping aguvis_seeclick_ui_tars_cleaned_fixed due to repeat_time=0 +Rank 0: Loading aguvis_ui_refexp +Rank 0: Skipping aguvis_ui_refexp due to repeat_time=0 +Rank 0: Loading aguvis_webui350k +Rank 0: Skipping aguvis_webui350k due to repeat_time=0 +Rank 0: Loading aguvis_widget_captioning +Rank 0: Skipping aguvis_widget_captioning due to repeat_time=0 +Rank 0: Loading icon_caption_icon_v0222_description +Rank 0: Skipping icon_caption_icon_v0222_description due to repeat_time=0 +Rank 0: Loading icon_grounding_icon_v0222_grounding +Rank 0: Skipping icon_grounding_icon_v0222_grounding due to repeat_time=0 +Rank 0: Loading refusal_component_final_1.5m +Rank 0: Skipping refusal_component_final_1.5m due to repeat_time=0 +Rank 0: Loading refusal_component_library_snap_icon_data_grounding +Rank 0: Skipping refusal_component_library_snap_icon_data_grounding due to repeat_time=0 +Rank 0: Loading refusal_component_v1_130k +Rank 0: Skipping refusal_component_v1_130k due to repeat_time=0 +Rank 0: Loading refusal_guienv +Rank 0: Skipping refusal_guienv due to repeat_time=0 +Rank 0: Loading refusal_icon_v0222_grounding +Rank 0: Skipping refusal_icon_v0222_grounding due to repeat_time=0 +Rank 0: Loading refusal_osatlas_ui_tars_cleaned +Rank 0: Skipping refusal_osatlas_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading refusal_ricosca +Rank 0: Skipping refusal_ricosca due to repeat_time=0 +Rank 0: Loading refusal_seeclick_mi_ui_tars_cleaned +Rank 0: Skipping refusal_seeclick_mi_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading refusal_seeclick_ui_tars_cleaned_fixed +Rank 0: Skipping refusal_seeclick_ui_tars_cleaned_fixed due to repeat_time=0 +Rank 0: Loading refusal_training_data_icon_grounded_merged +Rank 0: Skipping refusal_training_data_icon_grounded_merged due to repeat_time=0 +Rank 0: Loading component_generated_component_final_1.5m_cleaned_split +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_final_1.5m_cleaned_split_20250713.jsonl with random:10% sampling strategy +Rank 0: Loaded 3987 samples from VC:s3://gui-agent/jedi/annotations_250713/component_final_1.5m_cleaned_split_20250713.jsonl +Rank 0: Loading component_generated_component_library_snap_icon_data_description +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_description_conversations_20250713.jsonl with random:50% sampling strategy +Rank 0: Loaded 11061 samples from VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_description_conversations_20250713.jsonl +Rank 0: Loading component_generated_component_library_snap_icon_data_grounding +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_grounding_conversations_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 4424 samples from VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_grounding_conversations_20250713.jsonl +Rank 0: Loading component_generated_component_v1_130k +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_v1_130k_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 26376 samples from VC:s3://gui-agent/jedi/annotations_250713/component_v1_130k_20250713.jsonl +Rank 0: Loading component_rule-based_doc_data_new +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/doc_data_new_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 3153 samples from VC:s3://gui-agent/jedi/annotations_250713/doc_data_new_20250713.jsonl +Rank 0: Loading component_rule-based_doc_scroll_data_new +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/doc_scroll_data_new_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 603 samples from VC:s3://gui-agent/jedi/annotations_250713/doc_scroll_data_new_20250713.jsonl +Rank 0: Loading component_rule-based_ethercalc_v1 +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/ethercalc_v1_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 2012 samples from VC:s3://gui-agent/jedi/annotations_250713/ethercalc_v1_20250713.jsonl +Rank 0: Loading component_rule-based_slide_v1_17k +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/slide_v1_17k_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 2363 samples from VC:s3://gui-agent/jedi/annotations_250713/slide_v1_17k_20250713.jsonl +Rank 0: Loading icon_caption_ios_app_data +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/ios_app_data_conversations-images_pure_color_background_20250713.jsonl with all sampling strategy +Rank 0: Loaded 49498 samples from VC:s3://gui-agent/jedi/annotations_250713/ios_app_data_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_caption_mac_app_data +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/mac_app_data_conversations-images_pure_color_background_20250713.jsonl with all sampling strategy +Rank 0: Loaded 18083 samples from VC:s3://gui-agent/jedi/annotations_250713/mac_app_data_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_caption_training_data_icon +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_pure_color_background_20250713.jsonl with random:50% sampling strategy +Rank 0: Loaded 75874 samples from VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_grounding_training_data_icon_grounded_merged +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_grounded_merged_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 5466 samples from VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_grounded_merged_20250713.jsonl +Rank 0: Loading layout_layout200k_training_data_qwen25 +Rank 0: Skipping layout_layout200k_training_data_qwen25 due to repeat_time=0 +Rank 0: Loading layout_layout200k_grounding_training_data_qwen25 +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/layout200k_grounding_training_data_qwen25_20250713.jsonl with random:10% sampling strategy +Rank 0: Loaded 158612 samples from VC:s3://gui-agent/jedi/annotations_250713/layout200k_grounding_training_data_qwen25_20250713.jsonl +Rank 0: Loading layout_layout400k_claude_training_data_qwen25_split +Rank 0: Skipping layout_layout400k_claude_training_data_qwen25_split due to repeat_time=0 +Rank 0: Loading layout_layout400k_claude_grounding_training_data_qwen25_split +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/layout400k_claude_grounding_training_data_qwen25_split_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 7540 samples from VC:s3://gui-agent/jedi/annotations_250713/layout400k_claude_grounding_training_data_qwen25_split_20250713.jsonl +Rank 0: Loading layout_os_layout_v1 +Rank 0: Skipping layout_os_layout_v1 due to repeat_time=0 +Rank 0: Loading layout_os_layout_v1_grounding +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/os_layout_v1_grounding_training_data_qwen25_split_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 7857 samples from VC:s3://gui-agent/jedi/annotations_250713/os_layout_v1_grounding_training_data_qwen25_split_20250713.jsonl +Rank 0: Loading mind2web_raw_image +Rank 0: Loading VC:s3://gui-agent/mind2web_train/navigation_20250705.jsonl with all sampling strategy +Rank 0: Loaded 5740 samples from VC:s3://gui-agent/mind2web_train/navigation_20250705.jsonl +Rank 0: Loading ws_android_navigation_20250328 +Rank 0: Skipping ws_android_navigation_20250328 due to repeat_time=0 +Rank 0: Loading ws_android_navigation_20250407 +Rank 0: Skipping ws_android_navigation_20250407 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_w_history_20250328 +Rank 0: Skipping ws_web_navigation_w_history_20250328 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_wo_history_20250328 +Rank 0: Skipping ws_web_navigation_wo_history_20250328 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_20250421 +Rank 0: Skipping ws_web_navigation_20250421 due to repeat_time=0 +Rank 0: Loading ws_ubuntu_navigation_20250328 +Rank 0: Skipping ws_ubuntu_navigation_20250328 due to repeat_time=0 +Rank 0: Loading ws_android_navigation_20250505 +Rank 0: Skipping ws_android_navigation_20250505 due to repeat_time=0 +Rank 0: Loading internal_android_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 48814 samples from VC:s3://gui-agent/data_20250612/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19042 samples from VC:s3://gui-agent/data_20250612/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8363 samples from VC:s3://gui-agent/data_20250612/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 26412 samples from VC:s3://gui-agent/data_20250612/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 57522 samples from VC:s3://gui-agent/data_20250612/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 1342 samples from VC:s3://gui-agent/data_20250624/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 15766 samples from VC:s3://gui-agent/data_20250624/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19280 samples from VC:s3://gui-agent/data_20250630/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 3560 samples from VC:s3://gui-agent/data_20250630/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 9258 samples from VC:s3://gui-agent/data_20250630/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 420 samples from VC:s3://gui-agent/data_20250707/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 8898 samples from VC:s3://gui-agent/data_20250707/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 21026 samples from VC:s3://gui-agent/data_20250707/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 4490 samples from VC:s3://gui-agent/data_20250707/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 22154 samples from VC:s3://gui-agent/data_20250714/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/web/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 11614 samples from VC:s3://gui-agent/data_20250714/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 16767 samples from VC:s3://gui-agent/data_20250714/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 48814 samples from VC:s3://gui-agent/data_20250612/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 11970 samples from VC:s3://gui-agent/data_20250612/android/planning_20250811.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19042 samples from VC:s3://gui-agent/data_20250612/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 4926 samples from VC:s3://gui-agent/data_20250612/mac/planning_20250811.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8363 samples from VC:s3://gui-agent/data_20250612/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3716 samples from VC:s3://gui-agent/data_20250612/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 26412 samples from VC:s3://gui-agent/data_20250612/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 4420 samples from VC:s3://gui-agent/data_20250612/windows/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 115044 samples from VC:s3://gui-agent/data_20250612/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2684 samples from VC:s3://gui-agent/data_20250624/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 15766 samples from VC:s3://gui-agent/data_20250624/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7402 samples from VC:s3://gui-agent/data_20250624/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19280 samples from VC:s3://gui-agent/data_20250630/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3746 samples from VC:s3://gui-agent/data_20250630/android/planning_20250811.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 3560 samples from VC:s3://gui-agent/data_20250630/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 2469 samples from VC:s3://gui-agent/data_20250630/mac/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 18516 samples from VC:s3://gui-agent/data_20250630/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 420 samples from VC:s3://gui-agent/data_20250707/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 243 samples from VC:s3://gui-agent/data_20250707/mac/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 8898 samples from VC:s3://gui-agent/data_20250707/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 1188 samples from VC:s3://gui-agent/data_20250707/windows/planning_20250811.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8979 samples from VC:s3://gui-agent/data_20250707/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 1599 samples from VC:s3://gui-agent/data_20250707/android/planning_20250811.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 21026 samples from VC:s3://gui-agent/data_20250707/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2657 samples from VC:s3://gui-agent/data_20250707/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 22154 samples from VC:s3://gui-agent/data_20250714/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2383 samples from VC:s3://gui-agent/data_20250714/windows/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/web/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 23229 samples from VC:s3://gui-agent/data_20250714/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 16767 samples from VC:s3://gui-agent/data_20250714/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2389 samples from VC:s3://gui-agent/data_20250714/ubuntu/planning_20250811.jsonl +Rank 0: Loading private_aig_share_0815_logo_oral_operation_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_oral_operation_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_oral_operation_d240924_v1.jsonl +Rank 0: Loading private_aig_share_0815_logo_region_caption_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_region_caption_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_region_caption_d240924_v1.jsonl +Rank 0: Loading private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2.jsonl with all sampling strategy +Rank 0: Loaded 20293 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2.jsonl +Rank 0: Loading private_ui_phone_comment_20240606_json_d20241023_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_comment_20240606_json_d20241023_v2.jsonl with all sampling strategy +Rank 0: Loaded 1055 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_comment_20240606_json_d20241023_v2.jsonl +Rank 0: Loading private_ui_internal_aig_json_d241126 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_json_d241126.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 6837 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_json_d241126.jsonl +Rank 0: Loading private_ui_internal_aig_xml_d241126 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_xml_d241126.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 6873 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_xml_d241126.jsonl +Rank 0: Loading OS_Altas_androidworld_grounding_d241120_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/OS_Altas_androidworld_grounding_d241120_v1.jsonl with all sampling strategy +Rank 0: Loaded 89860 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/OS_Altas_androidworld_grounding_d241120_v1.jsonl +Rank 0: Loading private_ui_aig_share_long_caption_20240604_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_long_caption_20240604_v1.jsonl with repeat:4 sampling strategy +Rank 0: Loaded 3156 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_long_caption_20240604_v1.jsonl +Rank 0: Loading aw_1218_grounding +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/grounding_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/grounding_new.jsonl +Rank 0: Loading aw_1218_regioncaption +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/regioncaption_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/regioncaption_new.jsonl +Rank 0: Loading aw_1218_oral_operation +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/oral_operation_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/oral_operation_new.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600.jsonl with all sampling strategy +Rank 0: Loaded 6600 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1.jsonl +Rank 0: Loading private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607.jsonl with all sampling strategy +Rank 0: Loaded 24620 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607.jsonl +Rank 0: Loading private_ui_phone_2403_long_caption_d20240604_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d20240604_v2.jsonl with all sampling strategy +Rank 0: Loaded 17196 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d20240604_v2.jsonl +Rank 0: Loading private_ui_phone_2403_long_caption_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 5998 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d240430_v1.jsonl +Rank 0: Loading private_ui_phone_2403_ocr_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ocr_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 31276 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ocr_d240430_v1.jsonl +Rank 0: Loading screen_qa_with_bbox_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_with_bbox_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 62401 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_with_bbox_d240430_v1.jsonl +Rank 0: Loading screenai_layout_20240604_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screenai_layout_20240604_v1.jsonl with all sampling strategy +Rank 0: Loaded 22076 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screenai_layout_20240604_v1.jsonl +Rank 0: Loading amex_grounding_d240813_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/amex_grounding_d240813_v1.jsonl with all sampling strategy +Rank 0: Loaded 102007 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/amex_grounding_d240813_v1.jsonl +Rank 0: Loading guicourse_guienv_text_grounding_1_d240815_v3 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_1_d240815_v3.jsonl with all sampling strategy +Rank 0: Loaded 63581 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_1_d240815_v3.jsonl +Rank 0: Loading guicourse_guienv_text_grounding_2_d240815_v3 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_2_d240815_v3.jsonl with all sampling strategy +Rank 0: Loaded 6852 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_2_d240815_v3.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1.jsonl +Rank 0: Loading screen_qa_short_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_short_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 27880 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_short_d240430_v1.jsonl +Rank 0: Loading private_aig_share_0815_logo_grounding_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_grounding_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_grounding_d240924_v1.jsonl +Rank 0: Loading private_schedual_extract_20240520_v2_r464_reprompt_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_schedual_extract_20240520_v2_r464_reprompt_d240607.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 928 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_schedual_extract_20240520_v2_r464_reprompt_d240607.jsonl +Rank 0: Loading private_ui2json_app_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_app_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2488 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_app_d20240822_v1.jsonl +Rank 0: Loading private_ui2json_os_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_os_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 1242 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_os_d20240822_v1.jsonl +Rank 0: Loading private_ui2json_web_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_web_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2360 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_web_d20240822_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607.jsonl with all sampling strategy +Rank 0: Loaded 3791 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607.jsonl +Rank 0: Loading private_ui_aig_share_2405_marker_recognition_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_marker_recognition_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5179 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_marker_recognition_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_ocr_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_ocr_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5090 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_ocr_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_operation_oral_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_operation_oral_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5070 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_operation_oral_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5248 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2408_region_caption_d240903_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2408_region_caption_d240903_v1.jsonl with all sampling strategy +Rank 0: Loaded 5854 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2408_region_caption_d240903_v1.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1.jsonl +Rank 0: Loading uground_web_direct_150k_description_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_direct_150k_description_filtered_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 74208 samples from VC:s3://gui/new_annotations/uground/web_direct_150k_description_filtered_202507011.jsonl +Rank 0: Loading uground_web_direct_258k_function_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_direct_258k_function_filtered_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 124132 samples from VC:s3://gui/new_annotations/uground/web_direct_258k_function_filtered_202507011.jsonl +Rank 0: Loading uground_web_hybrid_773k_max_25qa_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_hybrid_773k_max_25qa_filtered_new_202507011.jsonl with random:50% sampling strategy +W0818 15:02:14.390000 28441 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 15:02:14.390000 28441 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 15:02:14.390000 28441 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 15:02:14.390000 28441 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 15:02:14.534000 98359 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 15:02:14.534000 98359 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 15:02:14.534000 98359 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 15:02:14.534000 98359 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 15:02:14.580000 98004 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 15:02:14.580000 98004 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 15:02:14.580000 98004 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 15:02:14.580000 98004 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 15:02:15.160000 11368 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 15:02:15.160000 11368 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 15:02:15.160000 11368 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 15:02:15.160000 11368 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 15:02:16.478000 50046 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 15:02:16.478000 50046 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 15:02:16.478000 50046 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 15:02:16.478000 50046 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 15:02:18.146000 31066 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 15:02:18.146000 31066 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 15:02:18.146000 31066 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 15:02:18.146000 31066 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 15:02:21.974000 62696 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 15:02:21.974000 62696 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 15:02:21.974000 62696 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 15:02:21.974000 62696 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +[2025-08-18 15:02:30,346] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:30,349] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:30,417] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:30,418] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:30,418] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:30,418] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:30,418] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:30,418] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:30,563] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:30,647] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:30,647] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:30,648] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:30,648] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:30,648] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:30,648] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:30,648] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:31,048] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:31,052] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:31,067] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:31,068] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:31,070] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:31,070] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:31,083] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:31,096] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:31,614] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:31,619] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:31,629] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:31,689] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:31,689] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:31,749] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:31,750] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:31,760] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:32,906] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:02:32,911] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:02:32,915] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:02:32,915] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:02:32,916] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:02:32,916] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:02:32,916] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:02:32,927] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:02:32,926] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:02:32,944] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:02:32,952] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:02:32,953] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:02:32,959] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:02:32,960] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:02:33,015] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:02:33,334] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 15:02:33,357] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 15:02:33,456] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 15:02:33,462] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 15:02:33,464] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 15:02:33,465] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +ce None world_size = 64 +[2025-08-18 15:02:33,474] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 15:02:33,484] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 15:02:33,487] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 15:02:33,489] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 15:02:33,489] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 15:02:33,491] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 15:02:33,751] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:02:33,752] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:02:33,753] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:02:33,753] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:02:33,760] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:02:33,761] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:02:33,761] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:02:33,761] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:02:34,015] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:02:34,015] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:02:34,015] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:02:34,017] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:02:34,018] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:02:34,018] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:02:34,018] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:02:34,054] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:02:34,236] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 15:02:34,368] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 15:02:34,369] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 15:02:34,374] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 15:02:34,379] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 15:02:34,382] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 15:02:34,385] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 15:02:34,386] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 15:02:34,455] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 15:02:34,578] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 15:02:34,587] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 15:02:34,598] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 15:02:34,598] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 15:02:34,598] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 15:02:34,601] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 15:02:34,601] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 15:02:37,433] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:37,433] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:37,443] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:37,453] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:37,453] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:37,472] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:37,472] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:37,473] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:38,750] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:38,750] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:38,758] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:38,758] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:38,782] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:38,782] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:38,788] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:38,788] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:42,593] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:02:42,593] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:02:42,601] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:02:42,602] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:02:42,602] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:02:42,604] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:02:42,605] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:02:42,609] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:02:43,157] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 15:02:43,310] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 15:02:43,327] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 15:02:43,356] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 15:02:43,368] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 15:02:43,375] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 15:02:43,377] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 15:02:43,383] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 15:02:43,761] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:02:43,762] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:02:43,762] [INFO] [comm.py:683:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl +[2025-08-18 15:02:43,763] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:02:43,763] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:02:43,764] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:02:43,785] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:02:43,788] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:02:43,792] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:02:44,411] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 15:02:44,632] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 15:02:44,634] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 15:02:44,636] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 15:02:44,638] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 15:02:44,643] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 15:02:44,644] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 15:02:44,654] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 15:02:57,379] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:57,379] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:57,396] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:57,410] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:57,417] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:57,423] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:57,423] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:57,423] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:57,423] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:57,428] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:57,438] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:57,440] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:57,440] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:57,440] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:57,440] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:02:57,440] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 15:03:06,859] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:03:06,859] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:03:06,859] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:03:06,859] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:03:06,859] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:03:06,860] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:03:06,861] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:03:06,863] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:03:06,869] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:03:06,869] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:03:06,869] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:03:06,869] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:03:06,870] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:03:06,871] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:03:06,871] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:03:06,882] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 15:03:07,384] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 15:03:07,477] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 15:03:07,495] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 15:03:07,497] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 15:03:07,497] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 15:03:07,497] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 15:03:07,502] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 15:03:07,504] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 15:03:07,513] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 15:03:07,514] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 15:03:07,514] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 15:03:07,522] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 15:03:07,524] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 15:03:07,525] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 15:03:07,525] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. + Loading checkpoint shards: 0%| | 0/2 [00:00 before Client(conf_path) +Rank 0: --> after Client(conf_path) +Rank 0: Loading datasets: /mnt/petrelfs/liuzhaoyang/workspace/GUIAgent/internvl_chat/data/internvl_meta/meta/meta_250816_1.json +Rank 0: Loading guienv +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl with all sampling strategy +Rank 0: Loaded 327972 samples from VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl +Rank 0: Loading omniact +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl with all sampling strategy +Rank 0: Loaded 6720 samples from VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl +Rank 0: Loading ricoig16k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16133 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl +Rank 0: Loading ricosca +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl with all sampling strategy +Rank 0: Loaded 173212 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl +Rank 0: Loading seeclick +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl with all sampling strategy +Rank 0: Loaded 271121 samples from VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl +Rank 0: Loading ui_refexp +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl with all sampling strategy +Rank 0: Loaded 15624 samples from VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl +Rank 0: Loading webui350k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 57389 samples from VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl +Rank 0: Loading widget_captioning +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl with all sampling strategy +Rank 0: Loaded 101426 samples from VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl +Rank 0: Loading aitw-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl +Rank 0: Loading aitw-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl +Rank 0: Loading aitw-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl +Rank 0: Loading amex-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl +Rank 0: Loading amex-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl +Rank 0: Loading amex-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl +Rank 0: Loading android_control +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 149428 samples from VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl +Rank 0: Loading coat +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl with all sampling strategy +Rank 0: Loaded 11833 samples from VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl +Rank 0: Loading guiact-web-multi-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl +Rank 0: Loading guiact-web-multi-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl +Rank 0: Loading guiact-web-multi-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl +Rank 0: Loading guiact-web-single +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl with all sampling strategy +Rank 0: Loaded 67396 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl +Rank 0: Loading guide +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl with all sampling strategy +Rank 0: Loaded 13544 samples from VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl +Rank 0: Loading gui-odyssey-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl +Rank 0: Loading gui-odyssey-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl +Rank 0: Loading gui-odyssey-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl +Rank 0: Loading mind2web-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l1_202507011.jsonl +Rank 0: Loading mind2web-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l2_202507011.jsonl +Rank 0: Loading mind2web-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l3_202507011.jsonl +Rank 0: Loading miniwob-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l1_202507011.jsonl +Rank 0: Loading miniwob-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l2_202507011.jsonl +Rank 0: Loading miniwob-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l3_202507011.jsonl +Rank 0: Loading aguvis_android_control-v2 +Rank 0: Skipping aguvis_android_control-v2 due to repeat_time=0 +Rank 0: Loading aguvis_coat-v2 +Rank 0: Skipping aguvis_coat-v2 due to repeat_time=0 +Rank 0: Loading aguvis_docvqa_grounding +Rank 0: Skipping aguvis_docvqa_grounding due to repeat_time=0 +Rank 0: Loading aguvis_guiact-web-multi +Rank 0: Skipping aguvis_guiact-web-multi due to repeat_time=0 +Rank 0: Loading aguvis_guiact-web-single-v2 +Rank 0: Skipping aguvis_guiact-web-single-v2 due to repeat_time=0 +Rank 0: Loading aguvis_guide_si_10k-v2 +Rank 0: Skipping aguvis_guide_si_10k-v2 due to repeat_time=0 +Rank 0: Loading aguvis_guienv +Rank 0: Skipping aguvis_guienv due to repeat_time=0 +Rank 0: Loading aguvis_mind2web_train_v1.0.1 +Rank 0: Skipping aguvis_mind2web_train_v1.0.1 due to repeat_time=0 +Rank 0: Loading aguvis_omniact +Rank 0: Skipping aguvis_omniact due to repeat_time=0 +Rank 0: Loading aguvis_osatlas_ui_tars_cleaned +Rank 0: Skipping aguvis_osatlas_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading aguvis_ricoig16k +Rank 0: Skipping aguvis_ricoig16k due to repeat_time=0 +Rank 0: Loading aguvis_ricosca +Rank 0: Skipping aguvis_ricosca due to repeat_time=0 +Rank 0: Loading aguvis_seeclick_mi_ui_tars_cleaned +Rank 0: Skipping aguvis_seeclick_mi_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading aguvis_seeclick_ui_tars_cleaned_fixed +Rank 0: Skipping aguvis_seeclick_ui_tars_cleaned_fixed due to repeat_time=0 +Rank 0: Loading aguvis_ui_refexp +Rank 0: Skipping aguvis_ui_refexp due to repeat_time=0 +Rank 0: Loading aguvis_webui350k +Rank 0: Skipping aguvis_webui350k due to repeat_time=0 +Rank 0: Loading aguvis_widget_captioning +Rank 0: Skipping aguvis_widget_captioning due to repeat_time=0 +Rank 0: Loading icon_caption_icon_v0222_description +Rank 0: Skipping icon_caption_icon_v0222_description due to repeat_time=0 +Rank 0: Loading icon_grounding_icon_v0222_grounding +Rank 0: Skipping icon_grounding_icon_v0222_grounding due to repeat_time=0 +Rank 0: Loading refusal_component_final_1.5m +Rank 0: Skipping refusal_component_final_1.5m due to repeat_time=0 +Rank 0: Loading refusal_component_library_snap_icon_data_grounding +Rank 0: Skipping refusal_component_library_snap_icon_data_grounding due to repeat_time=0 +Rank 0: Loading refusal_component_v1_130k +Rank 0: Skipping refusal_component_v1_130k due to repeat_time=0 +Rank 0: Loading refusal_guienv +Rank 0: Skipping refusal_guienv due to repeat_time=0 +Rank 0: Loading refusal_icon_v0222_grounding +Rank 0: Skipping refusal_icon_v0222_grounding due to repeat_time=0 +Rank 0: Loading refusal_osatlas_ui_tars_cleaned +Rank 0: Skipping refusal_osatlas_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading refusal_ricosca +Rank 0: Skipping refusal_ricosca due to repeat_time=0 +Rank 0: Loading refusal_seeclick_mi_ui_tars_cleaned +Rank 0: Skipping refusal_seeclick_mi_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading refusal_seeclick_ui_tars_cleaned_fixed +Rank 0: Skipping refusal_seeclick_ui_tars_cleaned_fixed due to repeat_time=0 +Rank 0: Loading refusal_training_data_icon_grounded_merged +Rank 0: Skipping refusal_training_data_icon_grounded_merged due to repeat_time=0 +Rank 0: Loading component_generated_component_final_1.5m_cleaned_split +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_final_1.5m_cleaned_split_20250713.jsonl with random:10% sampling strategy +Rank 0: Loaded 3987 samples from VC:s3://gui-agent/jedi/annotations_250713/component_final_1.5m_cleaned_split_20250713.jsonl +Rank 0: Loading component_generated_component_library_snap_icon_data_description +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_description_conversations_20250713.jsonl with random:50% sampling strategy +Rank 0: Loaded 11061 samples from VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_description_conversations_20250713.jsonl +Rank 0: Loading component_generated_component_library_snap_icon_data_grounding +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_grounding_conversations_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 4424 samples from VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_grounding_conversations_20250713.jsonl +Rank 0: Loading component_generated_component_v1_130k +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_v1_130k_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 26376 samples from VC:s3://gui-agent/jedi/annotations_250713/component_v1_130k_20250713.jsonl +Rank 0: Loading component_rule-based_doc_data_new +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/doc_data_new_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 3153 samples from VC:s3://gui-agent/jedi/annotations_250713/doc_data_new_20250713.jsonl +Rank 0: Loading component_rule-based_doc_scroll_data_new +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/doc_scroll_data_new_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 603 samples from VC:s3://gui-agent/jedi/annotations_250713/doc_scroll_data_new_20250713.jsonl +Rank 0: Loading component_rule-based_ethercalc_v1 +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/ethercalc_v1_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 2012 samples from VC:s3://gui-agent/jedi/annotations_250713/ethercalc_v1_20250713.jsonl +Rank 0: Loading component_rule-based_slide_v1_17k +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/slide_v1_17k_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 2363 samples from VC:s3://gui-agent/jedi/annotations_250713/slide_v1_17k_20250713.jsonl +Rank 0: Loading icon_caption_ios_app_data +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/ios_app_data_conversations-images_pure_color_background_20250713.jsonl with all sampling strategy +Rank 0: Loaded 49498 samples from VC:s3://gui-agent/jedi/annotations_250713/ios_app_data_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_caption_mac_app_data +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/mac_app_data_conversations-images_pure_color_background_20250713.jsonl with all sampling strategy +Rank 0: Loaded 18083 samples from VC:s3://gui-agent/jedi/annotations_250713/mac_app_data_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_caption_training_data_icon +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_pure_color_background_20250713.jsonl with random:50% sampling strategy +Rank 0: Loaded 75874 samples from VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_grounding_training_data_icon_grounded_merged +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_grounded_merged_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 5466 samples from VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_grounded_merged_20250713.jsonl +Rank 0: Loading layout_layout200k_training_data_qwen25 +Rank 0: Skipping layout_layout200k_training_data_qwen25 due to repeat_time=0 +Rank 0: Loading layout_layout200k_grounding_training_data_qwen25 +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/layout200k_grounding_training_data_qwen25_20250713.jsonl with random:10% sampling strategy +Rank 0: Loaded 158612 samples from VC:s3://gui-agent/jedi/annotations_250713/layout200k_grounding_training_data_qwen25_20250713.jsonl +Rank 0: Loading layout_layout400k_claude_training_data_qwen25_split +Rank 0: Skipping layout_layout400k_claude_training_data_qwen25_split due to repeat_time=0 +Rank 0: Loading layout_layout400k_claude_grounding_training_data_qwen25_split +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/layout400k_claude_grounding_training_data_qwen25_split_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 7540 samples from VC:s3://gui-agent/jedi/annotations_250713/layout400k_claude_grounding_training_data_qwen25_split_20250713.jsonl +Rank 0: Loading layout_os_layout_v1 +Rank 0: Skipping layout_os_layout_v1 due to repeat_time=0 +Rank 0: Loading layout_os_layout_v1_grounding +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/os_layout_v1_grounding_training_data_qwen25_split_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 7857 samples from VC:s3://gui-agent/jedi/annotations_250713/os_layout_v1_grounding_training_data_qwen25_split_20250713.jsonl +Rank 0: Loading mind2web_raw_image +Rank 0: Loading VC:s3://gui-agent/mind2web_train/navigation_20250705.jsonl with all sampling strategy +Rank 0: Loaded 5740 samples from VC:s3://gui-agent/mind2web_train/navigation_20250705.jsonl +Rank 0: Loading ws_android_navigation_20250328 +Rank 0: Skipping ws_android_navigation_20250328 due to repeat_time=0 +Rank 0: Loading ws_android_navigation_20250407 +Rank 0: Skipping ws_android_navigation_20250407 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_w_history_20250328 +Rank 0: Skipping ws_web_navigation_w_history_20250328 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_wo_history_20250328 +Rank 0: Skipping ws_web_navigation_wo_history_20250328 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_20250421 +Rank 0: Skipping ws_web_navigation_20250421 due to repeat_time=0 +Rank 0: Loading ws_ubuntu_navigation_20250328 +Rank 0: Skipping ws_ubuntu_navigation_20250328 due to repeat_time=0 +Rank 0: Loading ws_android_navigation_20250505 +Rank 0: Skipping ws_android_navigation_20250505 due to repeat_time=0 +Rank 0: Loading internal_android_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 48814 samples from VC:s3://gui-agent/data_20250612/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19042 samples from VC:s3://gui-agent/data_20250612/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8363 samples from VC:s3://gui-agent/data_20250612/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 26412 samples from VC:s3://gui-agent/data_20250612/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 57522 samples from VC:s3://gui-agent/data_20250612/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 1342 samples from VC:s3://gui-agent/data_20250624/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 15766 samples from VC:s3://gui-agent/data_20250624/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19280 samples from VC:s3://gui-agent/data_20250630/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 3560 samples from VC:s3://gui-agent/data_20250630/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 9258 samples from VC:s3://gui-agent/data_20250630/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 420 samples from VC:s3://gui-agent/data_20250707/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 8898 samples from VC:s3://gui-agent/data_20250707/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 21026 samples from VC:s3://gui-agent/data_20250707/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 4490 samples from VC:s3://gui-agent/data_20250707/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 22154 samples from VC:s3://gui-agent/data_20250714/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/web/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 11614 samples from VC:s3://gui-agent/data_20250714/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 16767 samples from VC:s3://gui-agent/data_20250714/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 48814 samples from VC:s3://gui-agent/data_20250612/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 11970 samples from VC:s3://gui-agent/data_20250612/android/planning_20250811.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19042 samples from VC:s3://gui-agent/data_20250612/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 4926 samples from VC:s3://gui-agent/data_20250612/mac/planning_20250811.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8363 samples from VC:s3://gui-agent/data_20250612/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3716 samples from VC:s3://gui-agent/data_20250612/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 26412 samples from VC:s3://gui-agent/data_20250612/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 4420 samples from VC:s3://gui-agent/data_20250612/windows/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 115044 samples from VC:s3://gui-agent/data_20250612/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2684 samples from VC:s3://gui-agent/data_20250624/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 15766 samples from VC:s3://gui-agent/data_20250624/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7402 samples from VC:s3://gui-agent/data_20250624/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19280 samples from VC:s3://gui-agent/data_20250630/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3746 samples from VC:s3://gui-agent/data_20250630/android/planning_20250811.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 3560 samples from VC:s3://gui-agent/data_20250630/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 2469 samples from VC:s3://gui-agent/data_20250630/mac/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 18516 samples from VC:s3://gui-agent/data_20250630/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 420 samples from VC:s3://gui-agent/data_20250707/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 243 samples from VC:s3://gui-agent/data_20250707/mac/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 8898 samples from VC:s3://gui-agent/data_20250707/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 1188 samples from VC:s3://gui-agent/data_20250707/windows/planning_20250811.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8979 samples from VC:s3://gui-agent/data_20250707/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 1599 samples from VC:s3://gui-agent/data_20250707/android/planning_20250811.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 21026 samples from VC:s3://gui-agent/data_20250707/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2657 samples from VC:s3://gui-agent/data_20250707/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 22154 samples from VC:s3://gui-agent/data_20250714/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2383 samples from VC:s3://gui-agent/data_20250714/windows/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/web/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 23229 samples from VC:s3://gui-agent/data_20250714/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 16767 samples from VC:s3://gui-agent/data_20250714/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2389 samples from VC:s3://gui-agent/data_20250714/ubuntu/planning_20250811.jsonl +Rank 0: Loading private_aig_share_0815_logo_oral_operation_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_oral_operation_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_oral_operation_d240924_v1.jsonl +Rank 0: Loading private_aig_share_0815_logo_region_caption_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_region_caption_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_region_caption_d240924_v1.jsonl +Rank 0: Loading private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2.jsonl with all sampling strategy +Rank 0: Loaded 20293 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2.jsonl +Rank 0: Loading private_ui_phone_comment_20240606_json_d20241023_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_comment_20240606_json_d20241023_v2.jsonl with all sampling strategy +Rank 0: Loaded 1055 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_comment_20240606_json_d20241023_v2.jsonl +Rank 0: Loading private_ui_internal_aig_json_d241126 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_json_d241126.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 6837 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_json_d241126.jsonl +Rank 0: Loading private_ui_internal_aig_xml_d241126 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_xml_d241126.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 6873 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_xml_d241126.jsonl +Rank 0: Loading OS_Altas_androidworld_grounding_d241120_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/OS_Altas_androidworld_grounding_d241120_v1.jsonl with all sampling strategy +Rank 0: Loaded 89860 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/OS_Altas_androidworld_grounding_d241120_v1.jsonl +Rank 0: Loading private_ui_aig_share_long_caption_20240604_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_long_caption_20240604_v1.jsonl with repeat:4 sampling strategy +Rank 0: Loaded 3156 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_long_caption_20240604_v1.jsonl +Rank 0: Loading aw_1218_grounding +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/grounding_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/grounding_new.jsonl +Rank 0: Loading aw_1218_regioncaption +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/regioncaption_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/regioncaption_new.jsonl +Rank 0: Loading aw_1218_oral_operation +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/oral_operation_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/oral_operation_new.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600.jsonl with all sampling strategy +Rank 0: Loaded 6600 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1.jsonl +Rank 0: Loading private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607.jsonl with all sampling strategy +Rank 0: Loaded 24620 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607.jsonl +Rank 0: Loading private_ui_phone_2403_long_caption_d20240604_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d20240604_v2.jsonl with all sampling strategy +Rank 0: Loaded 17196 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d20240604_v2.jsonl +Rank 0: Loading private_ui_phone_2403_long_caption_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 5998 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d240430_v1.jsonl +Rank 0: Loading private_ui_phone_2403_ocr_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ocr_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 31276 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ocr_d240430_v1.jsonl +Rank 0: Loading screen_qa_with_bbox_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_with_bbox_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 62401 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_with_bbox_d240430_v1.jsonl +Rank 0: Loading screenai_layout_20240604_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screenai_layout_20240604_v1.jsonl with all sampling strategy +Rank 0: Loaded 22076 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screenai_layout_20240604_v1.jsonl +Rank 0: Loading amex_grounding_d240813_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/amex_grounding_d240813_v1.jsonl with all sampling strategy +Rank 0: Loaded 102007 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/amex_grounding_d240813_v1.jsonl +Rank 0: Loading guicourse_guienv_text_grounding_1_d240815_v3 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_1_d240815_v3.jsonl with all sampling strategy +Rank 0: Loaded 63581 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_1_d240815_v3.jsonl +Rank 0: Loading guicourse_guienv_text_grounding_2_d240815_v3 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_2_d240815_v3.jsonl with all sampling strategy +Rank 0: Loaded 6852 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_2_d240815_v3.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1.jsonl +Rank 0: Loading screen_qa_short_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_short_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 27880 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_short_d240430_v1.jsonl +Rank 0: Loading private_aig_share_0815_logo_grounding_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_grounding_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_grounding_d240924_v1.jsonl +Rank 0: Loading private_schedual_extract_20240520_v2_r464_reprompt_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_schedual_extract_20240520_v2_r464_reprompt_d240607.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 928 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_schedual_extract_20240520_v2_r464_reprompt_d240607.jsonl +Rank 0: Loading private_ui2json_app_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_app_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2488 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_app_d20240822_v1.jsonl +Rank 0: Loading private_ui2json_os_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_os_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 1242 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_os_d20240822_v1.jsonl +Rank 0: Loading private_ui2json_web_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_web_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2360 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_web_d20240822_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607.jsonl with all sampling strategy +Rank 0: Loaded 3791 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607.jsonl +Rank 0: Loading private_ui_aig_share_2405_marker_recognition_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_marker_recognition_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5179 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_marker_recognition_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_ocr_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_ocr_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5090 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_ocr_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_operation_oral_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_operation_oral_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5070 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_operation_oral_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5248 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2408_region_caption_d240903_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2408_region_caption_d240903_v1.jsonl with all sampling strategy +Rank 0: Loaded 5854 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2408_region_caption_d240903_v1.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1.jsonl +Rank 0: Loading uground_web_direct_150k_description_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_direct_150k_description_filtered_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 74208 samples from VC:s3://gui/new_annotations/uground/web_direct_150k_description_filtered_202507011.jsonl +Rank 0: Loading uground_web_direct_258k_function_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_direct_258k_function_filtered_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 124132 samples from VC:s3://gui/new_annotations/uground/web_direct_258k_function_filtered_202507011.jsonl +Rank 0: Loading uground_web_hybrid_773k_max_25qa_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_hybrid_773k_max_25qa_filtered_new_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 621928 samples from VC:s3://gui/new_annotations/uground/web_hybrid_773k_max_25qa_filtered_new_202507011.jsonl +Rank 0: Loading altas_windows +Rank 0: Loading VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_windows_splited_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 537672 samples from VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_windows_splited_202507011.jsonl +Rank 0: Loading altas_linux +Rank 0: Loading VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_linux_splited_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 21578 samples from VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_linux_splited_202507011.jsonl +Rank 0: Loading atlas_macos +Rank 0: Loading VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_macos_splited_202507011.jsonl with random:20% sampling strategy +Rank 0: Loaded 3680 samples from VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_macos_splited_202507011.jsonl +Rank 0: Loading android_action_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/android/filter_action_grounding_20250405_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 5621 samples from VC:s3://gui/data_20250328/android/filter_action_grounding_20250405_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250328 +Rank 0: Loading VC:s3://gui-agent/data_20250328/windows/action_grounding_20250409_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 11980 samples from VC:s3://gui-agent/data_20250328/windows/action_grounding_20250409_202507011_20250722.jsonl +Rank 0: Loading web_action_grounding_20250328 +Rank 0: Loading VC:s3://gui-agent/data_20250328/web_25k/action_grounding_20250404_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 9459 samples from VC:s3://gui-agent/data_20250328/web_25k/action_grounding_20250404_202507011.jsonl +Rank 0: Loading ubuntu_action_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/ubuntu/action_grounding_20250407_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 328 samples from VC:s3://gui/data_20250310/ubuntu/action_grounding_20250407_202507011.jsonl +Rank 0: Loading ubuntu_action_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/ubuntu/action_grounding_20250407_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 54 samples from VC:s3://gui/data_20250317/ubuntu/action_grounding_20250407_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/action_grounding_20250421_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 480 samples from VC:s3://gui/data_20250317/windows/action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/crop_action_grounding_20250421_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 240 samples from VC:s3://gui/data_20250317/windows/crop_action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/action_grounding_20250421_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 944 samples from VC:s3://gui/data_20250310/windows/action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/crop_action_grounding_20250421_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 472 samples from VC:s3://gui/data_20250310/windows/crop_action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading mac_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/mac/action_grounding_20250410_202507011.jsonl with all sampling strategy +Rank 0: Loaded 1578 samples from VC:s3://gui-agent/data_20250407/mac/action_grounding_20250410_202507011.jsonl +Rank 0: Loading iphone_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/iphone/white/action_grounding_20250410_202507011.jsonl with all sampling strategy +Rank 0: Loaded 20394 samples from VC:s3://gui-agent/data_20250407/iphone/white/action_grounding_20250410_202507011.jsonl +Rank 0: Loading web_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/web/action_grounding_20250414_202507011.jsonl with random:20% sampling strategy +Rank 0: Loaded 14285 samples from VC:s3://gui-agent/data_20250407/web/action_grounding_20250414_202507011.jsonl +Rank 0: Loading android_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/android/action_grounding_20250410_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 3590 samples from VC:s3://gui-agent/data_20250407/android/action_grounding_20250410_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/action_grounding_20250416_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 21422 samples from VC:s3://gui-agent/data_20250407/windows/action_grounding_20250416_202507011_20250722.jsonl +Rank 0: Loading windows_human_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/human_action_grounding_20250416_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 100 samples from VC:s3://gui-agent/data_20250407/windows/human_action_grounding_20250416_202507011_20250722.jsonl +Rank 0: Loading windows_aug_cropping_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/sub_action_grounding_20250421_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 7675 samples from VC:s3://gui-agent/data_20250407/windows/sub_action_grounding_20250421_202507011.jsonl +Rank 0: Loading iphone_action_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/action_grounding_20250417_202507011.jsonl with all sampling strategy +Rank 0: Loaded 20116 samples from VC:s3://gui-agent/data_20250414/iphone/action_grounding_20250417_202507011.jsonl +Rank 0: Loading iphone_human_action_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/human_action_grounding_20250421_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2520 samples from VC:s3://gui-agent/data_20250414/iphone/human_action_grounding_20250421_202507011.jsonl +Rank 0: Loading mac_human_action_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/mac/human_action_grounding_20250418_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7814 samples from VC:s3://gui-agent/data_20250414/mac/human_action_grounding_20250418_202507011.jsonl +Rank 0: Loading android_action_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/Android/action_grounding_20250429_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 17838 samples from VC:s3://gui-agent/data_20250421/Android/action_grounding_20250429_202507011.jsonl +Rank 0: Loading android_action_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/Android/action_grounding_20250429_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 9008 samples from VC:s3://gui-agent/data_20250428/Android/action_grounding_20250429_202507011.jsonl +Rank 0: Loading web_canvas_action_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/web_canvas/action_grounding_20250429_202507011.jsonl with random:20% sampling strategy +Rank 0: Loaded 624 samples from VC:s3://gui-agent/data_20250428/web_canvas/action_grounding_20250429_202507011.jsonl +Rank 0: Loading web_action_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/web/action_grounding_20250505_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 100652 samples from VC:s3://gui-agent/data_20250421/web/action_grounding_20250505_202507011.jsonl +Rank 0: Loading ubuntu_action_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/ubuntu/action_grounding_20250505_202507011.jsonl with all sampling strategy +Rank 0: Loaded 28346 samples from VC:s3://gui-agent/data_20250428/ubuntu/action_grounding_20250505_202507011.jsonl +Rank 0: Loading android_action_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/android/action_grounding_20250506_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 4907 samples from VC:s3://gui-agent/data_20250505/android/action_grounding_20250506_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/action_grounding_20250508_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 2635 samples from VC:s3://gui-agent/data_20250505/windows/action_grounding_20250508_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/crop_action_grounding_20250508_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 5234 samples from VC:s3://gui-agent/data_20250505/windows/crop_action_grounding_20250508_202507011_20250722.jsonl +Rank 0: Loading ubuntu_action_grounding_20250508 +Rank 0: Loading VC:s3://gui-agent/data_20250508/ubuntu/action_grounding_20250509_202507011.jsonl with all sampling strategy +Rank 0: Loaded 3404 samples from VC:s3://gui-agent/data_20250508/ubuntu/action_grounding_20250509_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250510_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250510_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250510_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250510_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250526_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250526_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250526_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250526_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250526_3 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250527_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250527_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_3 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250527_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250527_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250529_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 17050 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250529_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250529_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 17050 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250529_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_1 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250510_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 2855 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250510_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_2 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250526_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 655 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250526_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_3 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250527_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 833 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250527_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_4 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250529_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 2643 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250529_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_1 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250510_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250510_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_2 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250526_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250526_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_3 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250527_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250527_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_4 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250529_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5286 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250529_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_5 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250510_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250510_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_6 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250526_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250526_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_7 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250527_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250527_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_8 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250529_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5286 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250529_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/action_grounding_20250619_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3130 samples from VC:s3://gui-agent/data_20250623/windows/action_grounding_20250619_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250619_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 3130 samples from VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250619_202507011_20250722.jsonl +Rank 0: Loading windows_hover_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/action_grounding_20250620_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 3333 samples from VC:s3://gui-agent/data_20250623/windows/action_grounding_20250620_202507011_20250722.jsonl +Rank 0: Loading windows_crop_hover_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250620_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 3333 samples from VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250620_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_1 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 833 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_2 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1666 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_3 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1666 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_pure_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_4 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 358 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_5 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 782 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_6 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 782 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_pure_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/action_grounding_20250627_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7946 samples from VC:s3://gui-agent/data_20250630/windows/action_grounding_20250627_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/crop_action_grounding_20250627_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 3973 samples from VC:s3://gui-agent/data_20250630/windows/crop_action_grounding_20250627_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_1 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 993 samples from VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1986 samples from VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1986 samples from VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_pure_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/action_grounding_20250630_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 1990 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/action_grounding_20250630_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_action_grounding_20250630_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1990 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_action_grounding_20250630_202507011_20250722.jsonl +Rank 0: Loading windows_human_action_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/action_grounding_20250703_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 5040 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/action_grounding_20250703_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_action_grounding_20250703_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 5040 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_action_grounding_20250703_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_4 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 630 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_5 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1260 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_6 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1260 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_pure_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_7 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 249 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_8 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 498 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_9 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 498 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_pure_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/action_grounding_20250708_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2538 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/action_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_action_grounding_20250708_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1269 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_action_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250707_1 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_action_grounding_20250708.jsonl with random:25% sampling strategy +Rank 0: Loaded 172 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_action_grounding_20250708.jsonl +Rank 0: Loading windows_aug_action_grounding_20250707_2 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_action_grounding_20250708.jsonl with random:25% sampling strategy +Rank 0: Loaded 634 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_action_grounding_20250708.jsonl +Rank 0: Loading windows_aug_action_grounding_20250707_3 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_action_grounding_20250708.jsonl with random:25% sampling strategy +Rank 0: Loaded 634 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_action_grounding_20250708.jsonl +Rank 0: Loading windows_human_action_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/action_grounding_20250717_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2832 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/action_grounding_20250717_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_action_grounding_20250717_20250722.jsonl with all sampling strategy +Rank 0: Loaded 2832 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_action_grounding_20250717_20250722.jsonl +Rank 0: Loading android_ocr_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/android/text_ocr_20250409.jsonl with all sampling strategy +Rank 0: Loaded 29878 samples from VC:s3://gui/data_20250328/android/text_ocr_20250409.jsonl +Rank 0: Loading mac_orc_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/mac/element_ocr_20250328.jsonl with all sampling strategy +Rank 0: Loaded 4393 samples from VC:s3://gui/data_20250328/mac/element_ocr_20250328.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/ubuntu/internvl_grounding_20250407.jsonl with random:80% sampling strategy +Rank 0: Loaded 254 samples from VC:s3://gui/data_20250310/ubuntu/internvl_grounding_20250407.jsonl +Rank 0: Loading windows_click_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/internvl_grounding_function_20250421.jsonl with random:80% sampling strategy +Rank 0: Loaded 1802 samples from VC:s3://gui/data_20250310/windows/internvl_grounding_function_20250421.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/ubuntu/internvl_grounding_20250407.jsonl with random:80% sampling strategy +Rank 0: Loaded 53 samples from VC:s3://gui/data_20250317/ubuntu/internvl_grounding_20250407.jsonl +Rank 0: Loading windows_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/internvl_grounding_20250421_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 6578 samples from VC:s3://gui/data_20250317/windows/internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/crop_internvl_grounding_20250421_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 3287 samples from VC:s3://gui/data_20250317/windows/crop_internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading windows_click_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/internvl_grounding_function_20250421_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 542 samples from VC:s3://gui/data_20250317/windows/internvl_grounding_function_20250421_20250722.jsonl +Rank 0: Loading windows_crop_click_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/crop_internvl_grounding_function_20250421_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 270 samples from VC:s3://gui/data_20250317/windows/crop_internvl_grounding_function_20250421_20250722.jsonl +Rank 0: Loading windows_internvl_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/internvl_grounding_20250421_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 10908 samples from VC:s3://gui/data_20250310/windows/internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/crop_internvl_grounding_20250421_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 5453 samples from VC:s3://gui/data_20250310/windows/crop_internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading android_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/android/internvl_grounding_20250409.jsonl with random:80% sampling strategy +Rank 0: Loaded 13950 samples from VC:s3://gui/data_20250328/android/internvl_grounding_20250409.jsonl +Rank 0: Loading windows_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui-agent/data_20250328/windows/internvl_grounding_20250425_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 12390 samples from VC:s3://gui-agent/data_20250328/windows/internvl_grounding_20250425_20250722.jsonl +Rank 0: Loading web_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/web_25k/internvl_grounding_20250409.jsonl with random:80% sampling strategy +Rank 0: Loaded 13402 samples from VC:s3://gui/data_20250328/web_25k/internvl_grounding_20250409.jsonl +Rank 0: Loading icon_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/icon_canva/icon_anno_20250328.jsonl with all sampling strategy +Rank 0: Loaded 81303 samples from VC:s3://gui/data_20250328/icon_canva/icon_anno_20250328.jsonl +Rank 0: Loading mac_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/mac/internvl_grounding_20250410.jsonl with all sampling strategy +Rank 0: Loaded 1251 samples from VC:s3://gui-agent/data_20250407/mac/internvl_grounding_20250410.jsonl +Rank 0: Loading iphone_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/iphone/white/internvl_grounding_20250410.jsonl with all sampling strategy +Rank 0: Loaded 27849 samples from VC:s3://gui-agent/data_20250407/iphone/white/internvl_grounding_20250410.jsonl +Rank 0: Loading web_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/web/internvl_grounding_20250414.jsonl with random:80% sampling strategy +Rank 0: Loaded 51607 samples from VC:s3://gui-agent/data_20250407/web/internvl_grounding_20250414.jsonl +Rank 0: Loading android_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/android/internvl_grounding_20250410.jsonl with random:80% sampling strategy +Rank 0: Loaded 6281 samples from VC:s3://gui-agent/data_20250407/android/internvl_grounding_20250410.jsonl +Rank 0: Loading windows_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/internvl_grounding_20250416_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 25961 samples from VC:s3://gui-agent/data_20250407/windows/internvl_grounding_20250416_20250722.jsonl +Rank 0: Loading windows_cropping_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/sub_internvl_grounding_20250421.jsonl with random:40% sampling strategy +Rank 0: Loaded 9763 samples from VC:s3://gui-agent/data_20250407/windows/sub_internvl_grounding_20250421.jsonl +Rank 0: Loading iphone_internvl_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/internvl_grounding_20250417.jsonl with all sampling strategy +Rank 0: Loaded 16896 samples from VC:s3://gui-agent/data_20250414/iphone/internvl_grounding_20250417.jsonl +Rank 0: Loading iphone_human_internvl_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/human_internvl_grounding_20250421.jsonl with all sampling strategy +Rank 0: Loaded 927 samples from VC:s3://gui-agent/data_20250414/iphone/human_internvl_grounding_20250421.jsonl +Rank 0: Loading mac_human_internvl_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/mac/human_internvl_grounding_20250418.jsonl with all sampling strategy +Rank 0: Loaded 3051 samples from VC:s3://gui-agent/data_20250414/mac/human_internvl_grounding_20250418.jsonl +Rank 0: Loading android_internvl_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/Android/internvl_grounding_20250429.jsonl with random:80% sampling strategy +Rank 0: Loaded 25217 samples from VC:s3://gui-agent/data_20250421/Android/internvl_grounding_20250429.jsonl +Rank 0: Loading android_internvl_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/Android/internvl_grounding_20250429.jsonl with random:80% sampling strategy +Rank 0: Loaded 12677 samples from VC:s3://gui-agent/data_20250428/Android/internvl_grounding_20250429.jsonl +Rank 0: Loading web_canvas_internvl_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/web_canvas/internvl_grounding_20250429.jsonl with random:40% sampling strategy +Rank 0: Loaded 1566 samples from VC:s3://gui-agent/data_20250428/web_canvas/internvl_grounding_20250429.jsonl +Rank 0: Loading web_internvl_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/web/internvl_grounding_20250505.jsonl with random:80% sampling strategy +Rank 0: Loaded 174170 samples from VC:s3://gui-agent/data_20250421/web/internvl_grounding_20250505.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/ubuntu/internvl_grounding_20250505.jsonl with random:80% sampling strategy +Rank 0: Loaded 24862 samples from VC:s3://gui-agent/data_20250428/ubuntu/internvl_grounding_20250505.jsonl +Rank 0: Loading android_internvl_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/android/internvl_grounding_20250506.jsonl with random:80% sampling strategy +Rank 0: Loaded 9142 samples from VC:s3://gui-agent/data_20250505/android/internvl_grounding_20250506.jsonl +Rank 0: Loading windows_internvl_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/internvl_grounding_20250508_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 4229 samples from VC:s3://gui-agent/data_20250505/windows/internvl_grounding_20250508_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/crop_internvl_grounding_20250508_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 4200 samples from VC:s3://gui-agent/data_20250505/windows/crop_internvl_grounding_20250508_20250722.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250508 +Rank 0: Loading VC:s3://gui-agent/data_20250508/ubuntu/internvl_grounding_20250509.jsonl with random:80% sampling strategy +Rank 0: Loaded 2868 samples from VC:s3://gui-agent/data_20250508/ubuntu/internvl_grounding_20250509.jsonl +Rank 0: Loading windows_internvl_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250510_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 9494 samples from VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250510_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250510_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 9494 samples from VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250510_20250722.jsonl +Rank 0: Loading windows_internvl_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250526_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 2270 samples from VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250526_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250526_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 2270 samples from VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250526_20250722.jsonl +Rank 0: Loading windows_internvl_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250529_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 28466 samples from VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250529_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250529_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 28466 samples from VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250529_20250722.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250619_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1620 samples from VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250619_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250619_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1620 samples from VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250619_20250722.jsonl +Rank 0: Loading windows_hover_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250620_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1714 samples from VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250620_20250722.jsonl +Rank 0: Loading windows_crop_hover_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250620_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 1714 samples from VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250620_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_1 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 1372 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_2 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2743 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_3 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2743 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_pure_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_4 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 590 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_5 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 1296 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_6 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 1296 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_pure_paste.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/internvl_grounding_20250627_20250722.jsonl with all sampling strategy +Rank 0: Loaded 4230 samples from VC:s3://gui-agent/data_20250630/windows/internvl_grounding_20250627_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/crop_internvl_grounding_20250627_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 4230 samples from VC:s3://gui-agent/data_20250630/windows/crop_internvl_grounding_20250627_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_1 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 1692 samples from VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 3384 samples from VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 3384 samples from VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_pure_paste.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/internvl_grounding_20250630_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1077 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/internvl_grounding_20250630_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_internvl_grounding_20250630_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 862 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_internvl_grounding_20250630_20250722.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/internvl_grounding_20250703_20250722.jsonl with all sampling strategy +Rank 0: Loaded 2676 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/internvl_grounding_20250703_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_internvl_grounding_20250703_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 2676 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_internvl_grounding_20250703_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_4 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 1070 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_5 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2141 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_6 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2141 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_pure_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_7 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 431 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_8 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 862 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_9 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 862 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_pure_paste.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/internvl_grounding_20250708_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1344 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/internvl_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_internvl_grounding_20250708_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1344 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_internvl_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250707_1 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_internvl_grounding_20250708.jsonl with random:40% sampling strategy +Rank 0: Loaded 292 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_internvl_grounding_20250708.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250707_2 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_internvl_grounding_20250708.jsonl with random:40% sampling strategy +Rank 0: Loaded 1075 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_internvl_grounding_20250708.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250707_3 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_internvl_grounding_20250708.jsonl with random:40% sampling strategy +Rank 0: Loaded 1075 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_internvl_grounding_20250708.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/internvl_grounding_20250717_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1509 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/internvl_grounding_20250717_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_internvl_grounding_20250717_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 1207 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_internvl_grounding_20250717_20250722.jsonl +Rank 0: Loading uibert_train_ground_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/uibert_train_ground_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 4646 samples from VC:s3://gui/new_annotations/gui_data_grounding/uibert_train_ground_d240430_v1.jsonl +Rank 0: Loading openapp_taperception_grounding_d240815_v2 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/openapp_taperception_grounding_d240815_v2.jsonl with all sampling strategy +Rank 0: Loaded 2500 samples from VC:s3://gui/new_annotations/gui_data_grounding/openapp_taperception_grounding_d240815_v2.jsonl +Rank 0: Loading openapp_widget_grounding_d240815_v2 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/openapp_widget_grounding_d240815_v2.jsonl with all sampling strategy +Rank 0: Loaded 14878 samples from VC:s3://gui/new_annotations/gui_data_grounding/openapp_widget_grounding_d240815_v2.jsonl +Rank 0: Loading openapp_mug_grounding_d240812 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/openapp_mug_grounding_d240812.jsonl with all sampling strategy +Rank 0: Loaded 26090 samples from VC:s3://gui/new_annotations/gui_data_grounding/openapp_mug_grounding_d240812.jsonl +Rank 0: Loading private_ui_phone_2403_ground_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/private_ui_phone_2403_ground_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 24798 samples from VC:s3://gui/new_annotations/gui_data_grounding/private_ui_phone_2403_ground_d240430_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_ground_d240521_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2405_ground_d240521_v1.jsonl with all sampling strategy +Rank 0: Loaded 5008 samples from VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2405_ground_d240521_v1.jsonl +Rank 0: Loading private_ui_aig_share_2406_ground_d240612_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2406_ground_d240612_v1.jsonl with all sampling strategy +Rank 0: Loaded 7903 samples from VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2406_ground_d240612_v1.jsonl +Rank 0: Loading windows_pc_agent_e_planning_cot +Rank 0: Loading VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 55564 samples from VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data.jsonl +Rank 0: Loading windows_pc_agent_e_navigation +Rank 0: Loading VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data_without_think.jsonl with all sampling strategy +Rank 0: Loaded 27782 samples from VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data_without_think.jsonl +Rank 0: Loading os_genesis_ac_training_data +Rank 0: Skipping os_genesis_ac_training_data due to repeat_time=0 +Rank 0: Loading os_genesis_aw_training_data +Rank 0: Skipping os_genesis_aw_training_data due to repeat_time=0 +Rank 0: Loading os_genesis_web_training +Rank 0: Skipping os_genesis_web_training due to repeat_time=0 +Rank 0: Loading gui_odyssey_plus_1 +Rank 0: Skipping gui_odyssey_plus_1 due to repeat_time=0 +Rank 0: Loading gui_odyssey_plus_2 +Rank 0: Skipping gui_odyssey_plus_2 due to repeat_time=0 +Rank 0: Loading gui_odyssey_plus_custom_3 +Rank 0: Skipping gui_odyssey_plus_custom_3 due to repeat_time=0 +Rank 0: Loading mm_gui_mid +Rank 0: Skipping mm_gui_mid due to repeat_time=0 +Rank 0: Loading text_gui_mid +Rank 0: Skipping text_gui_mid due to repeat_time=0 +Rank 0: Loading gui_mid_trajectory +Rank 0: Skipping gui_mid_trajectory due to repeat_time=0 +Rank 0: Loading ubuntu_rag +Rank 0: Loading VC:s3://gui-agent/cua_text_rag/ubuntu_rag.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7024 samples from VC:s3://gui-agent/cua_text_rag/ubuntu_rag.jsonl +Rank 0: Loading windows_rag +Rank 0: Loading VC:s3://gui-agent/cua_text_rag/windows_rag.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3144 samples from VC:s3://gui-agent/cua_text_rag/windows_rag.jsonl +Rank 0: Total training samples: 6240940 +Rank 0: Formatting inputs...Skip in lazy mode +Detected kernel version 3.10.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher. +Rank 0: Length of multimodal samples: 6230528, pure textual samples: 9984 +Parameter Offload: Total persistent parameters: 755712 in 408 params + 0%| | 0/12188 [00:00 + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f3532c78950> +[Try #0] Failed to fetch sample 4385311 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f3532c78950> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'HyperKitty'"}, {'from': 'gpt', 'value': '\nclick(x=0.4145, y=0.2765)\n'}]} + 91%|█████████▏| 11124/12188 [17:49<2:19:38, 7.87s/it] {'loss': 0.2836, 'grad_norm': 0.6790239151767222, 'learning_rate': 1.9890985911810322e-07, 'epoch': 0.91} + 91%|█████████▏| 11124/12188 [17:49<2:19:38, 7.87s/it] 91%|█████████▏| 11125/12188 [17:56<2:16:17, 7.69s/it] {'loss': 0.2791, 'grad_norm': 0.6587040853166876, 'learning_rate': 1.9853898562667174e-07, 'epoch': 0.91} + 91%|█████████▏| 11125/12188 [17:56<2:16:17, 7.69s/it] 91%|█████████▏| 11126/12188 [18:03<2:11:31, 7.43s/it] {'loss': 0.2889, 'grad_norm': 0.6824961113752172, 'learning_rate': 1.981684512067028e-07, 'epoch': 0.91} + 91%|█████████▏| 11126/12188 [18:03<2:11:31, 7.43s/it] 91%|█████████▏| 11127/12188 [18:09<2:07:36, 7.22s/it] {'loss': 0.3162, 'grad_norm': 0.7097017207673376, 'learning_rate': 1.9779825588436276e-07, 'epoch': 0.91} + 91%|█████████▏| 11127/12188 [18:09<2:07:36, 7.22s/it] 91%|█████████▏| 11128/12188 [18:17<2:08:57, 7.30s/it] {'loss': 0.2832, 'grad_norm': 0.7033965695901531, 'learning_rate': 1.9742839968579562e-07, 'epoch': 0.91} + 91%|█████████▏| 11128/12188 [18:17<2:08:57, 7.30s/it] 91%|█████████▏| 11129/12188 [18:24<2:08:53, 7.30s/it] {'loss': 0.2795, 'grad_norm': 0.7193806735637294, 'learning_rate': 1.9705888263711837e-07, 'epoch': 0.91} + 91%|█████████▏| 11129/12188 [18:24<2:08:53, 7.30s/it] 91%|█████████▏| 11130/12188 [18:31<2:05:46, 7.13s/it] {'loss': 0.3196, 'grad_norm': 0.7785751533874746, 'learning_rate': 1.9668970476442617e-07, 'epoch': 0.91} + 91%|█████████▏| 11130/12188 [18:31<2:05:46, 7.13s/it] 91%|█████████▏| 11131/12188 [18:38<2:07:17, 7.23s/it] {'loss': 0.2826, 'grad_norm': 0.747706814522192, 'learning_rate': 1.9632086609379041e-07, 'epoch': 0.91} + 91%|█████████▏| 11131/12188 [18:38<2:07:17, 7.23s/it] 91%|█████████▏| 11132/12188 [18:45<2:06:59, 7.22s/it] {'loss': 0.3364, 'grad_norm': 0.6666321133748045, 'learning_rate': 1.9595236665125694e-07, 'epoch': 0.91} + 91%|█████████▏| 11132/12188 [18:45<2:06:59, 7.22s/it] 91%|█████████▏| 11133/12188 [18:55<2:17:00, 7.79s/it] {'loss': 0.2844, 'grad_norm': 0.9020386624153149, 'learning_rate': 1.9558420646284937e-07, 'epoch': 0.91} + 91%|█████████▏| 11133/12188 [18:55<2:17:00, 7.79s/it] 91%|█████████▏| 11134/12188 [19:03<2:19:34, 7.95s/it] {'loss': 0.3147, 'grad_norm': 0.6848070301164295, 'learning_rate': 1.952163855545658e-07, 'epoch': 0.91} + 91%|█████████▏| 11134/12188 [19:03<2:19:34, 7.95s/it] 91%|█████████▏| 11135/12188 [19:10<2:13:23, 7.60s/it] {'loss': 0.3079, 'grad_norm': 0.7347942604126345, 'learning_rate': 1.9484890395238155e-07, 'epoch': 0.91} + 91%|█████████▏| 11135/12188 [19:10<2:13:23, 7.60s/it] 91%|█████████▏| 11136/12188 [19:17<2:09:39, 7.40s/it] {'loss': 0.2926, 'grad_norm': 0.6552335060912623, 'learning_rate': 1.9448176168224863e-07, 'epoch': 0.91} + 91%|█████████▏| 11136/12188 [19:17<2:09:39, 7.40s/it] 91%|█████████▏| 11137/12188 [19:23<2:06:08, 7.20s/it] {'loss': 0.3275, 'grad_norm': 0.7431948659703455, 'learning_rate': 1.941149587700919e-07, 'epoch': 0.91} + 91%|█████████▏| 11137/12188 [19:23<2:06:08, 7.20s/it] 91%|█████████▏| 11138/12188 [19:30<2:03:25, 7.05s/it] {'loss': 0.3061, 'grad_norm': 0.6888139134291714, 'learning_rate': 1.9374849524181617e-07, 'epoch': 0.91} + 91%|█████████▏| 11138/12188 [19:30<2:03:25, 7.05s/it] 91%|█████████▏| 11139/12188 [19:37<2:02:15, 6.99s/it] {'loss': 0.2877, 'grad_norm': 0.770699305983485, 'learning_rate': 1.933823711232996e-07, 'epoch': 0.91} + 91%|█████████▏| 11139/12188 [19:37<2:02:15, 6.99s/it] 91%|█████████▏| 11140/12188 [19:44<2:00:31, 6.90s/it] {'loss': 0.2736, 'grad_norm': 0.702990393012892, 'learning_rate': 1.9301658644039712e-07, 'epoch': 0.91} + 91%|█████████▏| 11140/12188 [19:44<2:00:31, 6.90s/it] 91%|█████████▏| 11141/12188 [19:51<2:01:11, 6.94s/it] {'loss': 0.2947, 'grad_norm': 0.698735287666653, 'learning_rate': 1.9265114121894135e-07, 'epoch': 0.91} + 91%|█████████▏| 11141/12188 [19:51<2:01:11, 6.94s/it] 91%|█████████▏| 11142/12188 [19:59<2:06:48, 7.27s/it] {'loss': 0.3121, 'grad_norm': 0.736530855105418, 'learning_rate': 1.922860354847378e-07, 'epoch': 0.91} + 91%|█████████▏| 11142/12188 [19:59<2:06:48, 7.27s/it] 91%|█████████▏| 11143/12188 [20:08<2:16:20, 7.83s/it] {'loss': 0.3031, 'grad_norm': 0.7624451636130015, 'learning_rate': 1.919212692635708e-07, 'epoch': 0.91} + 91%|█████████▏| 11143/12188 [20:08<2:16:20, 7.83s/it] 91%|█████████▏| 11144/12188 [20:15<2:13:57, 7.70s/it] {'loss': 0.2872, 'grad_norm': 0.7231986919453687, 'learning_rate': 1.915568425811981e-07, 'epoch': 0.91} + 91%|█████████▏| 11144/12188 [20:15<2:13:57, 7.70s/it] 91%|█████████▏| 11145/12188 [20:24<2:18:18, 7.96s/it] {'loss': 0.2788, 'grad_norm': 0.7059618446721315, 'learning_rate': 1.9119275546335637e-07, 'epoch': 0.91} + 91%|█████████▏| 11145/12188 [20:24<2:18:18, 7.96s/it] 91%|█████████▏| 11146/12188 [20:30<2:11:35, 7.58s/it] {'loss': 0.2698, 'grad_norm': 0.8206362994087879, 'learning_rate': 1.9082900793575665e-07, 'epoch': 0.91} + 91%|█████████▏| 11146/12188 [20:30<2:11:35, 7.58s/it] 91%|█████████▏| 11147/12188 [20:37<2:07:25, 7.34s/it] {'loss': 0.2781, 'grad_norm': 0.6909900385957128, 'learning_rate': 1.904656000240851e-07, 'epoch': 0.91} + 91%|█████████▏| 11147/12188 [20:37<2:07:25, 7.34s/it] 91%|█████████▏| 11148/12188 [20:45<2:11:13, 7.57s/it] {'loss': 0.3183, 'grad_norm': 0.7243306221020087, 'learning_rate': 1.901025317540056e-07, 'epoch': 0.91} + 91%|█████████▏| 11148/12188 [20:45<2:11:13, 7.57s/it] 91%|█████████▏| 11149/12188 [20:54<2:14:48, 7.78s/it] {'loss': 0.3329, 'grad_norm': 0.6589712486836504, 'learning_rate': 1.897398031511588e-07, 'epoch': 0.91} + 91%|█████████▏| 11149/12188 [20:54<2:14:48, 7.78s/it] 91%|█████████▏| 11150/12188 [21:01<2:13:50, 7.74s/it] {'loss': 0.2968, 'grad_norm': 0.8506068981417999, 'learning_rate': 1.8937741424115751e-07, 'epoch': 0.91} + 91%|█████████▏| 11150/12188 [21:01<2:13:50, 7.74s/it] 91%|█████████▏| 11151/12188 [21:09<2:14:33, 7.79s/it] {'loss': 0.293, 'grad_norm': 0.6622827029652892, 'learning_rate': 1.8901536504959516e-07, 'epoch': 0.91} + 91%|█████████▏| 11151/12188 [21:09<2:14:33, 7.79s/it] 91%|█████████▏| 11152/12188 [21:16<2:09:16, 7.49s/it] {'loss': 0.3189, 'grad_norm': 0.6502361647317142, 'learning_rate': 1.8865365560203797e-07, 'epoch': 0.91} + 91%|█████████▏| 11152/12188 [21:16<2:09:16, 7.49s/it] 92%|█████████▏| 11153/12188 [21:24<2:14:04, 7.77s/it] {'loss': 0.2904, 'grad_norm': 0.6789267017042745, 'learning_rate': 1.8829228592402938e-07, 'epoch': 0.92} + 92%|█████████▏| 11153/12188 [21:24<2:14:04, 7.77s/it] 92%|█████████▏| 11154/12188 [21:32<2:10:51, 7.59s/it] {'loss': 0.291, 'grad_norm': 0.714092976199147, 'learning_rate': 1.879312560410901e-07, 'epoch': 0.92} + 92%|█████████▏| 11154/12188 [21:32<2:10:51, 7.59s/it] 92%|█████████▏| 11155/12188 [21:38<2:06:32, 7.35s/it] {'loss': 0.2975, 'grad_norm': 0.7759248818197009, 'learning_rate': 1.8757056597871305e-07, 'epoch': 0.92} + 92%|█████████▏| 11155/12188 [21:38<2:06:32, 7.35s/it] 92%|█████████▏| 11156/12188 [21:45<2:04:28, 7.24s/it] {'loss': 0.365, 'grad_norm': 0.7516616697353574, 'learning_rate': 1.8721021576237175e-07, 'epoch': 0.92} + 92%|█████████▏| 11156/12188 [21:45<2:04:28, 7.24s/it] 92%|█████████▏| 11157/12188 [21:53<2:05:01, 7.28s/it] {'loss': 0.2707, 'grad_norm': 0.7283765535471501, 'learning_rate': 1.8685020541751197e-07, 'epoch': 0.92} + 92%|█████████▏| 11157/12188 [21:53<2:05:01, 7.28s/it] 92%|█████████▏| 11158/12188 [22:00<2:02:36, 7.14s/it] {'loss': 0.2765, 'grad_norm': 0.7078251630277431, 'learning_rate': 1.8649053496955838e-07, 'epoch': 0.92} + 92%|█████████▏| 11158/12188 [22:00<2:02:36, 7.14s/it] 92%|█████████▏| 11159/12188 [22:07<2:04:54, 7.28s/it] {'loss': 0.3137, 'grad_norm': 0.691382368270731, 'learning_rate': 1.8613120444390952e-07, 'epoch': 0.92} + 92%|█████████▏| 11159/12188 [22:07<2:04:54, 7.28s/it] 92%|█████████▏| 11160/12188 [22:19<2:26:40, 8.56s/it] {'loss': 0.3672, 'grad_norm': 0.7192342444066734, 'learning_rate': 1.857722138659407e-07, 'epoch': 0.92} + 92%|█████████▏| 11160/12188 [22:19<2:26:40, 8.56s/it] 92%|█████████▏| 11161/12188 [22:26<2:18:04, 8.07s/it] {'loss': 0.2865, 'grad_norm': 0.7378114973524816, 'learning_rate': 1.8541356326100436e-07, 'epoch': 0.92} + 92%|█████████▏| 11161/12188 [22:26<2:18:04, 8.07s/it] 92%|█████████▏| 11162/12188 [22:33<2:12:25, 7.74s/it] {'loss': 0.3088, 'grad_norm': 0.6665529478126981, 'learning_rate': 1.850552526544258e-07, 'epoch': 0.92} + 92%|█████████▏| 11162/12188 [22:33<2:12:25, 7.74s/it] 92%|█████████▏| 11163/12188 [22:40<2:11:47, 7.72s/it] {'loss': 0.279, 'grad_norm': 0.6862573149855202, 'learning_rate': 1.8469728207150982e-07, 'epoch': 0.92} + 92%|█████████▏| 11163/12188 [22:40<2:11:47, 7.72s/it] 92%|█████████▏| 11164/12188 [22:47<2:06:38, 7.42s/it] {'loss': 0.2917, 'grad_norm': 0.7769765726305703, 'learning_rate': 1.8433965153753563e-07, 'epoch': 0.92} + 92%|█████████▏| 11164/12188 [22:47<2:06:38, 7.42s/it] 92%|█████████▏| 11165/12188 [22:54<2:03:21, 7.24s/it] {'loss': 0.3242, 'grad_norm': 0.7887942169010749, 'learning_rate': 1.8398236107775802e-07, 'epoch': 0.92} + 92%|█████████▏| 11165/12188 [22:54<2:03:21, 7.24s/it] 92%|█████████▏| 11166/12188 [23:00<2:00:02, 7.05s/it] {'loss': 0.3037, 'grad_norm': 0.680112151877218, 'learning_rate': 1.8362541071740846e-07, 'epoch': 0.92} + 92%|█████████▏| 11166/12188 [23:00<2:00:02, 7.05s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 90, in pil_loader + return img.convert("RGB") + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 993, in convert + self.load() + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/ImageFile.py", line 319, in load + raise _get_oserror(err_code, encoder=False) +OSError: broken data stream when reading image file +[Try #0] Failed to fetch sample 6014818 in VC:s3://gui-agent/data_20250623/windows_augment/images. Exception: broken data stream when reading image file +Problematic sample: {'image': 'autocad/20250509_125727_1/images/before_screenshot_1_id_121_internvl_position_crop_1_grounding_instructions_point_o_paste.png', 'conversations': [{'from': 'human', 'value': "\nOnly respond with the coordinates: The 'CUSTOMIZE' text is located in the left side of the application window, within the Tool Palettes section. It's positioned at the top of what appears to be a customization panel, directly above the helper text that reads 'Press F1 for more help'."}, {'from': 'gpt', 'value': "The 'CUSTOMIZE' text is located in the left side of the application window, within the Tool Palettes section. It's positioned at the top of what appears to be a customization panel, directly above the helper text that reads 'Press F1 for more help'.[[495, 550, 515, 557]]"}], 'width': 3600, 'height': 2338} + 92%|█████████▏| 11167/12188 [23:10<2:15:12, 7.95s/it] {'loss': 0.3019, 'grad_norm': 0.6690636849490375, 'learning_rate': 1.832688004816946e-07, 'epoch': 0.92} + 92%|█████████▏| 11167/12188 [23:10<2:15:12, 7.95s/it] 92%|█████████▏| 11168/12188 [23:17<2:09:53, 7.64s/it] {'loss': 0.2645, 'grad_norm': 1.2572881179883488, 'learning_rate': 1.8291253039579905e-07, 'epoch': 0.92} + 92%|█████████▏| 11168/12188 [23:17<2:09:53, 7.64s/it] 92%|█████████▏| 11169/12188 [23:24<2:05:24, 7.38s/it] {'loss': 0.2978, 'grad_norm': 0.7028917876222944, 'learning_rate': 1.8255660048488223e-07, 'epoch': 0.92} + 92%|█████████▏| 11169/12188 [23:24<2:05:24, 7.38s/it] 92%|█████████▏| 11170/12188 [23:31<2:02:56, 7.25s/it] {'loss': 0.3347, 'grad_norm': 0.7176969095309031, 'learning_rate': 1.8220101077407738e-07, 'epoch': 0.92} + 92%|█████████▏| 11170/12188 [23:31<2:02:56, 7.25s/it] 92%|█████████▏| 11171/12188 [23:40<2:13:11, 7.86s/it] {'loss': 0.2864, 'grad_norm': 0.7158373179300058, 'learning_rate': 1.8184576128849773e-07, 'epoch': 0.92} + 92%|█████████▏| 11171/12188 [23:40<2:13:11, 7.86s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1348, in _get_item + raise ValueError( +ValueError: Number of image tokens ['63eeb12752a6426abeb129b8049d5bddstep20.png', '63eeb12752a6426abeb129b8049d5bddstep21.png'] does not match number of images None +[Try #0] Failed to fetch sample 1057917 in VC:s3://gui/aguvis/aguvis-stage2/amex/images. Exception: Number of image tokens ['63eeb12752a6426abeb129b8049d5bddstep20.png', '63eeb12752a6426abeb129b8049d5bddstep21.png'] does not match number of images None +Problematic sample: {'image': ['63eeb12752a6426abeb129b8049d5bddstep20.png', '63eeb12752a6426abeb129b8049d5bddstep21.png'], 'conversations': [{'from': 'human', 'value': "\nPlease generate the next move according to the UI screenshot, the task and previous operations.\n\nTask:\nI want to book a hotel in london, prize should be less than $600, guest rating is 8+, 4 star rating, breakfast included\n\nPrevious operations:\nStep 1: Tap on the Chrome app to start searching for hotels in London.\nStep 2: Type 'Chrome' in the search bar to open the Chrome browser.\nStep 3: Tap on the Hotels.com app icon to begin searching for a hotel.\nStep 4: Wait for the Hotels.com app to finish loading to proceed with the hotel search.\nStep 5: Tap on the 'Going to' field to enter London as the destination.\nStep 6: Tap on 'London' from Recent Searches to select it as the destination for the hotel search.\nStep 7: Tap on 'London, England, United Kingdom' to select it as the destination for the hotel search.\nStep 8: Tap on the 'Search' button to view available hotels in London.\nStep 9: Tap on the filter icon or option to apply filters to the hotel search results.\nStep 10: Swipe up on the screen to reveal more filter options, such as guest rating and hotel stars.\nStep 11: Swipe up to reveal more filter options, specifically guest ratings and hotel stars.\nStep 12: Tap on the 4-star rating box to select it.\nStep 13: Tap 'Done' to confirm the current filter selections and proceed.\nStep 14: Swipe up on the screen to locate and select the 'breakfast included' filter option.\nStep 15: Tap on 'Breakfast included' to select it.\nStep 16: Tap on the 'Done' button to apply the filters and view available hotel options.\nStep 17: Tap on the topmost hotel card to view its details.\nStep 18: Tap on the 'Select a room' button to check available room options and their prices.\nStep 19: Tap on 'Select and customize' to proceed with the room selection and customization options.\nStep 20: Tap on 'Breakfast buffet' to select it, then tap on 'Reserve' to book the room."}, {'from': 'gpt', 'value': "\nWait for the loading to complete to confirm the booking status.\n\n\nterminate(status='success')\n"}]} + 92%|█████████▏| 11172/12188 [23:47<2:06:49, 7.49s/it] {'loss': 0.2673, 'grad_norm': 0.6655326255209072, 'learning_rate': 1.814908520532288e-07, 'epoch': 0.92} + 92%|█████████▏| 11172/12188 [23:47<2:06:49, 7.49s/it] 92%|█████████▏| 11173/12188 [23:55<2:08:45, 7.61s/it] {'loss': 0.2725, 'grad_norm': 0.7809866274413236, 'learning_rate': 1.8113628309333497e-07, 'epoch': 0.92} + 92%|█████████▏| 11173/12188 [23:55<2:08:45, 7.61s/it] 92%|█████████▏| 11174/12188 [24:04<2:18:17, 8.18s/it] {'loss': 0.292, 'grad_norm': 0.7103050785699644, 'learning_rate': 1.8078205443385456e-07, 'epoch': 0.92} + 92%|█████████▏| 11174/12188 [24:04<2:18:17, 8.18s/it] 92%|█████████▏| 11175/12188 [24:11<2:10:21, 7.72s/it] {'loss': 0.2725, 'grad_norm': 0.6786267554161841, 'learning_rate': 1.804281660998025e-07, 'epoch': 0.92} + 92%|█████████▏| 11175/12188 [24:11<2:10:21, 7.72s/it] 92%|█████████▏| 11176/12188 [24:19<2:08:55, 7.64s/it] {'loss': 0.36, 'grad_norm': 0.7095972952162718, 'learning_rate': 1.8007461811617e-07, 'epoch': 0.92} + 92%|█████████▏| 11176/12188 [24:19<2:08:55, 7.64s/it] 92%|█████████▏| 11177/12188 [24:26<2:05:56, 7.47s/it] {'loss': 0.2979, 'grad_norm': 0.6537372294375244, 'learning_rate': 1.797214105079248e-07, 'epoch': 0.92} + 92%|█████████▏| 11177/12188 [24:26<2:05:56, 7.47s/it] 92%|█████████▏| 11178/12188 [24:37<2:23:33, 8.53s/it] {'loss': 0.33, 'grad_norm': 0.6863056432077134, 'learning_rate': 1.793685433000081e-07, 'epoch': 0.92} + 92%|█████████▏| 11178/12188 [24:37<2:23:33, 8.53s/it] 92%|█████████▏| 11179/12188 [24:45<2:22:25, 8.47s/it] {'loss': 0.333, 'grad_norm': 0.7478876618257165, 'learning_rate': 1.7901601651734101e-07, 'epoch': 0.92} + 92%|█████████▏| 11179/12188 [24:45<2:22:25, 8.47s/it] 92%|█████████▏| 11180/12188 [24:52<2:14:26, 8.00s/it] {'loss': 0.2889, 'grad_norm': 0.7043269369950891, 'learning_rate': 1.7866383018481704e-07, 'epoch': 0.92} + 92%|█████████▏| 11180/12188 [24:52<2:14:26, 8.00s/it] 92%|█████████▏| 11181/12188 [25:02<2:26:54, 8.75s/it] {'loss': 0.287, 'grad_norm': 0.6728516621565506, 'learning_rate': 1.7831198432730678e-07, 'epoch': 0.92} + 92%|█████████▏| 11181/12188 [25:02<2:26:54, 8.75s/it] 92%|█████████▏| 11182/12188 [25:10<2:20:43, 8.39s/it] {'loss': 0.2746, 'grad_norm': 0.7438410016564606, 'learning_rate': 1.7796047896965817e-07, 'epoch': 0.92} + 92%|█████████▏| 11182/12188 [25:10<2:20:43, 8.39s/it] 92%|█████████▏| 11183/12188 [25:17<2:13:32, 7.97s/it] {'loss': 0.297, 'grad_norm': 0.67703646900804, 'learning_rate': 1.7760931413669303e-07, 'epoch': 0.92} + 92%|█████████▏| 11183/12188 [25:17<2:13:32, 7.97s/it] 92%|█████████▏| 11184/12188 [25:26<2:21:31, 8.46s/it] {'loss': 0.3127, 'grad_norm': 0.6902125534732799, 'learning_rate': 1.7725848985321037e-07, 'epoch': 0.92} + 92%|█████████▏| 11184/12188 [25:26<2:21:31, 8.46s/it] 92%|█████████▏| 11185/12188 [25:34<2:14:29, 8.05s/it] {'loss': 0.288, 'grad_norm': 0.723740009540524, 'learning_rate': 1.7690800614398483e-07, 'epoch': 0.92} + 92%|█████████▏| 11185/12188 [25:34<2:14:29, 8.05s/it] 92%|█████████▏| 11186/12188 [25:40<2:07:07, 7.61s/it] {'loss': 0.3186, 'grad_norm': 0.6888054669229188, 'learning_rate': 1.7655786303376775e-07, 'epoch': 0.92} + 92%|█████████▏| 11186/12188 [25:40<2:07:07, 7.61s/it] 92%|█████████▏| 11187/12188 [25:47<2:03:56, 7.43s/it] {'loss': 0.2765, 'grad_norm': 0.6411556557240754, 'learning_rate': 1.7620806054728434e-07, 'epoch': 0.92} + 92%|█████████▏| 11187/12188 [25:47<2:03:56, 7.43s/it] 92%|█████████▏| 11188/12188 [25:54<1:59:52, 7.19s/it] {'loss': 0.2944, 'grad_norm': 0.6934390484063051, 'learning_rate': 1.758585987092376e-07, 'epoch': 0.92} + 92%|█████████▏| 11188/12188 [25:54<1:59:52, 7.19s/it] 92%|█████████▏| 11189/12188 [26:01<2:01:44, 7.31s/it] {'loss': 0.2882, 'grad_norm': 0.6887914770827288, 'learning_rate': 1.7550947754430615e-07, 'epoch': 0.92} + 92%|█████████▏| 11189/12188 [26:01<2:01:44, 7.31s/it] 92%|█████████▏| 11190/12188 [26:08<1:57:42, 7.08s/it] {'loss': 0.316, 'grad_norm': 1.5085744481882182, 'learning_rate': 1.751606970771441e-07, 'epoch': 0.92} + 92%|█████████▏| 11190/12188 [26:08<1:57:42, 7.08s/it] 92%|█████████▏| 11191/12188 [26:15<1:57:17, 7.06s/it] {'loss': 0.2722, 'grad_norm': 0.7251863389098427, 'learning_rate': 1.7481225733238184e-07, 'epoch': 0.92} + 92%|█████████▏| 11191/12188 [26:15<1:57:17, 7.06s/it] 92%|█████████▏| 11192/12188 [26:23<2:02:16, 7.37s/it] {'loss': 0.2624, 'grad_norm': 0.8470808428606352, 'learning_rate': 1.7446415833462625e-07, 'epoch': 0.92} + 92%|█████████▏| 11192/12188 [26:23<2:02:16, 7.37s/it] 92%|█████████▏| 11193/12188 [26:30<1:59:29, 7.21s/it] {'loss': 0.3011, 'grad_norm': 0.6855259077806963, 'learning_rate': 1.7411640010845832e-07, 'epoch': 0.92} + 92%|█████████▏| 11193/12188 [26:30<1:59:29, 7.21s/it] 92%|█████████▏| 11194/12188 [26:37<1:58:32, 7.16s/it] {'loss': 0.2795, 'grad_norm': 0.6926713446667201, 'learning_rate': 1.7376898267843722e-07, 'epoch': 0.92} + 92%|█████████▏| 11194/12188 [26:37<1:58:32, 7.16s/it] 92%|█████████▏| 11195/12188 [26:44<1:57:33, 7.10s/it] {'loss': 0.2927, 'grad_norm': 0.8818047719603862, 'learning_rate': 1.734219060690967e-07, 'epoch': 0.92} + 92%|█████████▏| 11195/12188 [26:44<1:57:33, 7.10s/it] 92%|█████████▏| 11196/12188 [26:50<1:54:42, 6.94s/it] {'loss': 0.3227, 'grad_norm': 0.7162209402415689, 'learning_rate': 1.730751703049466e-07, 'epoch': 0.92} + 92%|█████████▏| 11196/12188 [26:50<1:54:42, 6.94s/it] 92%|█████████▏| 11197/12188 [26:57<1:54:03, 6.91s/it] {'loss': 0.3272, 'grad_norm': 0.7041706483974421, 'learning_rate': 1.7272877541047284e-07, 'epoch': 0.92} + 92%|█████████▏| 11197/12188 [26:57<1:54:03, 6.91s/it] 92%|█████████▏| 11198/12188 [27:05<1:55:54, 7.02s/it] {'loss': 0.2736, 'grad_norm': 0.7083710945464733, 'learning_rate': 1.7238272141013702e-07, 'epoch': 0.92} + 92%|█████████▏| 11198/12188 [27:05<1:55:54, 7.02s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 899, in process_image_unified + visual_processed = processor.preprocess(image, return_tensors="pt") + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 432, in preprocess + patches, image_grid_thw = self._preprocess( + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 252, in _preprocess + resized_height, resized_width = smart_resize( + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 69, in smart_resize + raise ValueError(f"height:{height} and width:{width} must be larger than factor:{factor}") +ValueError: height:21 and width:135 must be larger than factor:28 +[Try #0] Failed to fetch sample 2206265 in VC:s3://gui-agent/jedi/images/figma400k/figma400k_extracted/. Exception: height:21 and width:135 must be larger than factor:28 +Problematic sample: {'image': 'a4b739659e7c325d58fab0a3e55e135c875dc9a485e2b2af942dbafdf4662908.png', 'conversations': [{'from': 'human', 'value': "\nThe Date picker trigger button icon's intended function:\nActs as a date picker trigger that opens a calendar dropdown when clicked, allowing users to select dates visually rather than typing them manually. It's part of the browser's default date input functionality."}, {'from': 'gpt', 'value': '[[674, 238, 756, 762]]'}, {'from': 'human', 'value': "The spatial layout of this Date picker trigger button icon:\nLocated at the absolute right edge of a date input field, aligned vertically in the center. The triangle appears immediately adjacent to another UI control in the same date field.\n\nRegarding the Date picker trigger button icon's functionality:\nActs as a date picker trigger that opens a calendar dropdown when clicked, allowing users to select dates visually rather than typing them manually. It's part of the browser's default date input functionality."}, {'from': 'gpt', 'value': '[[674, 238, 756, 762]]'}, {'from': 'human', 'value': 'Location and arrangement of this Date picker trigger button icon:\nLocated at the absolute right edge of a date input field, aligned vertically in the center. The triangle appears immediately adjacent to another UI control in the same date field.'}, {'from': 'gpt', 'value': '[[674, 238, 756, 762]]'}, {'from': 'human', 'value': "The Button (Spinner Button)'s visual characteristics are as follows:\nA small downward-pointing arrow button that's part of a number input control. The arrow has a dark color and appears in a square container with border styling.\n\nPositional details of the Button (Spinner Button):\nThis button is positioned on the right side of a date input field, located in a row with two identical date input controls. Each date field has a dd/mm/yyyy format placeholder text."}, {'from': 'gpt', 'value': 'x1, y1, x2, y2: [[785, 190, 852, 857]]'}, {'from': 'human', 'value': "The visual appearance of this Button (Spinner Button) is as follows:\n\nVisual Description: A small downward-pointing arrow button that's part of a number input control. The arrow has a dark color and appears in a square container with border styling."}, {'from': 'gpt', 'value': 'x1, y1, x2, y2: [[785, 190, 852, 857]]'}, {'from': 'human', 'value': "This Button (Spinner Button) serves the following purpose:\nWhen clicked, this button decreases the value in the associated date input field. It's part of the browser's default date picker UI controls that allow users to adjust date values without typing."}, {'from': 'gpt', 'value': 'x1, y1, x2, y2: [[785, 190, 852, 857]]'}, {'from': 'human', 'value': "The User Profile Component's appearance can be described as:\nA compact user profile display showing the username 'NotTerry' accompanied by a small circular avatar with a cyan/blue profile picture\n\nThe User Profile Component's placement in the interface:\nLocated in the live chat section on the right side of the interface, integrated into the message stream, appearing as a message sender's identification"}, {'from': 'gpt', 'value': '[[785, 190, 852, 857]]'}, {'from': 'human', 'value': "The User Profile Component's appearance can be described as:\nA compact user profile display showing the username 'NotTerry' accompanied by a small circular avatar with a cyan/blue profile picture\n\nFunctional capabilities of the User Profile Component:\nRepresents a user's identity and serves as a header for their chat messages in the live chat interface"}, {'from': 'gpt', 'value': '[[785, 190, 852, 857]]'}, {'from': 'human', 'value': "The visual attributes of this User Profile Component are:\nA compact user profile display showing the username 'NotTerry' accompanied by a small circular avatar with a cyan/blue profile picture\n\nThis User Profile Component is positioned as follows:\nLocated in the live chat section on the right side of the interface, integrated into the message stream, appearing as a message sender's identification\n\nUsage and purpose of this User Profile Component:\nRepresents a user's identity and serves as a header for their chat messages in the live chat interface"}, {'from': 'gpt', 'value': '[[785, 190, 852, 857]]'}, {'from': 'human', 'value': "Here's a detailed visual description of the Navigation Button:\nA navigation icon featuring three small squares arranged in a grid pattern, located within a dark navy interface. The icon appears to use a faintly glowing blue color against the dark background.\n\nThis Navigation Button allows users to:\nA navigation button that likely serves as a 'MAIN' menu control, allowing users to access primary navigation options or return to the main view of the application."}, {'from': 'gpt', 'value': '[[0, 0, 1000, 1000]]'}, {'from': 'human', 'value': "The position of this Navigation Button can be described as:\nThe icon is positioned in the upper-left side of the interface, just below the RustyLoot logo. It's the first item in what appears to be a vertical navigation menu with other gaming-related options.\n\nThis Navigation Button serves the following purpose:\nA navigation button that likely serves as a 'MAIN' menu control, allowing users to access primary navigation options or return to the main view of the application."}, {'from': 'gpt', 'value': '[[0, 0, 1000, 1000]]'}, {'from': 'human', 'value': "The Navigation Button's appearance can be described as:\nA navigation icon featuring three small squares arranged in a grid pattern, located within a dark navy interface. The icon appears to use a faintly glowing blue color against the dark background.\n\nPositional details of the Navigation Button:\nThe icon is positioned in the upper-left side of the interface, just below the RustyLoot logo. It's the first item in what appears to be a vertical navigation menu with other gaming-related options."}, {'from': 'gpt', 'value': '[[0, 0, 1000, 1000]]'}]} + 92%|█████████▏| 11199/12188 [27:11<1:54:57, 6.97s/it] {'loss': 0.3236, 'grad_norm': 0.6422549319169341, 'learning_rate': 1.7203700832837844e-07, 'epoch': 0.92} + 92%|█████████▏| 11199/12188 [27:11<1:54:57, 6.97s/it] 92%|█████████▏| 11200/12188 [27:19<1:56:31, 7.08s/it] {'loss': 0.2906, 'grad_norm': 0.6706554855671941, 'learning_rate': 1.7169163618960817e-07, 'epoch': 0.92} + 92%|█████████▏| 11200/12188 [27:19<1:56:31, 7.08s/it] 92%|█████████▏| 11201/12188 [27:26<1:55:03, 6.99s/it] {'loss': 0.3021, 'grad_norm': 0.6871138142749993, 'learning_rate': 1.713466050182183e-07, 'epoch': 0.92} + 92%|█████████▏| 11201/12188 [27:26<1:55:03, 6.99s/it] 92%|█████████▏| 11202/12188 [27:33<1:56:15, 7.07s/it] {'loss': 0.2991, 'grad_norm': 0.7558258544955695, 'learning_rate': 1.7100191483857275e-07, 'epoch': 0.92} + 92%|█████████▏| 11202/12188 [27:33<1:56:15, 7.07s/it] 92%|█████████▏| 11203/12188 [27:40<1:55:53, 7.06s/it] {'loss': 0.3121, 'grad_norm': 0.7219405299571628, 'learning_rate': 1.7065756567501424e-07, 'epoch': 0.92} + 92%|█████████▏| 11203/12188 [27:40<1:55:53, 7.06s/it] 92%|█████████▏| 11204/12188 [27:47<1:57:14, 7.15s/it] {'loss': 0.2916, 'grad_norm': 0.6773306168951593, 'learning_rate': 1.7031355755185886e-07, 'epoch': 0.92} + 92%|█████████▏| 11204/12188 [27:47<1:57:14, 7.15s/it]Invalidate trace cache @ step 2: expected module 1, but got module 364 + 92%|█████████▏| 11205/12188 [27:54<1:53:49, 6.95s/it] {'loss': 0.706, 'grad_norm': 0.6018711635603365, 'learning_rate': 1.6996989049340106e-07, 'epoch': 0.92} + 92%|█████████▏| 11205/12188 [27:54<1:53:49, 6.95s/it] 92%|█████████▏| 11206/12188 [28:04<2:12:51, 8.12s/it] {'loss': 0.3045, 'grad_norm': 0.7058866049001195, 'learning_rate': 1.6962656452390925e-07, 'epoch': 0.92} + 92%|█████████▏| 11206/12188 [28:05<2:12:51, 8.12s/it] 92%|█████████▏| 11207/12188 [28:11<2:06:03, 7.71s/it] {'loss': 0.3173, 'grad_norm': 0.7958907963937737, 'learning_rate': 1.6928357966762843e-07, 'epoch': 0.92} + 92%|█████████▏| 11207/12188 [28:11<2:06:03, 7.71s/it] 92%|█████████▏| 11208/12188 [28:18<2:02:28, 7.50s/it] {'loss': 0.2874, 'grad_norm': 0.6831109396860476, 'learning_rate': 1.689409359487809e-07, 'epoch': 0.92} + 92%|█████████▏| 11208/12188 [28:18<2:02:28, 7.50s/it] 92%|█████████▏| 11209/12188 [28:26<2:01:13, 7.43s/it] {'loss': 0.3048, 'grad_norm': 0.7679612549296528, 'learning_rate': 1.6859863339156235e-07, 'epoch': 0.92} + 92%|█████████▏| 11209/12188 [28:26<2:01:13, 7.43s/it] 92%|█████████▏| 11210/12188 [28:32<1:58:36, 7.28s/it] {'loss': 0.2719, 'grad_norm': 0.7522566614668038, 'learning_rate': 1.6825667202014617e-07, 'epoch': 0.92} + 92%|█████████▏| 11210/12188 [28:32<1:58:36, 7.28s/it] 92%|█████████▏| 11211/12188 [28:39<1:56:44, 7.17s/it] {'loss': 0.2828, 'grad_norm': 0.7085457953924862, 'learning_rate': 1.6791505185868085e-07, 'epoch': 0.92} + 92%|█████████▏| 11211/12188 [28:39<1:56:44, 7.17s/it] 92%|█████████▏| 11212/12188 [28:47<1:56:28, 7.16s/it] {'loss': 0.287, 'grad_norm': 0.6784143747565388, 'learning_rate': 1.675737729312915e-07, 'epoch': 0.92} + 92%|█████████▏| 11212/12188 [28:47<1:56:28, 7.16s/it] 92%|█████████▏| 11213/12188 [28:53<1:54:56, 7.07s/it] {'loss': 0.3049, 'grad_norm': 0.7440677954789073, 'learning_rate': 1.6723283526207833e-07, 'epoch': 0.92} + 92%|█████████▏| 11213/12188 [28:53<1:54:56, 7.07s/it] 92%|█████████▏| 11214/12188 [29:00<1:54:12, 7.03s/it] {'loss': 0.2897, 'grad_norm': 0.6854798952489147, 'learning_rate': 1.6689223887511818e-07, 'epoch': 0.92} + 92%|█████████▏| 11214/12188 [29:00<1:54:12, 7.03s/it] 92%|█████████▏| 11215/12188 [29:07<1:53:19, 6.99s/it] {'loss': 0.3139, 'grad_norm': 0.689652540840843, 'learning_rate': 1.6655198379446347e-07, 'epoch': 0.92} + 92%|█████████▏| 11215/12188 [29:07<1:53:19, 6.99s/it] 92%|█████████▏| 11216/12188 [29:14<1:53:36, 7.01s/it] {'loss': 0.3031, 'grad_norm': 0.7101108427652664, 'learning_rate': 1.6621207004414108e-07, 'epoch': 0.92} + 92%|█████████▏| 11216/12188 [29:14<1:53:36, 7.01s/it] 92%|█████████▏| 11217/12188 [29:21<1:51:52, 6.91s/it] {'loss': 0.2824, 'grad_norm': 0.7490698155557923, 'learning_rate': 1.6587249764815628e-07, 'epoch': 0.92} + 92%|█████████▏| 11217/12188 [29:21<1:51:52, 6.91s/it] 92%|█████████▏| 11218/12188 [29:29<1:58:10, 7.31s/it] {'loss': 0.3019, 'grad_norm': 0.8108542946996977, 'learning_rate': 1.6553326663048986e-07, 'epoch': 0.92} + 92%|█████████▏| 11218/12188 [29:29<1:58:10, 7.31s/it] 92%|█████████▏| 11219/12188 [29:37<2:02:54, 7.61s/it] {'loss': 0.3129, 'grad_norm': 0.7571538081860191, 'learning_rate': 1.6519437701509654e-07, 'epoch': 0.92} + 92%|█████████▏| 11219/12188 [29:38<2:02:54, 7.61s/it] 92%|█████████▏| 11220/12188 [29:46<2:07:51, 7.93s/it] {'loss': 0.2752, 'grad_norm': 0.693122523639224, 'learning_rate': 1.6485582882590833e-07, 'epoch': 0.92} + 92%|█████████▏| 11220/12188 [29:46<2:07:51, 7.93s/it] 92%|█████████▏| 11221/12188 [29:53<2:03:36, 7.67s/it] {'loss': 0.3185, 'grad_norm': 0.7380830748551617, 'learning_rate': 1.6451762208683387e-07, 'epoch': 0.92} + 92%|█████████▏| 11221/12188 [29:53<2:03:36, 7.67s/it] 92%|█████████▏| 11222/12188 [30:01<2:03:33, 7.67s/it] {'loss': 0.2786, 'grad_norm': 0.7315668002785362, 'learning_rate': 1.6417975682175568e-07, 'epoch': 0.92} + 92%|█████████▏| 11222/12188 [30:01<2:03:33, 7.67s/it] 92%|█████████▏| 11223/12188 [30:10<2:12:37, 8.25s/it] {'loss': 0.3002, 'grad_norm': 0.6489513779207133, 'learning_rate': 1.6384223305453417e-07, 'epoch': 0.92} + 92%|█████████▏| 11223/12188 [30:11<2:12:37, 8.25s/it] 92%|█████████▏| 11224/12188 [30:18<2:08:21, 7.99s/it] {'loss': 0.2885, 'grad_norm': 0.7156433300243424, 'learning_rate': 1.6350505080900358e-07, 'epoch': 0.92} + 92%|█████████▏| 11224/12188 [30:18<2:08:21, 7.99s/it] 92%|█████████▏| 11225/12188 [30:25<2:03:38, 7.70s/it] {'loss': 0.3369, 'grad_norm': 0.8097202021341822, 'learning_rate': 1.63168210108976e-07, 'epoch': 0.92} + 92%|█████████▏| 11225/12188 [30:25<2:03:38, 7.70s/it] 92%|█████████▏| 11226/12188 [30:32<2:00:21, 7.51s/it] {'loss': 0.3033, 'grad_norm': 0.6997281446841124, 'learning_rate': 1.6283171097823903e-07, 'epoch': 0.92} + 92%|█████████▏| 11226/12188 [30:32<2:00:21, 7.51s/it][2025-08-18 15:40:10,033] [WARNING] [stage3.py:2118:step] 1 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time + 92%|█████████▏| 11227/12188 [30:45<2:25:48, 9.10s/it] {'loss': 0.3067, 'grad_norm': 0.7274296613003955, 'learning_rate': 1.6249555344055533e-07, 'epoch': 0.92} + 92%|█████████▏| 11227/12188 [30:45<2:25:48, 9.10s/it] 92%|█████████▏| 11228/12188 [30:54<2:23:57, 9.00s/it] {'loss': 0.3197, 'grad_norm': 0.7244215288581305, 'learning_rate': 1.6215973751966262e-07, 'epoch': 0.92} + 92%|█████████▏| 11228/12188 [30:54<2:23:57, 9.00s/it] 92%|█████████▏| 11229/12188 [31:00<2:13:42, 8.37s/it] {'loss': 0.2788, 'grad_norm': 0.6568461946854757, 'learning_rate': 1.6182426323927747e-07, 'epoch': 0.92} + 92%|█████████▏| 11229/12188 [31:00<2:13:42, 8.37s/it] 92%|█████████▏| 11230/12188 [31:09<2:14:49, 8.44s/it] {'loss': 0.2933, 'grad_norm': 0.6830176769780119, 'learning_rate': 1.6148913062308923e-07, 'epoch': 0.92} + 92%|█████████▏| 11230/12188 [31:09<2:14:49, 8.44s/it] 92%|█████████▏| 11231/12188 [31:19<2:22:13, 8.92s/it] {'loss': 0.2522, 'grad_norm': 0.6659966776175709, 'learning_rate': 1.6115433969476567e-07, 'epoch': 0.92} + 92%|█████████▏| 11231/12188 [31:19<2:22:13, 8.92s/it] 92%|█████████▏| 11232/12188 [31:26<2:12:28, 8.31s/it] {'loss': 0.243, 'grad_norm': 0.6416814449500788, 'learning_rate': 1.6081989047794732e-07, 'epoch': 0.92} + 92%|█████████▏| 11232/12188 [31:26<2:12:28, 8.31s/it] 92%|█████████▏| 11233/12188 [31:34<2:08:47, 8.09s/it] {'loss': 0.2977, 'grad_norm': 0.7828312298933339, 'learning_rate': 1.6048578299625472e-07, 'epoch': 0.92} + 92%|█████████▏| 11233/12188 [31:34<2:08:47, 8.09s/it] 92%|█████████▏| 11234/12188 [31:43<2:15:36, 8.53s/it] {'loss': 0.2773, 'grad_norm': 0.6484183542340036, 'learning_rate': 1.6015201727328012e-07, 'epoch': 0.92} + 92%|█████████▏| 11234/12188 [31:43<2:15:36, 8.53s/it] 92%|█████████▏| 11235/12188 [31:50<2:06:16, 7.95s/it] {'loss': 0.2825, 'grad_norm': 0.7398038205574786, 'learning_rate': 1.598185933325941e-07, 'epoch': 0.92} + 92%|█████████▏| 11235/12188 [31:50<2:06:16, 7.95s/it] 92%|█████████▏| 11236/12188 [31:57<2:03:12, 7.77s/it] {'loss': 0.3566, 'grad_norm': 0.7770219843471642, 'learning_rate': 1.5948551119774336e-07, 'epoch': 0.92} + 92%|█████████▏| 11236/12188 [31:57<2:03:12, 7.77s/it] 92%|█████████▏| 11237/12188 [32:04<1:59:01, 7.51s/it] {'loss': 0.2787, 'grad_norm': 0.7429731839683104, 'learning_rate': 1.59152770892248e-07, 'epoch': 0.92} + 92%|█████████▏| 11237/12188 [32:04<1:59:01, 7.51s/it] 92%|█████████▏| 11238/12188 [32:13<2:04:33, 7.87s/it] {'loss': 0.3258, 'grad_norm': 0.6724076694432708, 'learning_rate': 1.5882037243960702e-07, 'epoch': 0.92} + 92%|█████████▏| 11238/12188 [32:13<2:04:33, 7.87s/it] 92%|█████████▏| 11239/12188 [32:20<2:02:50, 7.77s/it] {'loss': 0.3092, 'grad_norm': 0.7261166791085739, 'learning_rate': 1.5848831586329382e-07, 'epoch': 0.92} + 92%|█████████▏| 11239/12188 [32:20<2:02:50, 7.77s/it] 92%|█████████▏| 11240/12188 [32:27<1:58:26, 7.50s/it] {'loss': 0.3151, 'grad_norm': 0.7031246372915178, 'learning_rate': 1.581566011867569e-07, 'epoch': 0.92} + 92%|█████████▏| 11240/12188 [32:27<1:58:26, 7.50s/it] 92%|█████████▏| 11241/12188 [32:34<1:55:48, 7.34s/it] {'loss': 0.2817, 'grad_norm': 0.8385952162855425, 'learning_rate': 1.5782522843342197e-07, 'epoch': 0.92} + 92%|█████████▏| 11241/12188 [32:34<1:55:48, 7.34s/it] 92%|█████████▏| 11242/12188 [32:43<2:04:05, 7.87s/it] {'loss': 0.2798, 'grad_norm': 0.7567554499519671, 'learning_rate': 1.574941976266897e-07, 'epoch': 0.92} + 92%|█████████▏| 11242/12188 [32:43<2:04:05, 7.87s/it] 92%|█████████▏| 11243/12188 [32:50<1:58:29, 7.52s/it] {'loss': 0.3105, 'grad_norm': 0.6938814861520782, 'learning_rate': 1.5716350878993703e-07, 'epoch': 0.92} + 92%|█████████▏| 11243/12188 [32:50<1:58:29, 7.52s/it] 92%|█████████▏| 11244/12188 [32:57<1:57:27, 7.47s/it] {'loss': 0.2714, 'grad_norm': 0.7037823046432325, 'learning_rate': 1.5683316194651743e-07, 'epoch': 0.92} + 92%|█████████▏| 11244/12188 [32:57<1:57:27, 7.47s/it] 92%|█████████▏| 11245/12188 [33:04<1:53:45, 7.24s/it] {'loss': 0.3316, 'grad_norm': 0.7305422802329253, 'learning_rate': 1.5650315711975783e-07, 'epoch': 0.92} + 92%|█████████▏| 11245/12188 [33:04<1:53:45, 7.24s/it] 92%|█████████▏| 11246/12188 [33:11<1:51:39, 7.11s/it] {'loss': 0.3598, 'grad_norm': 0.7227168386220015, 'learning_rate': 1.5617349433296402e-07, 'epoch': 0.92} + 92%|█████████▏| 11246/12188 [33:11<1:51:39, 7.11s/it] 92%|█████████▏| 11247/12188 [33:19<1:58:23, 7.55s/it] {'loss': 0.3518, 'grad_norm': 0.7132040831413079, 'learning_rate': 1.5584417360941572e-07, 'epoch': 0.92} + 92%|█████████▏| 11247/12188 [33:19<1:58:23, 7.55s/it] 92%|█████████▏| 11248/12188 [33:27<1:58:24, 7.56s/it] {'loss': 0.3027, 'grad_norm': 0.7009303508074556, 'learning_rate': 1.5551519497236877e-07, 'epoch': 0.92} + 92%|█████████▏| 11248/12188 [33:27<1:58:24, 7.56s/it] 92%|█████████▏| 11249/12188 [33:33<1:53:50, 7.27s/it] {'loss': 0.2898, 'grad_norm': 0.6848132570659111, 'learning_rate': 1.5518655844505627e-07, 'epoch': 0.92} + 92%|█████████▏| 11249/12188 [33:33<1:53:50, 7.27s/it] 92%|█████████▏| 11250/12188 [33:43<2:06:32, 8.09s/it] {'loss': 0.2954, 'grad_norm': 0.7467213532511601, 'learning_rate': 1.548582640506846e-07, 'epoch': 0.92} + 92%|█████████▏| 11250/12188 [33:43<2:06:32, 8.09s/it] 92%|█████████▏| 11251/12188 [33:53<2:11:09, 8.40s/it] {'loss': 0.3023, 'grad_norm': 0.6460662306141186, 'learning_rate': 1.5453031181243804e-07, 'epoch': 0.92} + 92%|█████████▏| 11251/12188 [33:53<2:11:09, 8.40s/it] 92%|█████████▏| 11252/12188 [33:59<2:02:20, 7.84s/it] {'loss': 0.2764, 'grad_norm': 0.6877756114934357, 'learning_rate': 1.5420270175347696e-07, 'epoch': 0.92} + 92%|█████████▏| 11252/12188 [33:59<2:02:20, 7.84s/it] 92%|█████████▏| 11253/12188 [34:06<1:59:46, 7.69s/it] {'loss': 0.2782, 'grad_norm': 0.7624324327889929, 'learning_rate': 1.538754338969345e-07, 'epoch': 0.92} + 92%|█████████▏| 11253/12188 [34:06<1:59:46, 7.69s/it] 92%|█████████▏| 11254/12188 [34:13<1:56:29, 7.48s/it] {'loss': 0.2793, 'grad_norm': 0.8253004160708491, 'learning_rate': 1.5354850826592383e-07, 'epoch': 0.92} + 92%|█████████▏| 11254/12188 [34:13<1:56:29, 7.48s/it] 92%|█████████▏| 11255/12188 [34:20<1:52:11, 7.22s/it] {'loss': 0.2637, 'grad_norm': 0.7413141103772559, 'learning_rate': 1.532219248835315e-07, 'epoch': 0.92} + 92%|█████████▏| 11255/12188 [34:20<1:52:11, 7.22s/it] 92%|█████████▏| 11256/12188 [34:27<1:49:52, 7.07s/it] {'loss': 0.2924, 'grad_norm': 1.1489779299006848, 'learning_rate': 1.5289568377281905e-07, 'epoch': 0.92} + 92%|█████████▏| 11256/12188 [34:27<1:49:52, 7.07s/it] 92%|█████████▏| 11257/12188 [34:34<1:49:29, 7.06s/it] {'loss': 0.3224, 'grad_norm': 0.7159806448963018, 'learning_rate': 1.525697849568264e-07, 'epoch': 0.92} + 92%|█████████▏| 11257/12188 [34:34<1:49:29, 7.06s/it] 92%|█████████▏| 11258/12188 [34:43<1:59:00, 7.68s/it] {'loss': 0.3126, 'grad_norm': 0.6897578485381174, 'learning_rate': 1.5224422845856678e-07, 'epoch': 0.92} + 92%|█████████▏| 11258/12188 [34:43<1:59:00, 7.68s/it] 92%|█████████▏| 11259/12188 [34:52<2:06:01, 8.14s/it] {'loss': 0.2657, 'grad_norm': 0.719473696899104, 'learning_rate': 1.5191901430103185e-07, 'epoch': 0.92} + 92%|█████████▏| 11259/12188 [34:52<2:06:01, 8.14s/it] 92%|█████████▏| 11260/12188 [34:59<2:01:33, 7.86s/it] {'loss': 0.3425, 'grad_norm': 0.7933579556099438, 'learning_rate': 1.5159414250718706e-07, 'epoch': 0.92} + 92%|█████████▏| 11260/12188 [34:59<2:01:33, 7.86s/it] 92%|█████████▏| 11261/12188 [35:06<1:56:28, 7.54s/it] {'loss': 0.2938, 'grad_norm': 0.6684641933143073, 'learning_rate': 1.5126961309997355e-07, 'epoch': 0.92} + 92%|█████████▏| 11261/12188 [35:06<1:56:28, 7.54s/it] 92%|█████████▏| 11262/12188 [35:13<1:52:12, 7.27s/it] {'loss': 0.2899, 'grad_norm': 1.0785554639161548, 'learning_rate': 1.5094542610231133e-07, 'epoch': 0.92} + 92%|█████████▏| 11262/12188 [35:13<1:52:12, 7.27s/it] 92%|█████████▏| 11263/12188 [35:20<1:51:38, 7.24s/it] {'loss': 0.318, 'grad_norm': 0.6385669797373326, 'learning_rate': 1.506215815370915e-07, 'epoch': 0.92} + 92%|█████████▏| 11263/12188 [35:20<1:51:38, 7.24s/it] 92%|█████████▏| 11264/12188 [35:27<1:51:00, 7.21s/it] {'loss': 0.2859, 'grad_norm': 0.8662288552764338, 'learning_rate': 1.5029807942718466e-07, 'epoch': 0.92} + 92%|█████████▏| 11264/12188 [35:27<1:51:00, 7.21s/it] 92%|█████████▏| 11265/12188 [35:34<1:50:19, 7.17s/it] {'loss': 0.3221, 'grad_norm': 0.8051146380429164, 'learning_rate': 1.4997491979543587e-07, 'epoch': 0.92} + 92%|█████████▏| 11265/12188 [35:34<1:50:19, 7.17s/it] 92%|█████████▏| 11266/12188 [35:41<1:48:21, 7.05s/it] {'loss': 0.3195, 'grad_norm': 0.6571247837905112, 'learning_rate': 1.496521026646658e-07, 'epoch': 0.92} + 92%|█████████▏| 11266/12188 [35:41<1:48:21, 7.05s/it] 92%|█████████▏| 11267/12188 [35:48<1:46:37, 6.95s/it] {'loss': 0.2978, 'grad_norm': 0.7176169104633566, 'learning_rate': 1.4932962805767225e-07, 'epoch': 0.92} + 92%|█████████▏| 11267/12188 [35:48<1:46:37, 6.95s/it] 92%|█████████▏| 11268/12188 [35:55<1:47:14, 6.99s/it] {'loss': 0.2873, 'grad_norm': 0.7202111834483665, 'learning_rate': 1.4900749599722653e-07, 'epoch': 0.92} + 92%|█████████▏| 11268/12188 [35:55<1:47:14, 6.99s/it] 92%|█████████▏| 11269/12188 [36:02<1:48:24, 7.08s/it] {'loss': 0.274, 'grad_norm': 0.8348923152984996, 'learning_rate': 1.4868570650607816e-07, 'epoch': 0.92} + 92%|█████████▏| 11269/12188 [36:02<1:48:24, 7.08s/it] 92%|█████████▏| 11270/12188 [36:09<1:46:50, 6.98s/it] {'loss': 0.2909, 'grad_norm': 0.6895149148244675, 'learning_rate': 1.4836425960695123e-07, 'epoch': 0.92} + 92%|█████████▏| 11270/12188 [36:09<1:46:50, 6.98s/it] 92%|█████████▏| 11271/12188 [36:16<1:49:57, 7.19s/it] {'loss': 0.3261, 'grad_norm': 0.6924222721195658, 'learning_rate': 1.4804315532254475e-07, 'epoch': 0.92} + 92%|█████████▏| 11271/12188 [36:17<1:49:57, 7.19s/it] 92%|█████████▏| 11272/12188 [36:24<1:49:38, 7.18s/it] {'loss': 0.2946, 'grad_norm': 0.7929231906139812, 'learning_rate': 1.4772239367553564e-07, 'epoch': 0.92} + 92%|█████████▏| 11272/12188 [36:24<1:49:38, 7.18s/it] 92%|█████████▏| 11273/12188 [36:31<1:48:16, 7.10s/it] {'loss': 0.2947, 'grad_norm': 0.7673087684561123, 'learning_rate': 1.474019746885752e-07, 'epoch': 0.92} + 92%|█████████▏| 11273/12188 [36:31<1:48:16, 7.10s/it] 93%|█████████▎| 11274/12188 [36:38<1:47:57, 7.09s/it] {'loss': 0.3079, 'grad_norm': 1.3306785447620768, 'learning_rate': 1.4708189838429087e-07, 'epoch': 0.92} + 93%|█████████▎| 11274/12188 [36:38<1:47:57, 7.09s/it] 93%|█████████▎| 11275/12188 [36:44<1:44:58, 6.90s/it] {'loss': 0.3073, 'grad_norm': 0.7697372147011703, 'learning_rate': 1.4676216478528683e-07, 'epoch': 0.93} + 93%|█████████▎| 11275/12188 [36:44<1:44:58, 6.90s/it] 93%|█████████▎| 11276/12188 [36:51<1:46:22, 7.00s/it] {'loss': 0.2983, 'grad_norm': 0.866881774370742, 'learning_rate': 1.4644277391414107e-07, 'epoch': 0.93} + 93%|█████████▎| 11276/12188 [36:51<1:46:22, 7.00s/it] 93%|█████████▎| 11277/12188 [36:58<1:45:54, 6.98s/it] {'loss': 0.2789, 'grad_norm': 0.7635845733959953, 'learning_rate': 1.4612372579340893e-07, 'epoch': 0.93} + 93%|█████████▎| 11277/12188 [36:58<1:45:54, 6.98s/it] 93%|█████████▎| 11278/12188 [37:05<1:45:13, 6.94s/it] {'loss': 0.2894, 'grad_norm': 0.684839333253168, 'learning_rate': 1.4580502044562016e-07, 'epoch': 0.93} + 93%|█████████▎| 11278/12188 [37:05<1:45:13, 6.94s/it] 93%|█████████▎| 11279/12188 [37:12<1:46:25, 7.02s/it] {'loss': 0.3122, 'grad_norm': 0.8307232684411257, 'learning_rate': 1.454866578932823e-07, 'epoch': 0.93} + 93%|█████████▎| 11279/12188 [37:12<1:46:25, 7.02s/it] 93%|█████████▎| 11280/12188 [37:20<1:48:19, 7.16s/it] {'loss': 0.2826, 'grad_norm': 0.6924368772217575, 'learning_rate': 1.4516863815887795e-07, 'epoch': 0.93} + 93%|█████████▎| 11280/12188 [37:20<1:48:19, 7.16s/it] 93%|█████████▎| 11281/12188 [37:27<1:46:35, 7.05s/it] {'loss': 0.309, 'grad_norm': 0.6699930946165995, 'learning_rate': 1.4485096126486355e-07, 'epoch': 0.93} + 93%|█████████▎| 11281/12188 [37:27<1:46:35, 7.05s/it] 93%|█████████▎| 11282/12188 [37:34<1:47:39, 7.13s/it] {'loss': 0.2711, 'grad_norm': 0.7733420429558961, 'learning_rate': 1.4453362723367449e-07, 'epoch': 0.93} + 93%|█████████▎| 11282/12188 [37:34<1:47:39, 7.13s/it] 93%|█████████▎| 11283/12188 [37:41<1:45:40, 7.01s/it] {'loss': 0.3129, 'grad_norm': 0.7097211331580467, 'learning_rate': 1.4421663608772009e-07, 'epoch': 0.93} + 93%|█████████▎| 11283/12188 [37:41<1:45:40, 7.01s/it] 93%|█████████▎| 11284/12188 [37:50<1:58:25, 7.86s/it] {'loss': 0.2572, 'grad_norm': 0.6938022702547668, 'learning_rate': 1.4389998784938465e-07, 'epoch': 0.93} + 93%|█████████▎| 11284/12188 [37:50<1:58:25, 7.86s/it] 93%|█████████▎| 11285/12188 [37:58<1:56:49, 7.76s/it] {'loss': 0.2857, 'grad_norm': 0.7944561410899126, 'learning_rate': 1.4358368254103084e-07, 'epoch': 0.93} + 93%|█████████▎| 11285/12188 [37:58<1:56:49, 7.76s/it] 93%|█████████▎| 11286/12188 [38:05<1:51:32, 7.42s/it] {'loss': 0.351, 'grad_norm': 0.7185777067500927, 'learning_rate': 1.4326772018499414e-07, 'epoch': 0.93} + 93%|█████████▎| 11286/12188 [38:05<1:51:32, 7.42s/it] 93%|█████████▎| 11287/12188 [38:15<2:06:32, 8.43s/it] {'loss': 0.2837, 'grad_norm': 0.7121219167860867, 'learning_rate': 1.4295210080358835e-07, 'epoch': 0.93} + 93%|█████████▎| 11287/12188 [38:15<2:06:32, 8.43s/it] 93%|█████████▎| 11288/12188 [38:22<1:59:20, 7.96s/it] {'loss': 0.2707, 'grad_norm': 0.7025674832225893, 'learning_rate': 1.4263682441910233e-07, 'epoch': 0.93} + 93%|█████████▎| 11288/12188 [38:22<1:59:20, 7.96s/it] 93%|█████████▎| 11289/12188 [38:30<1:56:34, 7.78s/it] {'loss': 0.3494, 'grad_norm': 0.7966154470879614, 'learning_rate': 1.4232189105379935e-07, 'epoch': 0.93} + 93%|█████████▎| 11289/12188 [38:30<1:56:34, 7.78s/it] 93%|█████████▎| 11290/12188 [38:37<1:52:41, 7.53s/it] {'loss': 0.2778, 'grad_norm': 0.6565736814925373, 'learning_rate': 1.4200730072991996e-07, 'epoch': 0.93} + 93%|█████████▎| 11290/12188 [38:37<1:52:41, 7.53s/it] 93%|█████████▎| 11291/12188 [38:45<1:56:17, 7.78s/it] {'loss': 0.323, 'grad_norm': 0.7455899875089774, 'learning_rate': 1.4169305346968033e-07, 'epoch': 0.93} + 93%|█████████▎| 11291/12188 [38:45<1:56:17, 7.78s/it] 93%|█████████▎| 11292/12188 [38:52<1:54:36, 7.67s/it] {'loss': 0.2545, 'grad_norm': 0.6409909935890012, 'learning_rate': 1.4137914929527097e-07, 'epoch': 0.93} + 93%|█████████▎| 11292/12188 [38:52<1:54:36, 7.67s/it] 93%|█████████▎| 11293/12188 [38:59<1:51:10, 7.45s/it] {'loss': 0.2503, 'grad_norm': 0.6607175234871537, 'learning_rate': 1.410655882288614e-07, 'epoch': 0.93} + 93%|█████████▎| 11293/12188 [38:59<1:51:10, 7.45s/it] 93%|█████████▎| 11294/12188 [39:07<1:50:36, 7.42s/it] {'loss': 0.309, 'grad_norm': 0.7989299238660069, 'learning_rate': 1.4075237029259227e-07, 'epoch': 0.93} + 93%|█████████▎| 11294/12188 [39:07<1:50:36, 7.42s/it] 93%|█████████▎| 11295/12188 [39:14<1:49:44, 7.37s/it] {'loss': 0.2872, 'grad_norm': 0.6922474454323494, 'learning_rate': 1.404394955085847e-07, 'epoch': 0.93} + 93%|█████████▎| 11295/12188 [39:14<1:49:44, 7.37s/it] 93%|█████████▎| 11296/12188 [39:22<1:53:12, 7.61s/it] {'loss': 0.2776, 'grad_norm': 0.7009723445818344, 'learning_rate': 1.401269638989322e-07, 'epoch': 0.93} + 93%|█████████▎| 11296/12188 [39:22<1:53:12, 7.61s/it] 93%|█████████▎| 11297/12188 [39:29<1:49:09, 7.35s/it] {'loss': 0.2725, 'grad_norm': 0.6189820644086861, 'learning_rate': 1.3981477548570544e-07, 'epoch': 0.93} + 93%|█████████▎| 11297/12188 [39:29<1:49:09, 7.35s/it] 93%|█████████▎| 11298/12188 [39:38<1:57:54, 7.95s/it] {'loss': 0.3044, 'grad_norm': 0.7266470441657543, 'learning_rate': 1.395029302909512e-07, 'epoch': 0.93} + 93%|█████████▎| 11298/12188 [39:38<1:57:54, 7.95s/it] 93%|█████████▎| 11299/12188 [39:45<1:51:43, 7.54s/it] {'loss': 0.2616, 'grad_norm': 0.7803392802194997, 'learning_rate': 1.391914283366902e-07, 'epoch': 0.93} + 93%|█████████▎| 11299/12188 [39:45<1:51:43, 7.54s/it] 93%|█████████▎| 11300/12188 [39:52<1:48:11, 7.31s/it] {'loss': 0.2654, 'grad_norm': 0.7574442006273264, 'learning_rate': 1.3888026964492208e-07, 'epoch': 0.93} + 93%|█████████▎| 11300/12188 [39:52<1:48:11, 7.31s/it] 93%|█████████▎| 11301/12188 [39:59<1:47:46, 7.29s/it] {'loss': 0.3163, 'grad_norm': 0.6566659677770029, 'learning_rate': 1.3856945423761813e-07, 'epoch': 0.93} + 93%|█████████▎| 11301/12188 [39:59<1:47:46, 7.29s/it] 93%|█████████▎| 11302/12188 [40:06<1:49:15, 7.40s/it] {'loss': 0.3274, 'grad_norm': 0.8064913729838163, 'learning_rate': 1.3825898213672918e-07, 'epoch': 0.93} + 93%|█████████▎| 11302/12188 [40:06<1:49:15, 7.40s/it] 93%|█████████▎| 11303/12188 [40:14<1:48:53, 7.38s/it] {'loss': 0.3109, 'grad_norm': 0.6465822861531719, 'learning_rate': 1.379488533641804e-07, 'epoch': 0.93} + 93%|█████████▎| 11303/12188 [40:14<1:48:53, 7.38s/it] 93%|█████████▎| 11304/12188 [40:20<1:45:53, 7.19s/it] {'loss': 0.2939, 'grad_norm': 0.6786564871450589, 'learning_rate': 1.376390679418721e-07, 'epoch': 0.93} + 93%|█████████▎| 11304/12188 [40:20<1:45:53, 7.19s/it]Invalidate trace cache @ step 2: expected module 1, but got module 364 + 93%|█████████▎| 11305/12188 [40:27<1:41:02, 6.87s/it] {'loss': 0.6328, 'grad_norm': 0.6881526843089633, 'learning_rate': 1.373296258916801e-07, 'epoch': 0.93} + 93%|█████████▎| 11305/12188 [40:27<1:41:02, 6.87s/it] 93%|█████████▎| 11306/12188 [40:37<1:55:05, 7.83s/it] {'loss': 0.2561, 'grad_norm': 0.6995970390614854, 'learning_rate': 1.3702052723545856e-07, 'epoch': 0.93} + 93%|█████████▎| 11306/12188 [40:37<1:55:05, 7.83s/it] 93%|█████████▎| 11307/12188 [40:45<1:55:36, 7.87s/it] {'loss': 0.2983, 'grad_norm': 0.7542655896611496, 'learning_rate': 1.3671177199503338e-07, 'epoch': 0.93} + 93%|█████████▎| 11307/12188 [40:45<1:55:36, 7.87s/it]Invalidate trace cache @ step 2: expected module 1, but got module 364 + 93%|█████████▎| 11308/12188 [40:51<1:48:16, 7.38s/it] {'loss': 0.6589, 'grad_norm': 0.666540028462741, 'learning_rate': 1.364033601922099e-07, 'epoch': 0.93} + 93%|█████████▎| 11308/12188 [40:51<1:48:16, 7.38s/it] 93%|█████████▎| 11309/12188 [40:58<1:47:10, 7.32s/it] {'loss': 0.2985, 'grad_norm': 1.0162268841959554, 'learning_rate': 1.360952918487668e-07, 'epoch': 0.93} + 93%|█████████▎| 11309/12188 [40:58<1:47:10, 7.32s/it]Invalidate trace cache @ step 2: expected module 1, but got module 364 + 93%|█████████▎| 11310/12188 [41:05<1:43:46, 7.09s/it] {'loss': 0.625, 'grad_norm': 0.5758721187064126, 'learning_rate': 1.3578756698646056e-07, 'epoch': 0.93} + 93%|█████████▎| 11310/12188 [41:05<1:43:46, 7.09s/it] 93%|█████████▎| 11311/12188 [41:12<1:43:16, 7.07s/it] {'loss': 0.33, 'grad_norm': 0.7672899640814427, 'learning_rate': 1.3548018562702104e-07, 'epoch': 0.93} + 93%|█████████▎| 11311/12188 [41:12<1:43:16, 7.07s/it] 93%|█████████▎| 11312/12188 [41:19<1:42:52, 7.05s/it] {'loss': 0.3322, 'grad_norm': 0.8490221986785434, 'learning_rate': 1.3517314779215473e-07, 'epoch': 0.93} + 93%|█████████▎| 11312/12188 [41:19<1:42:52, 7.05s/it] 93%|█████████▎| 11313/12188 [41:26<1:44:00, 7.13s/it] {'loss': 0.2838, 'grad_norm': 0.7224038350889929, 'learning_rate': 1.3486645350354599e-07, 'epoch': 0.93} + 93%|█████████▎| 11313/12188 [41:26<1:44:00, 7.13s/it] 93%|█████████▎| 11314/12188 [41:33<1:44:46, 7.19s/it] {'loss': 0.2507, 'grad_norm': 0.7033898892327435, 'learning_rate': 1.345601027828508e-07, 'epoch': 0.93} + 93%|█████████▎| 11314/12188 [41:33<1:44:46, 7.19s/it] 93%|█████████▎| 11315/12188 [41:40<1:42:54, 7.07s/it] {'loss': 0.2974, 'grad_norm': 0.703314011041665, 'learning_rate': 1.342540956517041e-07, 'epoch': 0.93} + 93%|████��████▎| 11315/12188 [41:40<1:42:54, 7.07s/it] 93%|█████████▎| 11316/12188 [41:47<1:42:05, 7.02s/it] {'loss': 0.2904, 'grad_norm': 0.7288594674854616, 'learning_rate': 1.3394843213171638e-07, 'epoch': 0.93} + 93%|█████████▎| 11316/12188 [41:47<1:42:05, 7.02s/it] 93%|█████████▎| 11317/12188 [41:54<1:43:45, 7.15s/it] {'loss': 0.3143, 'grad_norm': 0.6868334528643948, 'learning_rate': 1.33643112244472e-07, 'epoch': 0.93} + 93%|█████████▎| 11317/12188 [41:54<1:43:45, 7.15s/it] 93%|█████████▎| 11318/12188 [42:03<1:50:35, 7.63s/it] {'loss': 0.2922, 'grad_norm': 0.6796097000417619, 'learning_rate': 1.3333813601153322e-07, 'epoch': 0.93} + 93%|█████████▎| 11318/12188 [42:03<1:50:35, 7.63s/it] 93%|█████████▎| 11319/12188 [42:10<1:48:20, 7.48s/it] {'loss': 0.2968, 'grad_norm': 0.7530324104449578, 'learning_rate': 1.330335034544361e-07, 'epoch': 0.93} + 93%|█████████▎| 11319/12188 [42:10<1:48:20, 7.48s/it] 93%|█████████▎| 11320/12188 [42:18<1:47:40, 7.44s/it] {'loss': 0.2758, 'grad_norm': 0.6921543877021854, 'learning_rate': 1.3272921459469345e-07, 'epoch': 0.93} + 93%|█████████▎| 11320/12188 [42:18<1:47:40, 7.44s/it] 93%|█████████▎| 11321/12188 [42:25<1:45:29, 7.30s/it] {'loss': 0.2975, 'grad_norm': 0.6952511677696755, 'learning_rate': 1.3242526945379363e-07, 'epoch': 0.93} + 93%|█████████▎| 11321/12188 [42:25<1:45:29, 7.30s/it] 93%|█████████▎| 11322/12188 [42:35<1:59:11, 8.26s/it] {'loss': 0.2893, 'grad_norm': 0.7156688734173434, 'learning_rate': 1.3212166805320114e-07, 'epoch': 0.93} + 93%|█████████▎| 11322/12188 [42:35<1:59:11, 8.26s/it] 93%|█████████▎| 11323/12188 [42:45<2:07:21, 8.83s/it] {'loss': 0.315, 'grad_norm': 0.6783455564576695, 'learning_rate': 1.3181841041435605e-07, 'epoch': 0.93} + 93%|█████████▎| 11323/12188 [42:45<2:07:21, 8.83s/it] 93%|█████████▎| 11324/12188 [42:53<2:00:11, 8.35s/it] {'loss': 0.3207, 'grad_norm': 0.7194350370865308, 'learning_rate': 1.315154965586729e-07, 'epoch': 0.93} + 93%|█████████▎| 11324/12188 [42:53<2:00:11, 8.35s/it] 93%|█████████▎| 11325/12188 [43:00<1:54:57, 7.99s/it] {'loss': 0.3046, 'grad_norm': 0.7058075019433235, 'learning_rate': 1.3121292650754402e-07, 'epoch': 0.93} + 93%|█████████▎| 11325/12188 [43:00<1:54:57, 7.99s/it] 93%|█████████▎| 11326/12188 [43:06<1:48:39, 7.56s/it] {'loss': 0.3148, 'grad_norm': 0.7970080552247297, 'learning_rate': 1.3091070028233622e-07, 'epoch': 0.93} + 93%|█████████▎| 11326/12188 [43:06<1:48:39, 7.56s/it] 93%|█████████▎| 11327/12188 [43:13<1:46:19, 7.41s/it] {'loss': 0.2999, 'grad_norm': 0.6828082380409162, 'learning_rate': 1.306088179043913e-07, 'epoch': 0.93} + 93%|█████████▎| 11327/12188 [43:13<1:46:19, 7.41s/it] 93%|█████████▎| 11328/12188 [43:20<1:42:35, 7.16s/it] {'loss': 0.304, 'grad_norm': 0.7589734260862857, 'learning_rate': 1.303072793950283e-07, 'epoch': 0.93} + 93%|█████████▎| 11328/12188 [43:20<1:42:35, 7.16s/it] 93%|█████████▎| 11329/12188 [43:30<1:53:15, 7.91s/it] {'loss': 0.3165, 'grad_norm': 0.7068858583674917, 'learning_rate': 1.3000608477554188e-07, 'epoch': 0.93} + 93%|█████████▎| 11329/12188 [43:30<1:53:15, 7.91s/it] 93%|█████████▎| 11330/12188 [43:39<1:58:28, 8.28s/it] {'loss': 0.2848, 'grad_norm': 0.6814322738719302, 'learning_rate': 1.2970523406720114e-07, 'epoch': 0.93} + 93%|█████████▎| 11330/12188 [43:39<1:58:28, 8.28s/it] 93%|█████████▎| 11331/12188 [43:45<1:51:04, 7.78s/it] {'loss': 0.2641, 'grad_norm': 0.680235084300091, 'learning_rate': 1.2940472729125241e-07, 'epoch': 0.93} + 93%|█████████▎| 11331/12188 [43:45<1:51:04, 7.78s/it] 93%|███��█████▎| 11332/12188 [43:55<2:00:04, 8.42s/it] {'loss': 0.3188, 'grad_norm': 0.6846429587276059, 'learning_rate': 1.291045644689165e-07, 'epoch': 0.93} + 93%|█████████▎| 11332/12188 [43:55<2:00:04, 8.42s/it] 93%|█████████▎| 11333/12188 [44:02<1:52:58, 7.93s/it] {'loss': 0.3463, 'grad_norm': 0.7881679606342222, 'learning_rate': 1.2880474562139033e-07, 'epoch': 0.93} + 93%|█████████▎| 11333/12188 [44:02<1:52:58, 7.93s/it] 93%|█████████▎| 11334/12188 [44:09<1:50:24, 7.76s/it] {'loss': 0.2547, 'grad_norm': 0.722581946920362, 'learning_rate': 1.28505270769847e-07, 'epoch': 0.93} + 93%|█████████▎| 11334/12188 [44:09<1:50:24, 7.76s/it] 93%|█████████▎| 11335/12188 [44:16<1:44:52, 7.38s/it] {'loss': 0.263, 'grad_norm': 0.7157655469902046, 'learning_rate': 1.2820613993543452e-07, 'epoch': 0.93} + 93%|█████████▎| 11335/12188 [44:16<1:44:52, 7.38s/it] 93%|█████████▎| 11336/12188 [44:25<1:52:34, 7.93s/it] {'loss': 0.2976, 'grad_norm': 0.7516668240268467, 'learning_rate': 1.2790735313927825e-07, 'epoch': 0.93} + 93%|█████████▎| 11336/12188 [44:25<1:52:34, 7.93s/it] 93%|█████████▎| 11337/12188 [44:34<1:55:48, 8.17s/it] {'loss': 0.2567, 'grad_norm': 0.668691575019524, 'learning_rate': 1.2760891040247637e-07, 'epoch': 0.93} + 93%|█████████▎| 11337/12188 [44:34<1:55:48, 8.17s/it] 93%|█████████▎| 11338/12188 [44:41<1:50:16, 7.78s/it] {'loss': 0.2693, 'grad_norm': 0.6340642821873702, 'learning_rate': 1.2731081174610526e-07, 'epoch': 0.93} + 93%|█████████▎| 11338/12188 [44:41<1:50:16, 7.78s/it] 93%|█████████▎| 11339/12188 [44:48<1:47:45, 7.62s/it] {'loss': 0.3429, 'grad_norm': 0.7473002199912472, 'learning_rate': 1.270130571912165e-07, 'epoch': 0.93} + 93%|█████████▎| 11339/12188 [44:48<1:47:45, 7.62s/it] 93%|█████████▎| 11340/12188 [44:55<1:43:55, 7.35s/it] {'loss': 0.3089, 'grad_norm': 0.8531776726380402, 'learning_rate': 1.2671564675883595e-07, 'epoch': 0.93} + 93%|█████████▎| 11340/12188 [44:55<1:43:55, 7.35s/it] 93%|█████████▎| 11341/12188 [45:02<1:44:04, 7.37s/it] {'loss': 0.26, 'grad_norm': 0.6946455120376064, 'learning_rate': 1.2641858046996636e-07, 'epoch': 0.93} + 93%|█████████▎| 11341/12188 [45:02<1:44:04, 7.37s/it] 93%|█████████▎| 11342/12188 [45:09<1:41:44, 7.22s/it] {'loss': 0.3109, 'grad_norm': 0.7040774272969147, 'learning_rate': 1.261218583455881e-07, 'epoch': 0.93} + 93%|█████████▎| 11342/12188 [45:09<1:41:44, 7.22s/it] 93%|█████████▎| 11343/12188 [45:16<1:40:06, 7.11s/it] {'loss': 0.3072, 'grad_norm': 0.6986452330813387, 'learning_rate': 1.2582548040665276e-07, 'epoch': 0.93} + 93%|█████████▎| 11343/12188 [45:16<1:40:06, 7.11s/it] 93%|█████████▎| 11344/12188 [45:25<1:50:55, 7.89s/it] {'loss': 0.2713, 'grad_norm': 0.6043716876419991, 'learning_rate': 1.2552944667409139e-07, 'epoch': 0.93} + 93%|█████████▎| 11344/12188 [45:25<1:50:55, 7.89s/it] 93%|█████████▎| 11345/12188 [45:34<1:52:51, 8.03s/it] {'loss': 0.301, 'grad_norm': 0.7144954853438114, 'learning_rate': 1.2523375716880836e-07, 'epoch': 0.93} + 93%|█████████▎| 11345/12188 [45:34<1:52:51, 8.03s/it] 93%|█████████▎| 11346/12188 [45:41<1:48:09, 7.71s/it] {'loss': 0.2718, 'grad_norm': 0.7747470736598568, 'learning_rate': 1.2493841191168587e-07, 'epoch': 0.93} + 93%|█████████▎| 11346/12188 [45:41<1:48:09, 7.71s/it] 93%|█████████▎| 11347/12188 [45:49<1:51:30, 7.96s/it] {'loss': 0.3032, 'grad_norm': 0.6902688062263426, 'learning_rate': 1.2464341092358113e-07, 'epoch': 0.93} + 93%|█████████▎| 11347/12188 [45:49<1:51:30, 7.96s/it] 93%|█████████▎| 11348/12188 [45:56<1:47:02, 7.65s/it] {'loss': 0.3117, 'grad_norm': 0.798609948956912, 'learning_rate': 1.2434875422532467e-07, 'epoch': 0.93} + 93%|█████████▎| 11348/12188 [45:56<1:47:02, 7.65s/it] 93%|█████████▎| 11349/12188 [46:03<1:44:02, 7.44s/it] {'loss': 0.308, 'grad_norm': 0.7070960491842148, 'learning_rate': 1.2405444183772654e-07, 'epoch': 0.93} + 93%|█████████▎| 11349/12188 [46:03<1:44:02, 7.44s/it] 93%|█████████▎| 11350/12188 [46:10<1:43:14, 7.39s/it] {'loss': 0.2544, 'grad_norm': 0.6643152170320716, 'learning_rate': 1.2376047378156952e-07, 'epoch': 0.93} + 93%|█████████▎| 11350/12188 [46:10<1:43:14, 7.39s/it] 93%|█████████▎| 11351/12188 [46:18<1:41:41, 7.29s/it] {'loss': 0.2986, 'grad_norm': 0.7055525165930083, 'learning_rate': 1.2346685007761428e-07, 'epoch': 0.93} + 93%|█████████▎| 11351/12188 [46:18<1:41:41, 7.29s/it] 93%|█████████▎| 11352/12188 [46:24<1:38:58, 7.10s/it] {'loss': 0.3106, 'grad_norm': 0.6648923882624149, 'learning_rate': 1.2317357074659476e-07, 'epoch': 0.93} + 93%|█████████▎| 11352/12188 [46:24<1:38:58, 7.10s/it] 93%|█████████▎| 11353/12188 [46:31<1:37:56, 7.04s/it] {'loss': 0.2635, 'grad_norm': 0.718062376020297, 'learning_rate': 1.2288063580922272e-07, 'epoch': 0.93} + 93%|█████████▎| 11353/12188 [46:31<1:37:56, 7.04s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f33ea171210> +[Try #0] Failed to fetch sample 4411617 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f33ea171210> +Problematic sample: {'image': '20240827_145511_before_screenshot_sub3.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Q2790: 100%'"}, {'from': 'gpt', 'value': '\nclick(x=0.8995, y=0.972)\n'}]} + 93%|█████████▎| 11354/12188 [46:39<1:40:38, 7.24s/it] {'loss': 0.3004, 'grad_norm': 0.6503120132524756, 'learning_rate': 1.2258804528618495e-07, 'epoch': 0.93} + 93%|█████████▎| 11354/12188 [46:39<1:40:38, 7.24s/it] 93%|█████████▎| 11355/12188 [46:46<1:39:08, 7.14s/it] {'loss': 0.2717, 'grad_norm': 0.9143751355420756, 'learning_rate': 1.2229579919814217e-07, 'epoch': 0.93} + 93%|█████████▎| 11355/12188 [46:46<1:39:08, 7.14s/it] 93%|█████████▎| 11356/12188 [46:53<1:39:39, 7.19s/it] {'loss': 0.2874, 'grad_norm': 1.1510743926299936, 'learning_rate': 1.2200389756573395e-07, 'epoch': 0.93} + 93%|█████████▎| 11356/12188 [46:53<1:39:39, 7.19s/it] 93%|█████████▎| 11357/12188 [47:01<1:41:26, 7.32s/it] {'loss': 0.2401, 'grad_norm': 0.7245339117904487, 'learning_rate': 1.2171234040957324e-07, 'epoch': 0.93} + 93%|█████████▎| 11357/12188 [47:01<1:41:26, 7.32s/it] 93%|█████████▎| 11358/12188 [47:08<1:39:47, 7.21s/it] {'loss': 0.3395, 'grad_norm': 0.7415385907502469, 'learning_rate': 1.2142112775024972e-07, 'epoch': 0.93} + 93%|█████████▎| 11358/12188 [47:08<1:39:47, 7.21s/it] 93%|█████████▎| 11359/12188 [47:14<1:37:05, 7.03s/it] {'loss': 0.3045, 'grad_norm': 0.6682161051816078, 'learning_rate': 1.2113025960832748e-07, 'epoch': 0.93} + 93%|█████████▎| 11359/12188 [47:14<1:37:05, 7.03s/it] 93%|█████████▎| 11360/12188 [47:21<1:36:41, 7.01s/it] {'loss': 0.2926, 'grad_norm': 0.6879866588702872, 'learning_rate': 1.2083973600434894e-07, 'epoch': 0.93} + 93%|█████████▎| 11360/12188 [47:21<1:36:41, 7.01s/it] 93%|█████████▎| 11361/12188 [47:28<1:35:55, 6.96s/it] {'loss': 0.2887, 'grad_norm': 0.8132246979543966, 'learning_rate': 1.205495569588283e-07, 'epoch': 0.93} + 93%|█████████▎| 11361/12188 [47:28<1:35:55, 6.96s/it] 93%|█████████▎| 11362/12188 [47:35<1:34:11, 6.84s/it] {'loss': 0.2948, 'grad_norm': 0.7622520729719174, 'learning_rate': 1.2025972249225914e-07, 'epoch': 0.93} + 93%|█████████▎| 11362/12188 [47:35<1:34:11, 6.84s/it] 93%|█████████▎| 11363/12188 [47:41<1:34:09, 6.85s/it] {'loss': 0.2978, 'grad_norm': 0.6627139676446722, 'learning_rate': 1.1997023262510788e-07, 'epoch': 0.93} + 93%|█████████▎| 11363/12188 [47:41<1:34:09, 6.85s/it] 93%|█████████▎| 11364/12188 [47:49<1:36:45, 7.05s/it] {'loss': 0.3043, 'grad_norm': 0.7323533392822588, 'learning_rate': 1.196810873778187e-07, 'epoch': 0.93} + 93%|█████████▎| 11364/12188 [47:49<1:36:45, 7.05s/it] 93%|█████████▎| 11365/12188 [47:56<1:37:57, 7.14s/it] {'loss': 0.3267, 'grad_norm': 0.8024970091716898, 'learning_rate': 1.1939228677081027e-07, 'epoch': 0.93} + 93%|█████████▎| 11365/12188 [47:56<1:37:57, 7.14s/it] 93%|█████████▎| 11366/12188 [48:04<1:39:48, 7.29s/it] {'loss': 0.272, 'grad_norm': 0.7876580680196464, 'learning_rate': 1.1910383082447797e-07, 'epoch': 0.93} + 93%|█████████▎| 11366/12188 [48:04<1:39:48, 7.29s/it] 93%|█████████▎| 11367/12188 [48:11<1:39:37, 7.28s/it] {'loss': 0.307, 'grad_norm': 0.6686217803696209, 'learning_rate': 1.1881571955919102e-07, 'epoch': 0.93} + 93%|█████████▎| 11367/12188 [48:11<1:39:37, 7.28s/it] 93%|█████████▎| 11368/12188 [48:18<1:38:41, 7.22s/it] {'loss': 0.2596, 'grad_norm': 0.6237141096406098, 'learning_rate': 1.1852795299529485e-07, 'epoch': 0.93} + 93%|█████████▎| 11368/12188 [48:18<1:38:41, 7.22s/it] 93%|█████████▎| 11369/12188 [48:25<1:37:18, 7.13s/it] {'loss': 0.2947, 'grad_norm': 0.7113006090328416, 'learning_rate': 1.1824053115311263e-07, 'epoch': 0.93} + 93%|█████████▎| 11369/12188 [48:25<1:37:18, 7.13s/it] 93%|█████████▎| 11370/12188 [48:32<1:36:34, 7.08s/it] {'loss': 0.2798, 'grad_norm': 0.6886269551889486, 'learning_rate': 1.179534540529409e-07, 'epoch': 0.93} + 93%|█████████▎| 11370/12188 [48:32<1:36:34, 7.08s/it] 93%|█████████▎| 11371/12188 [48:39<1:37:15, 7.14s/it] {'loss': 0.2833, 'grad_norm': 0.6930589411163497, 'learning_rate': 1.1766672171505289e-07, 'epoch': 0.93} + 93%|█████████▎| 11371/12188 [48:39<1:37:15, 7.14s/it] 93%|█████████▎| 11372/12188 [48:47<1:37:40, 7.18s/it] {'loss': 0.3186, 'grad_norm': 0.6843290662287956, 'learning_rate': 1.1738033415969686e-07, 'epoch': 0.93} + 93%|█████████▎| 11372/12188 [48:47<1:37:40, 7.18s/it] 93%|█████████▎| 11373/12188 [48:54<1:37:50, 7.20s/it] {'loss': 0.2832, 'grad_norm': 0.764636732848578, 'learning_rate': 1.1709429140709661e-07, 'epoch': 0.93} + 93%|█████████▎| 11373/12188 [48:54<1:37:50, 7.20s/it] 93%|█████████▎| 11374/12188 [49:02<1:42:00, 7.52s/it] {'loss': 0.317, 'grad_norm': 1.0214355942450375, 'learning_rate': 1.1680859347745266e-07, 'epoch': 0.93} + 93%|█████████▎| 11374/12188 [49:02<1:42:00, 7.52s/it] 93%|█████████▎| 11375/12188 [49:09<1:40:27, 7.41s/it] {'loss': 0.2868, 'grad_norm': 0.6877474192837875, 'learning_rate': 1.1652324039094054e-07, 'epoch': 0.93} + 93%|█████████▎| 11375/12188 [49:09<1:40:27, 7.41s/it] 93%|█████████▎| 11376/12188 [49:16<1:37:17, 7.19s/it] {'loss': 0.2889, 'grad_norm': 0.7243527669883117, 'learning_rate': 1.1623823216771135e-07, 'epoch': 0.93} + 93%|█████████▎| 11376/12188 [49:16<1:37:17, 7.19s/it] 93%|█████████▎| 11377/12188 [49:24<1:41:45, 7.53s/it] {'loss': 0.2892, 'grad_norm': 0.7557404623516543, 'learning_rate': 1.1595356882789122e-07, 'epoch': 0.93} + 93%|█████████▎| 11377/12188 [49:24<1:41:45, 7.53s/it] 93%|█████████▎| 11378/12188 [49:31<1:39:49, 7.39s/it] {'loss': 0.3126, 'grad_norm': 0.705037421443264, 'learning_rate': 1.1566925039158405e-07, 'epoch': 0.93} + 93%|█████████▎| 11378/12188 [49:31<1:39:49, 7.39s/it] 93%|█████████▎| 11379/12188 [49:38<1:37:33, 7.24s/it] {'loss': 0.2835, 'grad_norm': 0.7418587776668075, 'learning_rate': 1.1538527687886714e-07, 'epoch': 0.93} + 93%|█████████▎| 11379/12188 [49:38<1:37:33, 7.24s/it] 93%|█████████▎| 11380/12188 [49:46<1:38:23, 7.31s/it] {'loss': 0.2519, 'grad_norm': 0.6373291606390553, 'learning_rate': 1.1510164830979387e-07, 'epoch': 0.93} + 93%|█████████▎| 11380/12188 [49:46<1:38:23, 7.31s/it] 93%|█████████▎| 11381/12188 [49:53<1:37:42, 7.27s/it] {'loss': 0.2733, 'grad_norm': 0.7095178027565632, 'learning_rate': 1.1481836470439433e-07, 'epoch': 0.93} + 93%|█████████▎| 11381/12188 [49:53<1:37:42, 7.27s/it] 93%|█████████▎| 11382/12188 [50:00<1:35:01, 7.07s/it] {'loss': 0.2939, 'grad_norm': 0.6434011430175635, 'learning_rate': 1.145354260826731e-07, 'epoch': 0.93} + 93%|█████████▎| 11382/12188 [50:00<1:35:01, 7.07s/it] 93%|█████████▎| 11383/12188 [50:07<1:38:01, 7.31s/it] {'loss': 0.3158, 'grad_norm': 0.7153939346877215, 'learning_rate': 1.1425283246461083e-07, 'epoch': 0.93} + 93%|█████████▎| 11383/12188 [50:07<1:38:01, 7.31s/it] 93%|█████████▎| 11384/12188 [50:14<1:34:39, 7.06s/it] {'loss': 0.3099, 'grad_norm': 0.677953473224625, 'learning_rate': 1.139705838701638e-07, 'epoch': 0.93} + 93%|█████████▎| 11384/12188 [50:14<1:34:39, 7.06s/it] 93%|█████████▎| 11385/12188 [50:22<1:39:45, 7.45s/it] {'loss': 0.2751, 'grad_norm': 0.7272491752284268, 'learning_rate': 1.1368868031926494e-07, 'epoch': 0.93} + 93%|█████████▎| 11385/12188 [50:22<1:39:45, 7.45s/it] 93%|█████████▎| 11386/12188 [50:30<1:41:41, 7.61s/it] {'loss': 0.2918, 'grad_norm': 0.6695788894970581, 'learning_rate': 1.1340712183182002e-07, 'epoch': 0.93} + 93%|█████████▎| 11386/12188 [50:30<1:41:41, 7.61s/it] 93%|█████████▎| 11387/12188 [50:39<1:45:26, 7.90s/it] {'loss': 0.2998, 'grad_norm': 0.7222747712253254, 'learning_rate': 1.1312590842771365e-07, 'epoch': 0.93} + 93%|█████████▎| 11387/12188 [50:39<1:45:26, 7.90s/it] 93%|█████████▎| 11388/12188 [50:45<1:40:08, 7.51s/it] {'loss': 0.3175, 'grad_norm': 0.7424207244354372, 'learning_rate': 1.1284504012680442e-07, 'epoch': 0.93} + 93%|█████████▎| 11388/12188 [50:45<1:40:08, 7.51s/it] 93%|█████████▎| 11389/12188 [50:52<1:36:29, 7.25s/it] {'loss': 0.2637, 'grad_norm': 0.6795127391073896, 'learning_rate': 1.1256451694892645e-07, 'epoch': 0.93} + 93%|█████████▎| 11389/12188 [50:52<1:36:29, 7.25s/it] 93%|█████████▎| 11390/12188 [51:00<1:39:09, 7.46s/it] {'loss': 0.2718, 'grad_norm': 0.7642575206965928, 'learning_rate': 1.1228433891389056e-07, 'epoch': 0.93} + 93%|█████���███▎| 11390/12188 [51:00<1:39:09, 7.46s/it] 93%|█████████▎| 11391/12188 [51:07<1:36:22, 7.26s/it] {'loss': 0.2851, 'grad_norm': 0.6432906078263981, 'learning_rate': 1.1200450604148095e-07, 'epoch': 0.93} + 93%|█████████▎| 11391/12188 [51:07<1:36:22, 7.26s/it] 93%|█████████▎| 11392/12188 [51:15<1:38:54, 7.45s/it] {'loss': 0.3382, 'grad_norm': 0.6728861216529479, 'learning_rate': 1.1172501835146065e-07, 'epoch': 0.93} + 93%|█████████▎| 11392/12188 [51:15<1:38:54, 7.45s/it] 93%|█████████▎| 11393/12188 [51:25<1:49:22, 8.25s/it] {'loss': 0.2757, 'grad_norm': 0.69674952931343, 'learning_rate': 1.1144587586356614e-07, 'epoch': 0.93} + 93%|█████████▎| 11393/12188 [51:25<1:49:22, 8.25s/it] 93%|█████████▎| 11394/12188 [51:34<1:52:28, 8.50s/it] {'loss': 0.292, 'grad_norm': 0.7674367357913665, 'learning_rate': 1.1116707859750941e-07, 'epoch': 0.93} + 93%|█████████▎| 11394/12188 [51:34<1:52:28, 8.50s/it] 93%|█████████▎| 11395/12188 [51:43<1:56:06, 8.78s/it] {'loss': 0.2674, 'grad_norm': 0.6991860920018598, 'learning_rate': 1.1088862657297916e-07, 'epoch': 0.93} + 93%|█████████▎| 11395/12188 [51:43<1:56:06, 8.78s/it] 94%|█████████▎| 11396/12188 [51:50<1:47:12, 8.12s/it] {'loss': 0.3164, 'grad_norm': 0.7320706083341251, 'learning_rate': 1.106105198096391e-07, 'epoch': 0.93} + 94%|█████████▎| 11396/12188 [51:50<1:47:12, 8.12s/it] 94%|█████████▎| 11397/12188 [51:57<1:42:05, 7.74s/it] {'loss': 0.2826, 'grad_norm': 0.6927567785628175, 'learning_rate': 1.1033275832712853e-07, 'epoch': 0.94} + 94%|█████████▎| 11397/12188 [51:57<1:42:05, 7.74s/it] 94%|█████████▎| 11398/12188 [52:04<1:39:04, 7.52s/it] {'loss': 0.2982, 'grad_norm': 0.6925867431466574, 'learning_rate': 1.1005534214506342e-07, 'epoch': 0.94} + 94%|█████████▎| 11398/12188 [52:04<1:39:04, 7.52s/it] 94%|█████████▎| 11399/12188 [52:11<1:37:08, 7.39s/it] {'loss': 0.3062, 'grad_norm': 0.6524764695307873, 'learning_rate': 1.0977827128303309e-07, 'epoch': 0.94} + 94%|█████████▎| 11399/12188 [52:11<1:37:08, 7.39s/it] 94%|█████████▎| 11400/12188 [52:18<1:36:55, 7.38s/it] {'loss': 0.2816, 'grad_norm': 0.7120332745738039, 'learning_rate': 1.0950154576060413e-07, 'epoch': 0.94} + 94%|█████████▎| 11400/12188 [52:18<1:36:55, 7.38s/it] 94%|█████████▎| 11401/12188 [52:25<1:35:37, 7.29s/it] {'loss': 0.2668, 'grad_norm': 0.6810351289104225, 'learning_rate': 1.0922516559731978e-07, 'epoch': 0.94} + 94%|█████████▎| 11401/12188 [52:25<1:35:37, 7.29s/it] 94%|█████████▎| 11402/12188 [52:32<1:33:03, 7.10s/it] {'loss': 0.3269, 'grad_norm': 0.7191539255311966, 'learning_rate': 1.0894913081269608e-07, 'epoch': 0.94} + 94%|█████████▎| 11402/12188 [52:32<1:33:03, 7.10s/it] 94%|█████████▎| 11403/12188 [52:39<1:31:43, 7.01s/it] {'loss': 0.3108, 'grad_norm': 0.7077094364612563, 'learning_rate': 1.0867344142622693e-07, 'epoch': 0.94} + 94%|█████████▎| 11403/12188 [52:39<1:31:43, 7.01s/it] 94%|█████████▎| 11404/12188 [52:45<1:30:06, 6.90s/it] {'loss': 0.3031, 'grad_norm': 0.7192578921499797, 'learning_rate': 1.0839809745738061e-07, 'epoch': 0.94} + 94%|█████████▎| 11404/12188 [52:45<1:30:06, 6.90s/it] 94%|█████████▎| 11405/12188 [52:52<1:30:25, 6.93s/it] {'loss': 0.2794, 'grad_norm': 0.7150029024353461, 'learning_rate': 1.0812309892560157e-07, 'epoch': 0.94} + 94%|█████████▎| 11405/12188 [52:52<1:30:25, 6.93s/it] 94%|█████████▎| 11406/12188 [53:02<1:38:43, 7.58s/it] {'loss': 0.3225, 'grad_norm': 0.6251377928285317, 'learning_rate': 1.078484458503104e-07, 'epoch': 0.94} + 94%|█████████▎| 11406/12188 [53:02<1:38:43, 7.58s/it] 94%|█████████▎| 11407/12188 [53:08<1:34:28, 7.26s/it] {'loss': 0.2677, 'grad_norm': 0.6745416113262285, 'learning_rate': 1.0757413825090212e-07, 'epoch': 0.94} + 94%|█████████▎| 11407/12188 [53:08<1:34:28, 7.26s/it] 94%|█████████▎| 11408/12188 [53:15<1:34:08, 7.24s/it] {'loss': 0.2913, 'grad_norm': 0.7066834879761443, 'learning_rate': 1.0730017614674736e-07, 'epoch': 0.94} + 94%|█████████▎| 11408/12188 [53:15<1:34:08, 7.24s/it] 94%|█████████▎| 11409/12188 [53:22<1:32:56, 7.16s/it] {'loss': 0.3257, 'grad_norm': 0.6940824566268075, 'learning_rate': 1.07026559557194e-07, 'epoch': 0.94} + 94%|█████████▎| 11409/12188 [53:22<1:32:56, 7.16s/it] 94%|█████████▎| 11410/12188 [53:29<1:31:01, 7.02s/it] {'loss': 0.2896, 'grad_norm': 0.6684772613927763, 'learning_rate': 1.0675328850156319e-07, 'epoch': 0.94} + 94%|█████████▎| 11410/12188 [53:29<1:31:01, 7.02s/it] 94%|█████████▎| 11411/12188 [53:36<1:30:41, 7.00s/it] {'loss': 0.3562, 'grad_norm': 0.7013108676212123, 'learning_rate': 1.0648036299915399e-07, 'epoch': 0.94} + 94%|█████████▎| 11411/12188 [53:36<1:30:41, 7.00s/it] 94%|█████████▎| 11412/12188 [53:45<1:37:52, 7.57s/it] {'loss': 0.3078, 'grad_norm': 0.7026126439510249, 'learning_rate': 1.0620778306923929e-07, 'epoch': 0.94} + 94%|█████████▎| 11412/12188 [53:45<1:37:52, 7.57s/it] 94%|█████████▎| 11413/12188 [53:52<1:35:33, 7.40s/it] {'loss': 0.3324, 'grad_norm': 0.9196421106997658, 'learning_rate': 1.059355487310687e-07, 'epoch': 0.94} + 94%|█████████▎| 11413/12188 [53:52<1:35:33, 7.40s/it] 94%|█████████▎| 11414/12188 [53:59<1:34:12, 7.30s/it] {'loss': 0.2647, 'grad_norm': 0.6698138792592061, 'learning_rate': 1.0566366000386685e-07, 'epoch': 0.94} + 94%|█████████▎| 11414/12188 [53:59<1:34:12, 7.30s/it] 94%|█████████▎| 11415/12188 [54:06<1:33:46, 7.28s/it] {'loss': 0.2956, 'grad_norm': 0.7283313471583907, 'learning_rate': 1.0539211690683337e-07, 'epoch': 0.94} + 94%|█████████▎| 11415/12188 [54:06<1:33:46, 7.28s/it] 94%|█████████▎| 11416/12188 [54:13<1:31:20, 7.10s/it] {'loss': 0.285, 'grad_norm': 0.7629096957751474, 'learning_rate': 1.0512091945914571e-07, 'epoch': 0.94} + 94%|█████████▎| 11416/12188 [54:13<1:31:20, 7.10s/it] 94%|█████████▎| 11417/12188 [54:21<1:34:39, 7.37s/it] {'loss': 0.3025, 'grad_norm': 0.6645620847356021, 'learning_rate': 1.0485006767995354e-07, 'epoch': 0.94} + 94%|█████████▎| 11417/12188 [54:21<1:34:39, 7.37s/it] 94%|█████████▎| 11418/12188 [54:28<1:34:11, 7.34s/it] {'loss': 0.3345, 'grad_norm': 0.702933557898953, 'learning_rate': 1.0457956158838545e-07, 'epoch': 0.94} + 94%|█████████▎| 11418/12188 [54:28<1:34:11, 7.34s/it] 94%|█████████▎| 11419/12188 [54:35<1:33:51, 7.32s/it] {'loss': 0.3138, 'grad_norm': 0.7238014218756318, 'learning_rate': 1.0430940120354393e-07, 'epoch': 0.94} + 94%|█████████▎| 11419/12188 [54:35<1:33:51, 7.32s/it] 94%|█████████▎| 11420/12188 [54:45<1:41:06, 7.90s/it] {'loss': 0.3233, 'grad_norm': 0.6787078484690223, 'learning_rate': 1.0403958654450652e-07, 'epoch': 0.94} + 94%|█████████▎| 11420/12188 [54:45<1:41:06, 7.90s/it] 94%|█████████▎| 11421/12188 [54:52<1:38:57, 7.74s/it] {'loss': 0.2962, 'grad_norm': 0.7347312314064685, 'learning_rate': 1.037701176303274e-07, 'epoch': 0.94} + 94%|█████████▎| 11421/12188 [54:52<1:38:57, 7.74s/it] 94%|█████████▎| 11422/12188 [54:59<1:36:15, 7.54s/it] {'loss': 0.2847, 'grad_norm': 0.6889942276040103, 'learning_rate': 1.0350099448003692e-07, 'epoch': 0.94} + 94%|█████████▎| 11422/12188 [54:59<1:36:15, 7.54s/it] 94%|█████████▎| 11423/12188 [55:07<1:39:50, 7.83s/it] {'loss': 0.32, 'grad_norm': 0.6749469185759392, 'learning_rate': 1.0323221711263876e-07, 'epoch': 0.94} + 94%|█████████▎| 11423/12188 [55:07<1:39:50, 7.83s/it] 94%|█████████▎| 11424/12188 [55:14<1:35:43, 7.52s/it] {'loss': 0.303, 'grad_norm': 0.7430444587154149, 'learning_rate': 1.0296378554711439e-07, 'epoch': 0.94} + 94%|█████████▎| 11424/12188 [55:14<1:35:43, 7.52s/it] 94%|█████████▎| 11425/12188 [55:21<1:32:17, 7.26s/it] {'loss': 0.2723, 'grad_norm': 0.6473361254547878, 'learning_rate': 1.0269569980241978e-07, 'epoch': 0.94} + 94%|█████████▎| 11425/12188 [55:21<1:32:17, 7.26s/it] 94%|█████████▎| 11426/12188 [55:28<1:30:07, 7.10s/it] {'loss': 0.3183, 'grad_norm': 0.6902709558388592, 'learning_rate': 1.02427959897487e-07, 'epoch': 0.94} + 94%|█████████▎| 11426/12188 [55:28<1:30:07, 7.10s/it] 94%|█████████▍| 11427/12188 [55:36<1:32:58, 7.33s/it] {'loss': 0.2598, 'grad_norm': 1.1388357936171465, 'learning_rate': 1.0216056585122258e-07, 'epoch': 0.94} + 94%|█████████▍| 11427/12188 [55:36<1:32:58, 7.33s/it] 94%|█████████▍| 11428/12188 [55:42<1:30:56, 7.18s/it] {'loss': 0.3173, 'grad_norm': 0.731706513874735, 'learning_rate': 1.0189351768250977e-07, 'epoch': 0.94} + 94%|█████████▍| 11428/12188 [55:42<1:30:56, 7.18s/it] 94%|█████████▍| 11429/12188 [55:49<1:28:47, 7.02s/it] {'loss': 0.2952, 'grad_norm': 0.6682030191163658, 'learning_rate': 1.0162681541020791e-07, 'epoch': 0.94} + 94%|█████████▍| 11429/12188 [55:49<1:28:47, 7.02s/it] 94%|█████████▍| 11430/12188 [55:56<1:28:38, 7.02s/it] {'loss': 0.3512, 'grad_norm': 0.6683870633164067, 'learning_rate': 1.0136045905315028e-07, 'epoch': 0.94} + 94%|█████████▍| 11430/12188 [55:56<1:28:38, 7.02s/it] 94%|█████████▍| 11431/12188 [56:03<1:28:22, 7.00s/it] {'loss': 0.2801, 'grad_norm': 0.6645153883428933, 'learning_rate': 1.0109444863014684e-07, 'epoch': 0.94} + 94%|█████████▍| 11431/12188 [56:03<1:28:22, 7.00s/it] 94%|█████████▍| 11432/12188 [56:10<1:27:03, 6.91s/it] {'loss': 0.2936, 'grad_norm': 0.8113522885705011, 'learning_rate': 1.0082878415998309e-07, 'epoch': 0.94} + 94%|█████████▍| 11432/12188 [56:10<1:27:03, 6.91s/it] 94%|█████████▍| 11433/12188 [56:17<1:27:58, 6.99s/it] {'loss': 0.329, 'grad_norm': 0.7964447173703453, 'learning_rate': 1.0056346566141906e-07, 'epoch': 0.94} + 94%|█████████▍| 11433/12188 [56:17<1:27:58, 6.99s/it] 94%|█████████▍| 11434/12188 [56:24<1:28:51, 7.07s/it] {'loss': 0.2994, 'grad_norm': 0.6837996186234019, 'learning_rate': 1.0029849315319196e-07, 'epoch': 0.94} + 94%|█████████▍| 11434/12188 [56:24<1:28:51, 7.07s/it] 94%|█████████▍| 11435/12188 [56:31<1:27:37, 6.98s/it] {'loss': 0.3159, 'grad_norm': 0.7024778601294469, 'learning_rate': 1.0003386665401293e-07, 'epoch': 0.94} + 94%|█████████▍| 11435/12188 [56:31<1:27:37, 6.98s/it] 94%|█████████▍| 11436/12188 [56:38<1:29:44, 7.16s/it] {'loss': 0.2878, 'grad_norm': 0.6890442553898564, 'learning_rate': 9.976958618256982e-08, 'epoch': 0.94} + 94%|█████████▍| 11436/12188 [56:38<1:29:44, 7.16s/it] 94%|█████████▍| 11437/12188 [56:46<1:30:45, 7.25s/it] {'loss': 0.3435, 'grad_norm': 0.7259112840620162, 'learning_rate': 9.950565175752602e-08, 'epoch': 0.94} + 94%|█████████▍| 11437/12188 [56:46<1:30:45, 7.25s/it] 94%|█████████▍| 11438/12188 [56:54<1:32:32, 7.40s/it] {'loss': 0.3157, 'grad_norm': 0.6869387281566256, 'learning_rate': 9.92420633975194e-08, 'epoch': 0.94} + 94%|█████████▍| 11438/12188 [56:54<1:32:32, 7.40s/it] 94%|█████████▍| 11439/12188 [57:02<1:37:26, 7.81s/it] {'loss': 0.2773, 'grad_norm': 0.7058088887540364, 'learning_rate': 9.897882112116509e-08, 'epoch': 0.94} + 94%|█████████▍| 11439/12188 [57:02<1:37:26, 7.81s/it] 94%|█████████▍| 11440/12188 [57:09<1:33:19, 7.49s/it] {'loss': 0.3107, 'grad_norm': 0.67782561600126, 'learning_rate': 9.871592494705206e-08, 'epoch': 0.94} + 94%|█████████▍| 11440/12188 [57:09<1:33:19, 7.49s/it] 94%|█████████▍| 11441/12188 [57:20<1:46:21, 8.54s/it] {'loss': 0.2921, 'grad_norm': 0.7722522037761208, 'learning_rate': 9.845337489374551e-08, 'epoch': 0.94} + 94%|█████████▍| 11441/12188 [57:20<1:46:21, 8.54s/it] 94%|█████████▍| 11442/12188 [57:29<1:46:07, 8.54s/it] {'loss': 0.3063, 'grad_norm': 0.7663882985313167, 'learning_rate': 9.819117097978726e-08, 'epoch': 0.94} + 94%|█████████▍| 11442/12188 [57:29<1:46:07, 8.54s/it] 94%|█████████▍| 11443/12188 [57:38<1:47:04, 8.62s/it] {'loss': 0.291, 'grad_norm': 0.7113863444665262, 'learning_rate': 9.792931322369304e-08, 'epoch': 0.94} + 94%|█████████▍| 11443/12188 [57:38<1:47:04, 8.62s/it] 94%|█████████▍| 11444/12188 [57:46<1:46:42, 8.61s/it] {'loss': 0.2839, 'grad_norm': 0.7354604567156434, 'learning_rate': 9.766780164395472e-08, 'epoch': 0.94} + 94%|█████████▍| 11444/12188 [57:46<1:46:42, 8.61s/it] 94%|█████████▍| 11445/12188 [57:53<1:41:33, 8.20s/it] {'loss': 0.2888, 'grad_norm': 0.7328924880999031, 'learning_rate': 9.740663625904034e-08, 'epoch': 0.94} + 94%|█████████▍| 11445/12188 [57:53<1:41:33, 8.20s/it] 94%|█████████▍| 11446/12188 [58:00<1:35:22, 7.71s/it] {'loss': 0.3163, 'grad_norm': 0.7491876630320152, 'learning_rate': 9.714581708739235e-08, 'epoch': 0.94} + 94%|█████████▍| 11446/12188 [58:00<1:35:22, 7.71s/it] 94%|█████████▍| 11447/12188 [58:10<1:42:23, 8.29s/it] {'loss': 0.296, 'grad_norm': 0.708030989215466, 'learning_rate': 9.688534414743045e-08, 'epoch': 0.94} + 94%|█████████▍| 11447/12188 [58:10<1:42:23, 8.29s/it] 94%|█████████▍| 11448/12188 [58:17<1:38:19, 7.97s/it] {'loss': 0.3275, 'grad_norm': 0.8994742036769268, 'learning_rate': 9.662521745754716e-08, 'epoch': 0.94} + 94%|█████████▍| 11448/12188 [58:17<1:38:19, 7.97s/it] 94%|█████████▍| 11449/12188 [58:24<1:34:14, 7.65s/it] {'loss': 0.2747, 'grad_norm': 0.7929552189329202, 'learning_rate': 9.63654370361139e-08, 'epoch': 0.94} + 94%|█████████▍| 11449/12188 [58:24<1:34:14, 7.65s/it] 94%|█████████▍| 11450/12188 [58:33<1:41:43, 8.27s/it] {'loss': 0.2876, 'grad_norm': 0.7657098830473625, 'learning_rate': 9.610600290147487e-08, 'epoch': 0.94} + 94%|█████████▍| 11450/12188 [58:33<1:41:43, 8.27s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f725153c770> +[Try #0] Failed to fetch sample 4654491 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f725153c770> +Problematic sample: {'image': '20240827_145511_before_screenshot_sub3.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Show desktop'"}, {'from': 'gpt', 'value': '\nclick(x=0.998, y=0.972)\n'}]} + 94%|█████████▍| 11451/12188 [58:40<1:35:31, 7.78s/it] {'loss': 0.3199, 'grad_norm': 0.7586222629183531, 'learning_rate': 9.584691507195098e-08, 'epoch': 0.94} + 94%|█████████▍| 11451/12188 [58:40<1:35:31, 7.78s/it] 94%|█████████▍| 11452/12188 [58:48<1:37:12, 7.92s/it] {'loss': 0.3373, 'grad_norm': 0.7427473629366543, 'learning_rate': 9.558817356583926e-08, 'epoch': 0.94} + 94%|█████████▍| 11452/12188 [58:48<1:37:12, 7.92s/it] 94%|█████████▍| 11453/12188 [58:57<1:39:10, 8.10s/it] {'loss': 0.2736, 'grad_norm': 0.7348968661113876, 'learning_rate': 9.532977840141123e-08, 'epoch': 0.94} + 94%|█████████▍| 11453/12188 [58:57<1:39:10, 8.10s/it] 94%|█████████▍| 11454/12188 [59:04<1:36:59, 7.93s/it] {'loss': 0.2774, 'grad_norm': 0.7310794884839442, 'learning_rate': 9.507172959691336e-08, 'epoch': 0.94} + 94%|█████████▍| 11454/12188 [59:04<1:36:59, 7.93s/it] 94%|█████████▍| 11455/12188 [59:11<1:32:55, 7.61s/it] {'loss': 0.3306, 'grad_norm': 0.7760736364952775, 'learning_rate': 9.481402717057053e-08, 'epoch': 0.94} + 94%|█████████▍| 11455/12188 [59:11<1:32:55, 7.61s/it] 94%|█████████▍| 11456/12188 [59:18<1:29:46, 7.36s/it] {'loss': 0.2961, 'grad_norm': 0.701662949351488, 'learning_rate': 9.455667114057988e-08, 'epoch': 0.94} + 94%|█████████▍| 11456/12188 [59:18<1:29:46, 7.36s/it] 94%|█████████▍| 11457/12188 [59:25<1:30:18, 7.41s/it] {'loss': 0.3189, 'grad_norm': 0.6888398741300183, 'learning_rate': 9.429966152511627e-08, 'epoch': 0.94} + 94%|█████████▍| 11457/12188 [59:25<1:30:18, 7.41s/it] 94%|█████████▍| 11458/12188 [59:33<1:30:47, 7.46s/it] {'loss': 0.2626, 'grad_norm': 0.6796679192059882, 'learning_rate': 9.404299834232855e-08, 'epoch': 0.94} + 94%|█████████▍| 11458/12188 [59:33<1:30:47, 7.46s/it] 94%|█████████▍| 11459/12188 [59:40<1:29:53, 7.40s/it] {'loss': 0.2922, 'grad_norm': 0.7148741081850765, 'learning_rate': 9.378668161034222e-08, 'epoch': 0.94} + 94%|█████████▍| 11459/12188 [59:40<1:29:53, 7.40s/it] 94%|█████████▍| 11460/12188 [59:49<1:35:21, 7.86s/it] {'loss': 0.2677, 'grad_norm': 0.6871698670469886, 'learning_rate': 9.353071134725888e-08, 'epoch': 0.94} + 94%|█████████▍| 11460/12188 [59:49<1:35:21, 7.86s/it] 94%|█████████▍| 11461/12188 [59:57<1:33:19, 7.70s/it] {'loss': 0.2994, 'grad_norm': 0.6935554486941434, 'learning_rate': 9.3275087571153e-08, 'epoch': 0.94} + 94%|█████████▍| 11461/12188 [59:57<1:33:19, 7.70s/it] 94%|█████████▍| 11462/12188 [1:00:04<1:31:58, 7.60s/it] {'loss': 0.2865, 'grad_norm': 0.8540209163476267, 'learning_rate': 9.301981030007734e-08, 'epoch': 0.94} + 94%|█████████▍| 11462/12188 [1:00:04<1:31:58, 7.60s/it] 94%|█████████▍| 11463/12188 [1:00:12<1:31:40, 7.59s/it] {'loss': 0.2636, 'grad_norm': 0.66482085248686, 'learning_rate': 9.27648795520597e-08, 'epoch': 0.94} + 94%|█████████▍| 11463/12188 [1:00:12<1:31:40, 7.59s/it] 94%|█████████▍| 11464/12188 [1:00:20<1:33:14, 7.73s/it] {'loss': 0.2902, 'grad_norm': 0.6619617278428446, 'learning_rate': 9.251029534510126e-08, 'epoch': 0.94} + 94%|█████████▍| 11464/12188 [1:00:20<1:33:14, 7.73s/it] 94%|█████████▍| 11465/12188 [1:00:28<1:33:53, 7.79s/it] {'loss': 0.3091, 'grad_norm': 0.7028078227457657, 'learning_rate': 9.225605769718149e-08, 'epoch': 0.94} + 94%|███████��█▍| 11465/12188 [1:00:28<1:33:53, 7.79s/it] 94%|█████████▍| 11466/12188 [1:00:34<1:30:28, 7.52s/it] {'loss': 0.258, 'grad_norm': 0.6862602765249473, 'learning_rate': 9.200216662625439e-08, 'epoch': 0.94} + 94%|█████████▍| 11466/12188 [1:00:34<1:30:28, 7.52s/it] 94%|█████████▍| 11467/12188 [1:00:41<1:27:30, 7.28s/it] {'loss': 0.2873, 'grad_norm': 0.7508916579149345, 'learning_rate': 9.174862215024837e-08, 'epoch': 0.94} + 94%|█████████▍| 11467/12188 [1:00:41<1:27:30, 7.28s/it] 94%|█████████▍| 11468/12188 [1:00:51<1:35:17, 7.94s/it] {'loss': 0.284, 'grad_norm': 0.6547282055816662, 'learning_rate': 9.14954242870697e-08, 'epoch': 0.94} + 94%|█████████▍| 11468/12188 [1:00:51<1:35:17, 7.94s/it] 94%|█████████▍| 11469/12188 [1:00:59<1:38:38, 8.23s/it] {'loss': 0.2685, 'grad_norm': 0.7723083986463452, 'learning_rate': 9.124257305459738e-08, 'epoch': 0.94} + 94%|█████████▍| 11469/12188 [1:01:00<1:38:38, 8.23s/it] 94%|█████████▍| 11470/12188 [1:01:07<1:36:17, 8.05s/it] {'loss': 0.2847, 'grad_norm': 0.7134827890280564, 'learning_rate': 9.099006847068826e-08, 'epoch': 0.94} + 94%|█████████▍| 11470/12188 [1:01:07<1:36:17, 8.05s/it] 94%|█████████▍| 11471/12188 [1:01:14<1:32:02, 7.70s/it] {'loss': 0.3105, 'grad_norm': 1.0703280578923817, 'learning_rate': 9.07379105531736e-08, 'epoch': 0.94} + 94%|█████████▍| 11471/12188 [1:01:14<1:32:02, 7.70s/it] 94%|█████████▍| 11472/12188 [1:01:21<1:29:28, 7.50s/it] {'loss': 0.3195, 'grad_norm': 0.6774738811676279, 'learning_rate': 9.04860993198603e-08, 'epoch': 0.94} + 94%|█████████▍| 11472/12188 [1:01:21<1:29:28, 7.50s/it] 94%|█████████▍| 11473/12188 [1:01:28<1:28:02, 7.39s/it] {'loss': 0.3073, 'grad_norm': 0.6579387166517395, 'learning_rate': 9.023463478853134e-08, 'epoch': 0.94} + 94%|█████████▍| 11473/12188 [1:01:28<1:28:02, 7.39s/it] 94%|█████████▍| 11474/12188 [1:01:35<1:26:52, 7.30s/it] {'loss': 0.3019, 'grad_norm': 0.7069567934525888, 'learning_rate': 8.998351697694363e-08, 'epoch': 0.94} + 94%|█████████▍| 11474/12188 [1:01:35<1:26:52, 7.30s/it] 94%|█████████▍| 11475/12188 [1:01:42<1:25:44, 7.21s/it] {'loss': 0.285, 'grad_norm': 0.7360174293028335, 'learning_rate': 8.973274590283188e-08, 'epoch': 0.94} + 94%|█████████▍| 11475/12188 [1:01:42<1:25:44, 7.21s/it] 94%|█████████▍| 11476/12188 [1:01:49<1:25:32, 7.21s/it] {'loss': 0.3243, 'grad_norm': 0.6726513906912551, 'learning_rate': 8.948232158390468e-08, 'epoch': 0.94} + 94%|█████████▍| 11476/12188 [1:01:49<1:25:32, 7.21s/it] 94%|█████████▍| 11477/12188 [1:01:57<1:24:59, 7.17s/it] {'loss': 0.2895, 'grad_norm': 0.6868743781780161, 'learning_rate': 8.923224403784681e-08, 'epoch': 0.94} + 94%|█████████▍| 11477/12188 [1:01:57<1:24:59, 7.17s/it] 94%|█████████▍| 11478/12188 [1:02:03<1:22:43, 6.99s/it] {'loss': 0.2696, 'grad_norm': 0.6489240257269928, 'learning_rate': 8.898251328231799e-08, 'epoch': 0.94} + 94%|█████████▍| 11478/12188 [1:02:03<1:22:43, 6.99s/it] 94%|█████████▍| 11479/12188 [1:02:13<1:31:54, 7.78s/it] {'loss': 0.2825, 'grad_norm': 0.7097224732814252, 'learning_rate': 8.873312933495359e-08, 'epoch': 0.94} + 94%|█████████▍| 11479/12188 [1:02:13<1:31:54, 7.78s/it] 94%|█████████▍| 11480/12188 [1:02:21<1:32:10, 7.81s/it] {'loss': 0.2609, 'grad_norm': 0.6264540945808537, 'learning_rate': 8.848409221336452e-08, 'epoch': 0.94} + 94%|█████████▍| 11480/12188 [1:02:21<1:32:10, 7.81s/it] 94%|█████████▍| 11481/12188 [1:02:28<1:29:10, 7.57s/it] {'loss': 0.3051, 'grad_norm': 0.6968199022972988, 'learning_rate': 8.823540193513891e-08, 'epoch': 0.94} + 94%|████���████▍| 11481/12188 [1:02:28<1:29:10, 7.57s/it] 94%|█████████▍| 11482/12188 [1:02:35<1:28:10, 7.49s/it] {'loss': 0.2981, 'grad_norm': 0.7072539667819604, 'learning_rate': 8.798705851783717e-08, 'epoch': 0.94} + 94%|█████████▍| 11482/12188 [1:02:35<1:28:10, 7.49s/it] 94%|█████████▍| 11483/12188 [1:02:42<1:26:46, 7.39s/it] {'loss': 0.2882, 'grad_norm': 0.7514776141079097, 'learning_rate': 8.773906197899861e-08, 'epoch': 0.94} + 94%|█████████▍| 11483/12188 [1:02:42<1:26:46, 7.39s/it] 94%|█████████▍| 11484/12188 [1:02:51<1:30:52, 7.74s/it] {'loss': 0.2608, 'grad_norm': 0.6735048082818708, 'learning_rate': 8.749141233613422e-08, 'epoch': 0.94} + 94%|█████████▍| 11484/12188 [1:02:51<1:30:52, 7.74s/it] 94%|█████████▍| 11485/12188 [1:02:58<1:30:02, 7.69s/it] {'loss': 0.2993, 'grad_norm': 0.7626265008567341, 'learning_rate': 8.72441096067339e-08, 'epoch': 0.94} + 94%|█████████▍| 11485/12188 [1:02:58<1:30:02, 7.69s/it] 94%|█████████▍| 11486/12188 [1:03:05<1:25:31, 7.31s/it] {'loss': 0.3019, 'grad_norm': 0.6947325713724519, 'learning_rate': 8.699715380826256e-08, 'epoch': 0.94} + 94%|█████████▍| 11486/12188 [1:03:05<1:25:31, 7.31s/it] 94%|█████████▍| 11487/12188 [1:03:12<1:26:04, 7.37s/it] {'loss': 0.3306, 'grad_norm': 0.7214082257416643, 'learning_rate': 8.675054495815793e-08, 'epoch': 0.94} + 94%|█████████▍| 11487/12188 [1:03:12<1:26:04, 7.37s/it] 94%|█████████▍| 11488/12188 [1:03:19<1:25:11, 7.30s/it] {'loss': 0.2815, 'grad_norm': 0.7218178707491109, 'learning_rate': 8.650428307383663e-08, 'epoch': 0.94} + 94%|█████████▍| 11488/12188 [1:03:19<1:25:11, 7.30s/it] 94%|█████████▍| 11489/12188 [1:03:27<1:25:35, 7.35s/it] {'loss': 0.2996, 'grad_norm': 0.698172270630462, 'learning_rate': 8.625836817268806e-08, 'epoch': 0.94} + 94%|█████████▍| 11489/12188 [1:03:27<1:25:35, 7.35s/it] 94%|█████████▍| 11490/12188 [1:03:34<1:24:29, 7.26s/it] {'loss': 0.2738, 'grad_norm': 0.7116114522674988, 'learning_rate': 8.601280027208004e-08, 'epoch': 0.94} + 94%|█████████▍| 11490/12188 [1:03:34<1:24:29, 7.26s/it] 94%|█████████▍| 11491/12188 [1:03:42<1:29:18, 7.69s/it] {'loss': 0.3144, 'grad_norm': 0.7185916766519873, 'learning_rate': 8.576757938935254e-08, 'epoch': 0.94} + 94%|█████████▍| 11491/12188 [1:03:43<1:29:18, 7.69s/it] 94%|█████████▍| 11492/12188 [1:03:49<1:26:32, 7.46s/it] {'loss': 0.3036, 'grad_norm': 0.7078850606515764, 'learning_rate': 8.552270554182284e-08, 'epoch': 0.94} + 94%|█████████▍| 11492/12188 [1:03:49<1:26:32, 7.46s/it] 94%|█████████▍| 11493/12188 [1:03:58<1:29:14, 7.70s/it] {'loss': 0.2282, 'grad_norm': 0.706721619488562, 'learning_rate': 8.52781787467849e-08, 'epoch': 0.94} + 94%|█████████▍| 11493/12188 [1:03:58<1:29:14, 7.70s/it] 94%|█████████▍| 11494/12188 [1:04:05<1:27:50, 7.59s/it] {'loss': 0.2606, 'grad_norm': 0.6633821453983167, 'learning_rate': 8.503399902150488e-08, 'epoch': 0.94} + 94%|█████████▍| 11494/12188 [1:04:05<1:27:50, 7.59s/it] 94%|█████████▍| 11495/12188 [1:04:13<1:27:18, 7.56s/it] {'loss': 0.3325, 'grad_norm': 0.7048127757722764, 'learning_rate': 8.479016638322791e-08, 'epoch': 0.94} + 94%|█████████▍| 11495/12188 [1:04:13<1:27:18, 7.56s/it] 94%|█████████▍| 11496/12188 [1:04:21<1:30:20, 7.83s/it] {'loss': 0.2996, 'grad_norm': 0.7249995695128444, 'learning_rate': 8.45466808491724e-08, 'epoch': 0.94} + 94%|█████████▍| 11496/12188 [1:04:21<1:30:20, 7.83s/it] 94%|█████████▍| 11497/12188 [1:04:28<1:26:26, 7.51s/it] {'loss': 0.3287, 'grad_norm': 0.697100500202365, 'learning_rate': 8.430354243653294e-08, 'epoch': 0.94} + 94%|█████████▍| 11497/12188 [1:04:28<1:26:26, 7.51s/it] 94%|█████████▍| 11498/12188 [1:04:35<1:25:12, 7.41s/it] {'loss': 0.3223, 'grad_norm': 0.6437952546123307, 'learning_rate': 8.40607511624797e-08, 'epoch': 0.94} + 94%|█████████▍| 11498/12188 [1:04:35<1:25:12, 7.41s/it] 94%|█████████▍| 11499/12188 [1:04:45<1:34:49, 8.26s/it] {'loss': 0.2706, 'grad_norm': 0.723190603127589, 'learning_rate': 8.381830704415839e-08, 'epoch': 0.94} + 94%|█████████▍| 11499/12188 [1:04:45<1:34:49, 8.26s/it] 94%|█████████▍| 11500/12188 [1:04:52<1:31:26, 7.98s/it] {'loss': 0.3267, 'grad_norm': 0.6578161932896323, 'learning_rate': 8.357621009868921e-08, 'epoch': 0.94} + 94%|█████████▍| 11500/12188 [1:04:52<1:31:26, 7.98s/it] 94%|█████████▍| 11501/12188 [1:05:00<1:28:48, 7.76s/it] {'loss': 0.2753, 'grad_norm': 0.6661708796472642, 'learning_rate': 8.333446034317017e-08, 'epoch': 0.94} + 94%|█████████▍| 11501/12188 [1:05:00<1:28:48, 7.76s/it] 94%|█████████▍| 11502/12188 [1:05:06<1:25:18, 7.46s/it] {'loss': 0.2951, 'grad_norm': 0.6752583827360523, 'learning_rate': 8.309305779467147e-08, 'epoch': 0.94} + 94%|█████████▍| 11502/12188 [1:05:06<1:25:18, 7.46s/it] 94%|█████████▍| 11503/12188 [1:05:13<1:22:13, 7.20s/it] {'loss': 0.3394, 'grad_norm': 0.7412918260204266, 'learning_rate': 8.285200247024172e-08, 'epoch': 0.94} + 94%|█████████▍| 11503/12188 [1:05:13<1:22:13, 7.20s/it] 94%|█████████▍| 11504/12188 [1:05:20<1:21:15, 7.13s/it] {'loss': 0.2964, 'grad_norm': 0.6040991633364576, 'learning_rate': 8.26112943869034e-08, 'epoch': 0.94} + 94%|█████████▍| 11504/12188 [1:05:20<1:21:15, 7.13s/it] 94%|█████████▍| 11505/12188 [1:05:27<1:22:14, 7.23s/it] {'loss': 0.2635, 'grad_norm': 0.7098190473250056, 'learning_rate': 8.237093356165571e-08, 'epoch': 0.94} + 94%|█████████▍| 11505/12188 [1:05:28<1:22:14, 7.23s/it] 94%|█████████▍| 11506/12188 [1:05:35<1:23:24, 7.34s/it] {'loss': 0.2989, 'grad_norm': 0.8917316000444927, 'learning_rate': 8.213092001147117e-08, 'epoch': 0.94} + 94%|█████████▍| 11506/12188 [1:05:35<1:23:24, 7.34s/it] 94%|█████████▍| 11507/12188 [1:05:43<1:25:44, 7.56s/it] {'loss': 0.3148, 'grad_norm': 0.830300239950123, 'learning_rate': 8.189125375330009e-08, 'epoch': 0.94} + 94%|█████████▍| 11507/12188 [1:05:43<1:25:44, 7.56s/it] 94%|█████████▍| 11508/12188 [1:05:50<1:23:21, 7.36s/it] {'loss': 0.3112, 'grad_norm': 0.7022167984390512, 'learning_rate': 8.16519348040673e-08, 'epoch': 0.94} + 94%|█████████▍| 11508/12188 [1:05:50<1:23:21, 7.36s/it] 94%|█████████▍| 11509/12188 [1:05:58<1:25:44, 7.58s/it] {'loss': 0.3288, 'grad_norm': 0.7601301156569693, 'learning_rate': 8.14129631806726e-08, 'epoch': 0.94} + 94%|█████████▍| 11509/12188 [1:05:58<1:25:44, 7.58s/it] 94%|█████████▍| 11510/12188 [1:06:06<1:27:03, 7.70s/it] {'loss': 0.2861, 'grad_norm': 0.7140502961697428, 'learning_rate': 8.117433889999248e-08, 'epoch': 0.94} + 94%|█████████▍| 11510/12188 [1:06:06<1:27:03, 7.70s/it] 94%|█████████▍| 11511/12188 [1:06:13<1:24:29, 7.49s/it] {'loss': 0.2901, 'grad_norm': 0.7843973934227599, 'learning_rate': 8.093606197887793e-08, 'epoch': 0.94} + 94%|█████████▍| 11511/12188 [1:06:13<1:24:29, 7.49s/it] 94%|█████████▍| 11512/12188 [1:06:20<1:21:16, 7.21s/it] {'loss': 0.3115, 'grad_norm': 0.6914713456566951, 'learning_rate': 8.069813243415492e-08, 'epoch': 0.94} + 94%|█████████▍| 11512/12188 [1:06:20<1:21:16, 7.21s/it] 94%|█████████▍| 11513/12188 [1:06:27<1:22:58, 7.38s/it] {'loss': 0.3235, 'grad_norm': 0.7110298343113762, 'learning_rate': 8.046055028262667e-08, 'epoch': 0.94} + 94%|█████████▍| 11513/12188 [1:06:27<1:22:58, 7.38s/it] 94%|█████████▍| 11514/12188 [1:06:35<1:23:58, 7.48s/it] {'loss': 0.2994, 'grad_norm': 0.8206846483961602, 'learning_rate': 8.022331554107088e-08, 'epoch': 0.94} + 94%|█████████▍| 11514/12188 [1:06:35<1:23:58, 7.48s/it] 94%|█████████▍| 11515/12188 [1:06:42<1:22:50, 7.39s/it] {'loss': 0.3161, 'grad_norm': 0.724704633175941, 'learning_rate': 7.998642822624025e-08, 'epoch': 0.94} + 94%|█████████▍| 11515/12188 [1:06:42<1:22:50, 7.39s/it] 94%|█████████▍| 11516/12188 [1:06:50<1:22:52, 7.40s/it] {'loss': 0.2962, 'grad_norm': 0.7718271850696006, 'learning_rate': 7.974988835486308e-08, 'epoch': 0.94} + 94%|█████████▍| 11516/12188 [1:06:50<1:22:52, 7.40s/it] 94%|█████████▍| 11517/12188 [1:06:56<1:20:31, 7.20s/it] {'loss': 0.2931, 'grad_norm': 0.7259245990920091, 'learning_rate': 7.951369594364433e-08, 'epoch': 0.94} + 94%|█████████▍| 11517/12188 [1:06:57<1:20:31, 7.20s/it] 95%|█████████▍| 11518/12188 [1:07:03<1:18:40, 7.05s/it] {'loss': 0.3085, 'grad_norm': 0.757186414454943, 'learning_rate': 7.927785100926288e-08, 'epoch': 0.94} + 95%|█████████▍| 11518/12188 [1:07:03<1:18:40, 7.05s/it] 95%|█████████▍| 11519/12188 [1:07:11<1:19:39, 7.14s/it] {'loss': 0.3056, 'grad_norm': 0.6910295984736944, 'learning_rate': 7.904235356837376e-08, 'epoch': 0.95} + 95%|█████████▍| 11519/12188 [1:07:11<1:19:39, 7.14s/it] 95%|█████████▍| 11520/12188 [1:07:18<1:20:43, 7.25s/it] {'loss': 0.259, 'grad_norm': 0.654463135183127, 'learning_rate': 7.880720363760753e-08, 'epoch': 0.95} + 95%|█████████▍| 11520/12188 [1:07:18<1:20:43, 7.25s/it] 95%|█████████▍| 11521/12188 [1:07:27<1:26:39, 7.80s/it] {'loss': 0.3089, 'grad_norm': 0.7334688480209215, 'learning_rate': 7.85724012335698e-08, 'epoch': 0.95} + 95%|█████████▍| 11521/12188 [1:07:27<1:26:39, 7.80s/it] 95%|█████████▍| 11522/12188 [1:07:34<1:23:55, 7.56s/it] {'loss': 0.3156, 'grad_norm': 0.7528558193109042, 'learning_rate': 7.833794637284232e-08, 'epoch': 0.95} + 95%|█████████▍| 11522/12188 [1:07:34<1:23:55, 7.56s/it] 95%|█████████▍| 11523/12188 [1:07:44<1:30:59, 8.21s/it] {'loss': 0.2584, 'grad_norm': 0.6123521245013263, 'learning_rate': 7.810383907198182e-08, 'epoch': 0.95} + 95%|█████████▍| 11523/12188 [1:07:44<1:30:59, 8.21s/it] 95%|█████████▍| 11524/12188 [1:07:51<1:28:27, 7.99s/it] {'loss': 0.2699, 'grad_norm': 0.698069537920354, 'learning_rate': 7.787007934752067e-08, 'epoch': 0.95} + 95%|█████████▍| 11524/12188 [1:07:51<1:28:27, 7.99s/it] 95%|█████████▍| 11525/12188 [1:07:58<1:24:49, 7.68s/it] {'loss': 0.2654, 'grad_norm': 0.6253031695740199, 'learning_rate': 7.763666721596674e-08, 'epoch': 0.95} + 95%|█████████▍| 11525/12188 [1:07:58<1:24:49, 7.68s/it] 95%|█████████▍| 11526/12188 [1:08:05<1:22:25, 7.47s/it] {'loss': 0.33, 'grad_norm': 0.8278452403539497, 'learning_rate': 7.740360269380243e-08, 'epoch': 0.95} + 95%|█████████▍| 11526/12188 [1:08:05<1:22:25, 7.47s/it] 95%|█████████▍| 11527/12188 [1:08:13<1:21:47, 7.43s/it] {'loss': 0.3013, 'grad_norm': 0.6841147842578632, 'learning_rate': 7.717088579748733e-08, 'epoch': 0.95} + 95%|█████████▍| 11527/12188 [1:08:13<1:21:47, 7.43s/it] 95%|█████████▍| 11528/12188 [1:08:20<1:22:23, 7.49s/it] {'loss': 0.3106, 'grad_norm': 0.7151474520340029, 'learning_rate': 7.693851654345497e-08, 'epoch': 0.95} + 95%|█████████▍| 11528/12188 [1:08:20<1:22:23, 7.49s/it] 95%|█████████▍| 11529/12188 [1:08:27<1:19:31, 7.24s/it] {'loss': 0.3014, 'grad_norm': 0.7235754430591379, 'learning_rate': 7.670649494811555e-08, 'epoch': 0.95} + 95%|█████████▍| 11529/12188 [1:08:27<1:19:31, 7.24s/it] 95%|█████████▍| 11530/12188 [1:08:34<1:17:43, 7.09s/it] {'loss': 0.3015, 'grad_norm': 0.7921481061003893, 'learning_rate': 7.647482102785264e-08, 'epoch': 0.95} + 95%|█████████▍| 11530/12188 [1:08:34<1:17:43, 7.09s/it] 95%|█████████▍| 11531/12188 [1:08:41<1:18:38, 7.18s/it] {'loss': 0.3442, 'grad_norm': 0.8246277535969968, 'learning_rate': 7.62434947990287e-08, 'epoch': 0.95} + 95%|█████████▍| 11531/12188 [1:08:41<1:18:38, 7.18s/it] 95%|█████████▍| 11532/12188 [1:08:48<1:18:49, 7.21s/it] {'loss': 0.3127, 'grad_norm': 0.6989136560823854, 'learning_rate': 7.60125162779779e-08, 'epoch': 0.95} + 95%|█████████▍| 11532/12188 [1:08:48<1:18:49, 7.21s/it] 95%|█████████▍| 11533/12188 [1:08:55<1:16:54, 7.05s/it] {'loss': 0.3059, 'grad_norm': 0.7958715032870085, 'learning_rate': 7.578188548101217e-08, 'epoch': 0.95} + 95%|█████████▍| 11533/12188 [1:08:55<1:16:54, 7.05s/it] 95%|█████████▍| 11534/12188 [1:09:02<1:17:10, 7.08s/it] {'loss': 0.3171, 'grad_norm': 0.7657949993458952, 'learning_rate': 7.55516024244185e-08, 'epoch': 0.95} + 95%|█████████▍| 11534/12188 [1:09:02<1:17:10, 7.08s/it] 95%|█████████▍| 11535/12188 [1:09:11<1:24:08, 7.73s/it] {'loss': 0.3205, 'grad_norm': 0.6681598835584888, 'learning_rate': 7.532166712445888e-08, 'epoch': 0.95} + 95%|█████████▍| 11535/12188 [1:09:11<1:24:08, 7.73s/it] 95%|█████████▍| 11536/12188 [1:09:20<1:25:24, 7.86s/it] {'loss': 0.3063, 'grad_norm': 0.6725676787529359, 'learning_rate': 7.509207959737086e-08, 'epoch': 0.95} + 95%|█████████▍| 11536/12188 [1:09:20<1:25:24, 7.86s/it] 95%|█████████▍| 11537/12188 [1:09:27<1:24:12, 7.76s/it] {'loss': 0.3144, 'grad_norm': 0.729711697089312, 'learning_rate': 7.486283985936815e-08, 'epoch': 0.95} + 95%|█████████▍| 11537/12188 [1:09:27<1:24:12, 7.76s/it] 95%|█████████▍| 11538/12188 [1:09:34<1:21:53, 7.56s/it] {'loss': 0.2736, 'grad_norm': 0.6396924888332544, 'learning_rate': 7.463394792663891e-08, 'epoch': 0.95} + 95%|█████████▍| 11538/12188 [1:09:34<1:21:53, 7.56s/it] 95%|█████████▍| 11539/12188 [1:09:44<1:29:39, 8.29s/it] {'loss': 0.2972, 'grad_norm': 0.7215909890745347, 'learning_rate': 7.440540381534633e-08, 'epoch': 0.95} + 95%|█████████▍| 11539/12188 [1:09:44<1:29:39, 8.29s/it] 95%|█████████▍| 11540/12188 [1:09:51<1:24:16, 7.80s/it] {'loss': 0.3288, 'grad_norm': 0.7619468882109242, 'learning_rate': 7.417720754163138e-08, 'epoch': 0.95} + 95%|█████████▍| 11540/12188 [1:09:51<1:24:16, 7.80s/it] 95%|█████████▍| 11541/12188 [1:09:57<1:20:27, 7.46s/it] {'loss': 0.3093, 'grad_norm': 0.6847447074001108, 'learning_rate': 7.394935912160728e-08, 'epoch': 0.95} + 95%|█████████▍| 11541/12188 [1:09:57<1:20:27, 7.46s/it] 95%|█████████▍| 11542/12188 [1:10:05<1:21:57, 7.61s/it] {'loss': 0.2549, 'grad_norm': 0.6662001504599249, 'learning_rate': 7.372185857136616e-08, 'epoch': 0.95} + 95%|█████████▍| 11542/12188 [1:10:05<1:21:57, 7.61s/it] 95%|█████████▍| 11543/12188 [1:10:12<1:19:19, 7.38s/it] {'loss': 0.3208, 'grad_norm': 0.6935433755646317, 'learning_rate': 7.349470590697183e-08, 'epoch': 0.95} + 95%|█████████▍| 11543/12188 [1:10:12<1:19:19, 7.38s/it] 95%|█████████▍| 11544/12188 [1:10:19<1:17:09, 7.19s/it] {'loss': 0.2841, 'grad_norm': 0.6253466644481582, 'learning_rate': 7.326790114446647e-08, 'epoch': 0.95} + 95%|█████████▍| 11544/12188 [1:10:19<1:17:09, 7.19s/it] 95%|█████████▍| 11545/12188 [1:10:26<1:16:30, 7.14s/it] {'loss': 0.3011, 'grad_norm': 0.6946140416447872, 'learning_rate': 7.30414442998667e-08, 'epoch': 0.95} + 95%|█████████▍| 11545/12188 [1:10:26<1:16:30, 7.14s/it] 95%|█████████▍| 11546/12188 [1:10:33<1:15:32, 7.06s/it] {'loss': 0.2863, 'grad_norm': 0.8046203470950303, 'learning_rate': 7.281533538916474e-08, 'epoch': 0.95} + 95%|█████████▍| 11546/12188 [1:10:33<1:15:32, 7.06s/it] 95%|█████████▍| 11547/12188 [1:10:40<1:14:39, 6.99s/it] {'loss': 0.3521, 'grad_norm': 0.6997728529455726, 'learning_rate': 7.258957442832726e-08, 'epoch': 0.95} + 95%|█████████▍| 11547/12188 [1:10:40<1:14:39, 6.99s/it] 95%|█████████▍| 11548/12188 [1:10:47<1:16:12, 7.14s/it] {'loss': 0.2794, 'grad_norm': 0.6480467333181041, 'learning_rate': 7.236416143329761e-08, 'epoch': 0.95} + 95%|█████████▍| 11548/12188 [1:10:47<1:16:12, 7.14s/it] 95%|█████████▍| 11549/12188 [1:10:54<1:15:07, 7.05s/it] {'loss': 0.3164, 'grad_norm': 0.7765873946783468, 'learning_rate': 7.213909641999361e-08, 'epoch': 0.95} + 95%|█████████▍| 11549/12188 [1:10:54<1:15:07, 7.05s/it] 95%|█████████▍| 11550/12188 [1:11:02<1:16:12, 7.17s/it] {'loss': 0.263, 'grad_norm': 0.7289463013562681, 'learning_rate': 7.191437940431034e-08, 'epoch': 0.95} + 95%|█████████▍| 11550/12188 [1:11:02<1:16:12, 7.17s/it] 95%|█████████▍| 11551/12188 [1:11:11<1:23:47, 7.89s/it] {'loss': 0.3066, 'grad_norm': 0.7150391833284083, 'learning_rate': 7.16900104021151e-08, 'epoch': 0.95} + 95%|█████████▍| 11551/12188 [1:11:11<1:23:47, 7.89s/it] 95%|█████████▍| 11552/12188 [1:11:20<1:25:29, 8.07s/it] {'loss': 0.3078, 'grad_norm': 0.7215273428648556, 'learning_rate': 7.146598942925353e-08, 'epoch': 0.95} + 95%|█████████▍| 11552/12188 [1:11:20<1:25:29, 8.07s/it] 95%|█████████▍| 11553/12188 [1:11:27<1:22:57, 7.84s/it] {'loss': 0.2825, 'grad_norm': 0.7262039598763887, 'learning_rate': 7.124231650154633e-08, 'epoch': 0.95} + 95%|█████████▍| 11553/12188 [1:11:27<1:22:57, 7.84s/it] 95%|█████████▍| 11554/12188 [1:11:34<1:19:40, 7.54s/it] {'loss': 0.2694, 'grad_norm': 0.7379093265405835, 'learning_rate': 7.101899163478698e-08, 'epoch': 0.95} + 95%|█████████▍| 11554/12188 [1:11:34<1:19:40, 7.54s/it] 95%|█████████▍| 11555/12188 [1:11:41<1:20:12, 7.60s/it] {'loss': 0.3288, 'grad_norm': 0.6533021622214462, 'learning_rate': 7.079601484474841e-08, 'epoch': 0.95} + 95%|█████████▍| 11555/12188 [1:11:41<1:20:12, 7.60s/it] 95%|█████████▍| 11556/12188 [1:11:48<1:18:00, 7.41s/it] {'loss': 0.2878, 'grad_norm': 0.7133269652264904, 'learning_rate': 7.057338614717523e-08, 'epoch': 0.95} + 95%|█████████▍| 11556/12188 [1:11:48<1:18:00, 7.41s/it] 95%|█████████▍| 11557/12188 [1:11:55<1:16:33, 7.28s/it] {'loss': 0.2982, 'grad_norm': 0.7150496065535697, 'learning_rate': 7.03511055577899e-08, 'epoch': 0.95} + 95%|█████████▍| 11557/12188 [1:11:55<1:16:33, 7.28s/it] 95%|█████████▍| 11558/12188 [1:12:03<1:18:52, 7.51s/it] {'loss': 0.2709, 'grad_norm': 0.7280750142346938, 'learning_rate': 7.012917309228984e-08, 'epoch': 0.95} + 95%|█████████▍| 11558/12188 [1:12:03<1:18:52, 7.51s/it] 95%|█████████▍| 11559/12188 [1:12:11<1:18:39, 7.50s/it] {'loss': 0.3035, 'grad_norm': 0.6862163373603108, 'learning_rate': 6.990758876634695e-08, 'epoch': 0.95} + 95%|█████████▍| 11559/12188 [1:12:11<1:18:39, 7.50s/it] 95%|█████████▍| 11560/12188 [1:12:18<1:16:24, 7.30s/it] {'loss': 0.2846, 'grad_norm': 0.8357338879954065, 'learning_rate': 6.968635259560874e-08, 'epoch': 0.95} + 95%|█████████▍| 11560/12188 [1:12:18<1:16:24, 7.30s/it] 95%|█████████▍| 11561/12188 [1:12:24<1:14:21, 7.12s/it] {'loss': 0.2536, 'grad_norm': 0.7666437223752023, 'learning_rate': 6.946546459569991e-08, 'epoch': 0.95} + 95%|█████████▍| 11561/12188 [1:12:24<1:14:21, 7.12s/it] 95%|█████████▍| 11562/12188 [1:12:32<1:15:00, 7.19s/it] {'loss': 0.2729, 'grad_norm': 0.6604804657222074, 'learning_rate': 6.9244924782218e-08, 'epoch': 0.95} + 95%|█████████▍| 11562/12188 [1:12:32<1:15:00, 7.19s/it] 95%|█████████▍| 11563/12188 [1:12:41<1:21:05, 7.78s/it] {'loss': 0.2997, 'grad_norm': 0.6839808633562079, 'learning_rate': 6.902473317073776e-08, 'epoch': 0.95} + 95%|█████████▍| 11563/12188 [1:12:41<1:21:05, 7.78s/it] 95%|█████████▍| 11564/12188 [1:12:48<1:17:56, 7.50s/it] {'loss': 0.292, 'grad_norm': 0.746760336342536, 'learning_rate': 6.880488977680899e-08, 'epoch': 0.95} + 95%|█████████▍| 11564/12188 [1:12:48<1:17:56, 7.50s/it] 95%|█████████▍| 11565/12188 [1:12:56<1:21:19, 7.83s/it] {'loss': 0.2995, 'grad_norm': 0.6729747633160071, 'learning_rate': 6.858539461595593e-08, 'epoch': 0.95} + 95%|█████████▍| 11565/12188 [1:12:56<1:21:19, 7.83s/it] 95%|█████████▍| 11566/12188 [1:13:04<1:19:33, 7.67s/it] {'loss': 0.2837, 'grad_norm': 0.6780789329180555, 'learning_rate': 6.83662477036795e-08, 'epoch': 0.95} + 95%|█████████▍| 11566/12188 [1:13:04<1:19:33, 7.67s/it] 95%|█████████▍| 11567/12188 [1:13:11<1:17:25, 7.48s/it] {'loss': 0.3126, 'grad_norm': 0.7395527307562152, 'learning_rate': 6.814744905545512e-08, 'epoch': 0.95} + 95%|█████████▍| 11567/12188 [1:13:11<1:17:25, 7.48s/it] 95%|█████████▍| 11568/12188 [1:13:19<1:19:08, 7.66s/it] {'loss': 0.2712, 'grad_norm': 0.7444507237620587, 'learning_rate': 6.792899868673487e-08, 'epoch': 0.95} + 95%|█████████▍| 11568/12188 [1:13:19<1:19:08, 7.66s/it] 95%|█████████▍| 11569/12188 [1:13:26<1:17:38, 7.53s/it] {'loss': 0.3185, 'grad_norm': 0.7044730837029685, 'learning_rate': 6.771089661294417e-08, 'epoch': 0.95} + 95%|█████████▍| 11569/12188 [1:13:26<1:17:38, 7.53s/it] 95%|█████████▍| 11570/12188 [1:13:34<1:17:42, 7.54s/it] {'loss': 0.2412, 'grad_norm': 0.8477031646342126, 'learning_rate': 6.749314284948571e-08, 'epoch': 0.95} + 95%|█████████▍| 11570/12188 [1:13:34<1:17:42, 7.54s/it] 95%|█████████▍| 11571/12188 [1:13:40<1:14:42, 7.27s/it] {'loss': 0.3475, 'grad_norm': 0.7438635406416854, 'learning_rate': 6.727573741173721e-08, 'epoch': 0.95} + 95%|█████████▍| 11571/12188 [1:13:40<1:14:42, 7.27s/it] 95%|█████████▍| 11572/12188 [1:13:47<1:14:09, 7.22s/it] {'loss': 0.2652, 'grad_norm': 0.7097416999367886, 'learning_rate': 6.705868031505025e-08, 'epoch': 0.95} + 95%|█████████▍| 11572/12188 [1:13:47<1:14:09, 7.22s/it] 95%|█████████▍| 11573/12188 [1:13:54<1:12:39, 7.09s/it] {'loss': 0.2787, 'grad_norm': 0.739372495314965, 'learning_rate': 6.68419715747548e-08, 'epoch': 0.95} + 95%|█████████▍| 11573/12188 [1:13:54<1:12:39, 7.09s/it] 95%|█████████▍| 11574/12188 [1:14:01<1:12:15, 7.06s/it] {'loss': 0.3002, 'grad_norm': 0.7070968706954952, 'learning_rate': 6.662561120615363e-08, 'epoch': 0.95} + 95%|█████████▍| 11574/12188 [1:14:01<1:12:15, 7.06s/it] 95%|█████████▍| 11575/12188 [1:14:08<1:10:51, 6.94s/it] {'loss': 0.3522, 'grad_norm': 0.6911514624360522, 'learning_rate': 6.640959922452506e-08, 'epoch': 0.95} + 95%|█████████▍| 11575/12188 [1:14:08<1:10:51, 6.94s/it] 95%|█████████▍| 11576/12188 [1:14:14<1:09:39, 6.83s/it] {'loss': 0.3115, 'grad_norm': 0.9263943895497337, 'learning_rate': 6.619393564512466e-08, 'epoch': 0.95} + 95%|█████████▍| 11576/12188 [1:14:14<1:09:39, 6.83s/it] 95%|█████████▍| 11577/12188 [1:14:21<1:10:10, 6.89s/it] {'loss': 0.3103, 'grad_norm': 0.6287839216292413, 'learning_rate': 6.597862048318193e-08, 'epoch': 0.95} + 95%|█████████▍| 11577/12188 [1:14:21<1:10:10, 6.89s/it] 95%|█████████▍| 11578/12188 [1:14:28<1:10:17, 6.91s/it] {'loss': 0.2817, 'grad_norm': 0.6352098461586072, 'learning_rate': 6.576365375390137e-08, 'epoch': 0.95} + 95%|█████████▍| 11578/12188 [1:14:28<1:10:17, 6.91s/it] 95%|█████████▌| 11579/12188 [1:14:35<1:09:25, 6.84s/it] {'loss': 0.2922, 'grad_norm': 0.7190120014594147, 'learning_rate': 6.554903547246416e-08, 'epoch': 0.95} + 95%|█████████▌| 11579/12188 [1:14:35<1:09:25, 6.84s/it] 95%|█████████▌| 11580/12188 [1:14:43<1:11:12, 7.03s/it] {'loss': 0.2864, 'grad_norm': 0.7162917367834281, 'learning_rate': 6.533476565402653e-08, 'epoch': 0.95} + 95%|█████████▌| 11580/12188 [1:14:43<1:11:12, 7.03s/it] 95%|█████████▌| 11581/12188 [1:14:51<1:14:04, 7.32s/it] {'loss': 0.3288, 'grad_norm': 0.6839735767934179, 'learning_rate': 6.512084431371968e-08, 'epoch': 0.95} + 95%|█████████▌| 11581/12188 [1:14:51<1:14:04, 7.32s/it] 95%|█████████▌| 11582/12188 [1:14:57<1:12:52, 7.22s/it] {'loss': 0.2969, 'grad_norm': 0.9030857218438356, 'learning_rate': 6.490727146664988e-08, 'epoch': 0.95} + 95%|█████████▌| 11582/12188 [1:14:58<1:12:52, 7.22s/it] 95%|█████████▌| 11583/12188 [1:15:05<1:12:29, 7.19s/it] {'loss': 0.2875, 'grad_norm': 0.7419269635911473, 'learning_rate': 6.469404712790062e-08, 'epoch': 0.95} + 95%|█████████▌| 11583/12188 [1:15:05<1:12:29, 7.19s/it] 95%|█████████▌| 11584/12188 [1:15:12<1:11:26, 7.10s/it] {'loss': 0.2933, 'grad_norm': 0.8152393349624054, 'learning_rate': 6.448117131252762e-08, 'epoch': 0.95} + 95%|█████████▌| 11584/12188 [1:15:12<1:11:26, 7.10s/it] 95%|█████████▌| 11585/12188 [1:15:19<1:11:21, 7.10s/it] {'loss': 0.2508, 'grad_norm': 0.68322841820342, 'learning_rate': 6.426864403556499e-08, 'epoch': 0.95} + 95%|█████████▌| 11585/12188 [1:15:19<1:11:21, 7.10s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7ef858a445e0> +[Try #0] Failed to fetch sample 4556147 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7ef858a445e0> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'MediaWiki-announce '"}, {'from': 'gpt', 'value': '\nclick(x=0.261, y=0.1035)\n'}]} + 95%|█████████▌| 11586/12188 [1:15:29<1:22:38, 8.24s/it] {'loss': 0.3112, 'grad_norm': 0.6263799164255412, 'learning_rate': 6.405646531202126e-08, 'epoch': 0.95} + 95%|█████████▌| 11586/12188 [1:15:30<1:22:38, 8.24s/it] 95%|█████████▌| 11587/12188 [1:15:36<1:18:40, 7.86s/it] {'loss': 0.263, 'grad_norm': 0.6520252778911063, 'learning_rate': 6.384463515687945e-08, 'epoch': 0.95} + 95%|█████████▌| 11587/12188 [1:15:36<1:18:40, 7.86s/it] 95%|█████████▌| 11588/12188 [1:15:43<1:16:03, 7.61s/it] {'loss': 0.3085, 'grad_norm': 0.7771136000985938, 'learning_rate': 6.363315358509926e-08, 'epoch': 0.95} + 95%|█████████▌| 11588/12188 [1:15:44<1:16:03, 7.61s/it] 95%|█████████▌| 11589/12188 [1:15:51<1:14:34, 7.47s/it] {'loss': 0.2963, 'grad_norm': 0.678224544265457, 'learning_rate': 6.342202061161485e-08, 'epoch': 0.95} + 95%|█████████▌| 11589/12188 [1:15:51<1:14:34, 7.47s/it] 95%|█████████▌| 11590/12188 [1:15:59<1:16:15, 7.65s/it] {'loss': 0.2961, 'grad_norm': 0.7827948923899164, 'learning_rate': 6.32112362513354e-08, 'epoch': 0.95} + 95%|█████████▌| 11590/12188 [1:15:59<1:16:15, 7.65s/it] 95%|█████████▌| 11591/12188 [1:16:06<1:15:04, 7.55s/it] {'loss': 0.245, 'grad_norm': 0.7591028988574675, 'learning_rate': 6.300080051914792e-08, 'epoch': 0.95} + 95%|█████████▌| 11591/12188 [1:16:06<1:15:04, 7.55s/it] 95%|█████████▌| 11592/12188 [1:16:12<1:11:43, 7.22s/it] {'loss': 0.3027, 'grad_norm': 0.7731580961728297, 'learning_rate': 6.279071342991106e-08, 'epoch': 0.95} + 95%|█████████▌| 11592/12188 [1:16:12<1:11:43, 7.22s/it] 95%|█████████▌| 11593/12188 [1:16:19<1:10:10, 7.08s/it] {'loss': 0.2808, 'grad_norm': 0.7328031374001274, 'learning_rate': 6.258097499846238e-08, 'epoch': 0.95} + 95%|█████████▌| 11593/12188 [1:16:19<1:10:10, 7.08s/it] 95%|█████████▌| 11594/12188 [1:16:26<1:09:12, 6.99s/it] {'loss': 0.3031, 'grad_norm': 0.6950391145523493, 'learning_rate': 6.237158523961284e-08, 'epoch': 0.95} + 95%|█████████▌| 11594/12188 [1:16:26<1:09:12, 6.99s/it] 95%|█████████▌| 11595/12188 [1:16:35<1:14:30, 7.54s/it] {'loss': 0.3112, 'grad_norm': 0.7164181868038203, 'learning_rate': 6.216254416814837e-08, 'epoch': 0.95} + 95%|█████████▌| 11595/12188 [1:16:35<1:14:30, 7.54s/it] 95%|█████████▌| 11596/12188 [1:16:42<1:12:21, 7.33s/it] {'loss': 0.2862, 'grad_norm': 0.650803790215538, 'learning_rate': 6.195385179883218e-08, 'epoch': 0.95} + 95%|█████████▌| 11596/12188 [1:16:42<1:12:21, 7.33s/it] 95%|█████████▌| 11597/12188 [1:16:48<1:09:58, 7.10s/it] {'loss': 0.349, 'grad_norm': 0.7259346670333012, 'learning_rate': 6.174550814640135e-08, 'epoch': 0.95} + 95%|█████████▌| 11597/12188 [1:16:48<1:09:58, 7.10s/it] 95%|█████████▌| 11598/12188 [1:16:56<1:10:22, 7.16s/it] {'loss': 0.2723, 'grad_norm': 0.7012911315330359, 'learning_rate': 6.153751322556855e-08, 'epoch': 0.95} + 95%|█████████▌| 11598/12188 [1:16:56<1:10:22, 7.16s/it] 95%|█████████▌| 11599/12188 [1:17:03<1:12:17, 7.36s/it] {'loss': 0.2857, 'grad_norm': 0.7570894879406186, 'learning_rate': 6.132986705102317e-08, 'epoch': 0.95} + 95%|█████████▌| 11599/12188 [1:17:03<1:12:17, 7.36s/it] 95%|█████████▌| 11600/12188 [1:17:11<1:12:31, 7.40s/it] {'loss': 0.2771, 'grad_norm': 0.6887493679751803, 'learning_rate': 6.112256963742735e-08, 'epoch': 0.95} + 95%|█████████▌| 11600/12188 [1:17:11<1:12:31, 7.40s/it] 95%|█████████▌| 11601/12188 [1:17:18<1:12:02, 7.36s/it] {'loss': 0.2915, 'grad_norm': 0.7862729687083195, 'learning_rate': 6.091562099942106e-08, 'epoch': 0.95} + 95%|█████████▌| 11601/12188 [1:17:18<1:12:02, 7.36s/it] 95%|█████████▌| 11602/12188 [1:17:25<1:10:17, 7.20s/it] {'loss': 0.306, 'grad_norm': 0.8153059117217676, 'learning_rate': 6.070902115161814e-08, 'epoch': 0.95} + 95%|█████████▌| 11602/12188 [1:17:25<1:10:17, 7.20s/it] 95%|█████████▌| 11603/12188 [1:17:31<1:08:08, 6.99s/it] {'loss': 0.3242, 'grad_norm': 0.7477472548828565, 'learning_rate': 6.050277010860861e-08, 'epoch': 0.95} + 95%|█████████▌| 11603/12188 [1:17:31<1:08:08, 6.99s/it] 95%|█████████▌| 11604/12188 [1:17:38<1:07:25, 6.93s/it] {'loss': 0.2489, 'grad_norm': 0.7382492747971036, 'learning_rate': 6.029686788495748e-08, 'epoch': 0.95} + 95%|█████████▌| 11604/12188 [1:17:38<1:07:25, 6.93s/it] 95%|█████████▌| 11605/12188 [1:17:48<1:15:11, 7.74s/it] {'loss': 0.3053, 'grad_norm': 0.7249089870631209, 'learning_rate': 6.009131449520534e-08, 'epoch': 0.95} + 95%|█████████▌| 11605/12188 [1:17:48<1:15:11, 7.74s/it] 95%|█████████▌| 11606/12188 [1:17:55<1:14:02, 7.63s/it] {'loss': 0.2996, 'grad_norm': 0.6882230930395642, 'learning_rate': 5.988610995386779e-08, 'epoch': 0.95} + 95%|█████████▌| 11606/12188 [1:17:55<1:14:02, 7.63s/it] 95%|█████████▌| 11607/12188 [1:18:02<1:11:54, 7.43s/it] {'loss': 0.2791, 'grad_norm': 0.6667108815204205, 'learning_rate': 5.968125427543658e-08, 'epoch': 0.95} + 95%|█████████▌| 11607/12188 [1:18:02<1:11:54, 7.43s/it] 95%|█████████▌| 11608/12188 [1:18:10<1:11:33, 7.40s/it] {'loss': 0.3087, 'grad_norm': 0.7149055022839301, 'learning_rate': 5.947674747437793e-08, 'epoch': 0.95} + 95%|█████████▌| 11608/12188 [1:18:10<1:11:33, 7.40s/it] 95%|█████████▌| 11609/12188 [1:18:17<1:10:50, 7.34s/it] {'loss': 0.2701, 'grad_norm': 0.6635287044058755, 'learning_rate': 5.9272589565133595e-08, 'epoch': 0.95} + 95%|█████████▌| 11609/12188 [1:18:17<1:10:50, 7.34s/it] 95%|█████████▌| 11610/12188 [1:18:23<1:08:33, 7.12s/it] {'loss': 0.3, 'grad_norm': 0.7336224609937, 'learning_rate': 5.906878056212151e-08, 'epoch': 0.95} + 95%|█████████▌| 11610/12188 [1:18:23<1:08:33, 7.12s/it] 95%|█████████▌| 11611/12188 [1:18:31<1:09:07, 7.19s/it] {'loss': 0.3041, 'grad_norm': 0.7603633450630851, 'learning_rate': 5.886532047973348e-08, 'epoch': 0.95} + 95%|█████████▌| 11611/12188 [1:18:31<1:09:07, 7.19s/it] 95%|█████████▌| 11612/12188 [1:18:38<1:08:30, 7.14s/it] {'loss': 0.3317, 'grad_norm': 0.7556214877774455, 'learning_rate': 5.866220933233802e-08, 'epoch': 0.95} + 95%|█████████▌| 11612/12188 [1:18:38<1:08:30, 7.14s/it] 95%|█████████▌| 11613/12188 [1:18:44<1:06:59, 6.99s/it] {'loss': 0.2687, 'grad_norm': 0.6499879614378178, 'learning_rate': 5.8459447134278645e-08, 'epoch': 0.95} + 95%|█████████▌| 11613/12188 [1:18:44<1:06:59, 6.99s/it] 95%|█████████▌| 11614/12188 [1:18:51<1:07:16, 7.03s/it] {'loss': 0.2734, 'grad_norm': 0.6759904611931155, 'learning_rate': 5.825703389987392e-08, 'epoch': 0.95} + 95%|█████████▌| 11614/12188 [1:18:51<1:07:16, 7.03s/it] 95%|█████████▌| 11615/12188 [1:18:59<1:09:25, 7.27s/it] {'loss': 0.315, 'grad_norm': 0.7304406459494974, 'learning_rate': 5.80549696434185e-08, 'epoch': 0.95} + 95%|█████████▌| 11615/12188 [1:18:59<1:09:25, 7.27s/it] 95%|█████████▌| 11616/12188 [1:19:06<1:08:03, 7.14s/it] {'loss': 0.2938, 'grad_norm': 0.6654827429380672, 'learning_rate': 5.785325437918044e-08, 'epoch': 0.95} + 95%|█████████▌| 11616/12188 [1:19:06<1:08:03, 7.14s/it] 95%|█████████▌| 11617/12188 [1:19:13<1:08:08, 7.16s/it] {'loss': 0.326, 'grad_norm': 0.7624044298103291, 'learning_rate': 5.765188812140554e-08, 'epoch': 0.95} + 95%|█████████▌| 11617/12188 [1:19:13<1:08:08, 7.16s/it] 95%|█████████▌| 11618/12188 [1:19:21<1:08:02, 7.16s/it] {'loss': 0.3002, 'grad_norm': 0.759540900002612, 'learning_rate': 5.745087088431411e-08, 'epoch': 0.95} + 95%|█████████▌| 11618/12188 [1:19:21<1:08:02, 7.16s/it] 95%|█████████▌| 11619/12188 [1:19:28<1:07:55, 7.16s/it] {'loss': 0.2821, 'grad_norm': 0.6596214054164222, 'learning_rate': 5.7250202682100907e-08, 'epoch': 0.95} + 95%|█████████▌| 11619/12188 [1:19:28<1:07:55, 7.16s/it] 95%|█████████▌| 11620/12188 [1:19:35<1:08:41, 7.26s/it] {'loss': 0.3065, 'grad_norm': 0.6532224200181446, 'learning_rate': 5.704988352893792e-08, 'epoch': 0.95} + 95%|█████████▌| 11620/12188 [1:19:35<1:08:41, 7.26s/it] 95%|█████████▌| 11621/12188 [1:19:42<1:08:00, 7.20s/it] {'loss': 0.3046, 'grad_norm': 0.7548396381803615, 'learning_rate': 5.684991343896995e-08, 'epoch': 0.95} + 95%|█████████▌| 11621/12188 [1:19:42<1:08:00, 7.20s/it] 95%|█████████▌| 11622/12188 [1:19:52<1:15:15, 7.98s/it] {'loss': 0.2641, 'grad_norm': 0.6476171837474439, 'learning_rate': 5.665029242632014e-08, 'epoch': 0.95} + 95%|█████████▌| 11622/12188 [1:19:52<1:15:15, 7.98s/it] 95%|█████████▌| 11623/12188 [1:19:59<1:11:52, 7.63s/it] {'loss': 0.2859, 'grad_norm': 0.773683116800928, 'learning_rate': 5.645102050508389e-08, 'epoch': 0.95} + 95%|█████████▌| 11623/12188 [1:19:59<1:11:52, 7.63s/it] 95%|█████████▌| 11624/12188 [1:20:06<1:09:08, 7.36s/it] {'loss': 0.2701, 'grad_norm': 0.7024887473413752, 'learning_rate': 5.625209768933437e-08, 'epoch': 0.95} + 95%|█████████▌| 11624/12188 [1:20:06<1:09:08, 7.36s/it] 95%|█████████▌| 11625/12188 [1:20:13<1:08:52, 7.34s/it] {'loss': 0.2897, 'grad_norm': 0.8216771007651948, 'learning_rate': 5.60535239931187e-08, 'epoch': 0.95} + 95%|█████████▌| 11625/12188 [1:20:13<1:08:52, 7.34s/it] 95%|█████████▌| 11626/12188 [1:20:20<1:08:11, 7.28s/it] {'loss': 0.2963, 'grad_norm': 1.0410426481640658, 'learning_rate': 5.585529943046064e-08, 'epoch': 0.95} + 95%|█████████▌| 11626/12188 [1:20:20<1:08:11, 7.28s/it] 95%|█████████▌| 11627/12188 [1:20:27<1:07:38, 7.23s/it] {'loss': 0.2855, 'grad_norm': 0.6845774321508172, 'learning_rate': 5.5657424015357895e-08, 'epoch': 0.95} + 95%|█████████▌| 11627/12188 [1:20:27<1:07:38, 7.23s/it] 95%|█████████▌| 11628/12188 [1:20:34<1:06:34, 7.13s/it] {'loss': 0.2998, 'grad_norm': 0.6887145280848466, 'learning_rate': 5.545989776178373e-08, 'epoch': 0.95} + 95%|█████████▌| 11628/12188 [1:20:34<1:06:34, 7.13s/it] 95%|█████████▌| 11629/12188 [1:20:43<1:12:36, 7.79s/it] {'loss': 0.2914, 'grad_norm': 0.7918662010135811, 'learning_rate': 5.5262720683688096e-08, 'epoch': 0.95} + 95%|█████████▌| 11629/12188 [1:20:43<1:12:36, 7.79s/it] 95%|█████████▌| 11630/12188 [1:20:54<1:20:29, 8.65s/it] {'loss': 0.299, 'grad_norm': 0.6833992218087945, 'learning_rate': 5.506589279499486e-08, 'epoch': 0.95} + 95%|█████████▌| 11630/12188 [1:20:54<1:20:29, 8.65s/it] 95%|█████████▌| 11631/12188 [1:21:01<1:14:29, 8.02s/it] {'loss': 0.2863, 'grad_norm': 0.6912723037232962, 'learning_rate': 5.486941410960289e-08, 'epoch': 0.95} + 95%|█████████▌| 11631/12188 [1:21:01<1:14:29, 8.02s/it] 95%|█████████▌| 11632/12188 [1:21:08<1:12:29, 7.82s/it] {'loss': 0.3251, 'grad_norm': 0.6567065608513539, 'learning_rate': 5.467328464138888e-08, 'epoch': 0.95} + 95%|█████████▌| 11632/12188 [1:21:08<1:12:29, 7.82s/it] 95%|█████████▌| 11633/12188 [1:21:18<1:17:59, 8.43s/it] {'loss': 0.3203, 'grad_norm': 0.7372210704688765, 'learning_rate': 5.4477504404201185e-08, 'epoch': 0.95} + 95%|█████████▌| 11633/12188 [1:21:18<1:17:59, 8.43s/it] 95%|█████████▌| 11634/12188 [1:21:25<1:13:31, 7.96s/it] {'loss': 0.3263, 'grad_norm': 0.7017065346999327, 'learning_rate': 5.4282073411867087e-08, 'epoch': 0.95} + 95%|█████████▌| 11634/12188 [1:21:25<1:13:31, 7.96s/it] 95%|█████████▌| 11635/12188 [1:21:32<1:11:59, 7.81s/it] {'loss': 0.3171, 'grad_norm': 0.6878245657248728, 'learning_rate': 5.4086991678187206e-08, 'epoch': 0.95} + 95%|█████████▌| 11635/12188 [1:21:32<1:11:59, 7.81s/it] 95%|█████████▌| 11636/12188 [1:21:39<1:09:10, 7.52s/it] {'loss': 0.275, 'grad_norm': 0.781565895451189, 'learning_rate': 5.389225921693775e-08, 'epoch': 0.95} + 95%|█████████▌| 11636/12188 [1:21:39<1:09:10, 7.52s/it] 95%|█████████▌| 11637/12188 [1:21:48<1:13:10, 7.97s/it] {'loss': 0.2863, 'grad_norm': 0.7233628522632246, 'learning_rate': 5.369787604186993e-08, 'epoch': 0.95} + 95%|█████████▌| 11637/12188 [1:21:48<1:13:10, 7.97s/it] 95%|█████████▌| 11638/12188 [1:21:54<1:08:51, 7.51s/it] {'loss': 0.2801, 'grad_norm': 0.7250525855229164, 'learning_rate': 5.350384216671167e-08, 'epoch': 0.95} + 95%|█████████▌| 11638/12188 [1:21:54<1:08:51, 7.51s/it] 95%|█████████▌| 11639/12188 [1:22:01<1:06:55, 7.31s/it] {'loss': 0.2978, 'grad_norm': 0.7448400219350921, 'learning_rate': 5.331015760516478e-08, 'epoch': 0.95} + 95%|█████████▌| 11639/12188 [1:22:01<1:06:55, 7.31s/it] 96%|█████████▌| 11640/12188 [1:22:09<1:08:11, 7.47s/it] {'loss': 0.2947, 'grad_norm': 0.6290020352643974, 'learning_rate': 5.3116822370907206e-08, 'epoch': 0.95} + 96%|█████████▌| 11640/12188 [1:22:09<1:08:11, 7.47s/it] 96%|█████████▌| 11641/12188 [1:22:17<1:09:17, 7.60s/it] {'loss': 0.2759, 'grad_norm': 0.6318852983543194, 'learning_rate': 5.2923836477591916e-08, 'epoch': 0.96} + 96%|█████████▌| 11641/12188 [1:22:17<1:09:17, 7.60s/it] 96%|█████████▌| 11642/12188 [1:22:25<1:09:19, 7.62s/it] {'loss': 0.2986, 'grad_norm': 0.7534940147624095, 'learning_rate': 5.273119993884745e-08, 'epoch': 0.96} + 96%|█████████▌| 11642/12188 [1:22:25<1:09:19, 7.62s/it] 96%|█████████▌| 11643/12188 [1:22:32<1:07:56, 7.48s/it] {'loss': 0.2858, 'grad_norm': 0.7334591386339353, 'learning_rate': 5.2538912768276826e-08, 'epoch': 0.96} + 96%|█████████▌| 11643/12188 [1:22:32<1:07:56, 7.48s/it] 96%|█████████▌| 11644/12188 [1:22:38<1:05:33, 7.23s/it] {'loss': 0.2792, 'grad_norm': 0.7038931096231203, 'learning_rate': 5.234697497945973e-08, 'epoch': 0.96} + 96%|█████████▌| 11644/12188 [1:22:38<1:05:33, 7.23s/it] 96%|█████████▌| 11645/12188 [1:22:46<1:06:11, 7.31s/it] {'loss': 0.2955, 'grad_norm': 0.7055706297077473, 'learning_rate': 5.215538658595032e-08, 'epoch': 0.96} + 96%|█████████▌| 11645/12188 [1:22:46<1:06:11, 7.31s/it] 96%|█████████▌| 11646/12188 [1:22:53<1:05:23, 7.24s/it] {'loss': 0.3222, 'grad_norm': 0.7326941065370146, 'learning_rate': 5.196414760127777e-08, 'epoch': 0.96} + 96%|█████████▌| 11646/12188 [1:22:53<1:05:23, 7.24s/it] 96%|█████████▌| 11647/12188 [1:23:00<1:03:54, 7.09s/it] {'loss': 0.3157, 'grad_norm': 0.7213190405662987, 'learning_rate': 5.1773258038947395e-08, 'epoch': 0.96} + 96%|█████████▌| 11647/12188 [1:23:00<1:03:54, 7.09s/it] 96%|█████████▌| 11648/12188 [1:23:07<1:03:43, 7.08s/it] {'loss': 0.2858, 'grad_norm': 0.7758008337786876, 'learning_rate': 5.1582717912440076e-08, 'epoch': 0.96} + 96%|█████████▌| 11648/12188 [1:23:07<1:03:43, 7.08s/it] 96%|█████████▌| 11649/12188 [1:23:14<1:03:10, 7.03s/it] {'loss': 0.2615, 'grad_norm': 0.6719978665999999, 'learning_rate': 5.13925272352106e-08, 'epoch': 0.96} + 96%|█████████▌| 11649/12188 [1:23:14<1:03:10, 7.03s/it] 96%|█████████▌| 11650/12188 [1:23:20<1:02:14, 6.94s/it] {'loss': 0.3236, 'grad_norm': 0.7248159111457912, 'learning_rate': 5.120268602069101e-08, 'epoch': 0.96} + 96%|█████████▌| 11650/12188 [1:23:20<1:02:14, 6.94s/it] 96%|█████████▌| 11651/12188 [1:23:27<1:01:39, 6.89s/it] {'loss': 0.3203, 'grad_norm': 0.7654758020193388, 'learning_rate': 5.101319428228613e-08, 'epoch': 0.96} + 96%|█████████▌| 11651/12188 [1:23:27<1:01:39, 6.89s/it] 96%|█████████▌| 11652/12188 [1:23:36<1:05:41, 7.35s/it] {'loss': 0.3014, 'grad_norm': 0.7241721807797219, 'learning_rate': 5.082405203337859e-08, 'epoch': 0.96} + 96%|█████████▌| 11652/12188 [1:23:36<1:05:41, 7.35s/it] 96%|█████████▌| 11653/12188 [1:23:43<1:05:34, 7.35s/it] {'loss': 0.3369, 'grad_norm': 0.6794195931681558, 'learning_rate': 5.0635259287324934e-08, 'epoch': 0.96} + 96%|█████████▌| 11653/12188 [1:23:43<1:05:34, 7.35s/it] 96%|█████████▌| 11654/12188 [1:23:54<1:14:07, 8.33s/it] {'loss': 0.2659, 'grad_norm': 0.6741180808510108, 'learning_rate': 5.044681605745727e-08, 'epoch': 0.96} + 96%|█████████▌| 11654/12188 [1:23:54<1:14:07, 8.33s/it] 96%|█████████▌| 11655/12188 [1:24:00<1:10:03, 7.89s/it] {'loss': 0.3099, 'grad_norm': 0.685678951781702, 'learning_rate': 5.025872235708384e-08, 'epoch': 0.96} + 96%|█████████▌| 11655/12188 [1:24:01<1:10:03, 7.89s/it] 96%|█████████▌| 11656/12188 [1:24:07<1:07:12, 7.58s/it] {'loss': 0.3284, 'grad_norm': 0.7251939630181541, 'learning_rate': 5.0070978199486806e-08, 'epoch': 0.96} + 96%|█████████▌| 11656/12188 [1:24:07<1:07:12, 7.58s/it] 96%|█████████▌| 11657/12188 [1:24:17<1:11:41, 8.10s/it] {'loss': 0.3542, 'grad_norm': 0.7483429582598309, 'learning_rate': 4.9883583597925e-08, 'epoch': 0.96} + 96%|█████████▌| 11657/12188 [1:24:17<1:11:41, 8.10s/it] 96%|█████████▌| 11658/12188 [1:24:24<1:08:11, 7.72s/it] {'loss': 0.2749, 'grad_norm': 0.6992098843367052, 'learning_rate': 4.9696538565631724e-08, 'epoch': 0.96} + 96%|█████████▌| 11658/12188 [1:24:24<1:08:11, 7.72s/it] 96%|█████████▌| 11659/12188 [1:24:30<1:05:35, 7.44s/it] {'loss': 0.2954, 'grad_norm': 0.686988093940636, 'learning_rate': 4.9509843115814746e-08, 'epoch': 0.96} + 96%|█████████▌| 11659/12188 [1:24:30<1:05:35, 7.44s/it] 96%|█████████▌| 11660/12188 [1:24:38<1:05:56, 7.49s/it] {'loss': 0.2909, 'grad_norm': 0.8404909556313411, 'learning_rate': 4.9323497261659635e-08, 'epoch': 0.96} + 96%|█████████▌| 11660/12188 [1:24:38<1:05:56, 7.49s/it] 96%|█████████▌| 11661/12188 [1:24:45<1:04:40, 7.36s/it] {'loss': 0.2957, 'grad_norm': 0.917462375756153, 'learning_rate': 4.9137501016325305e-08, 'epoch': 0.96} + 96%|█████████▌| 11661/12188 [1:24:45<1:04:40, 7.36s/it] 96%|█████████▌| 11662/12188 [1:24:52<1:03:14, 7.21s/it] {'loss': 0.2977, 'grad_norm': 0.7372701515595712, 'learning_rate': 4.8951854392946254e-08, 'epoch': 0.96} + 96%|█████████▌| 11662/12188 [1:24:52<1:03:14, 7.21s/it] 96%|█████████▌| 11663/12188 [1:24:58<1:01:41, 7.05s/it] {'loss': 0.3193, 'grad_norm': 0.7212777209941849, 'learning_rate': 4.8766557404633095e-08, 'epoch': 0.96} + 96%|█████████▌| 11663/12188 [1:24:59<1:01:41, 7.05s/it] 96%|█████████▌| 11664/12188 [1:25:06<1:01:31, 7.04s/it] {'loss': 0.2793, 'grad_norm': 0.6150716566385687, 'learning_rate': 4.858161006447038e-08, 'epoch': 0.96} + 96%|█████████▌| 11664/12188 [1:25:06<1:01:31, 7.04s/it] 96%|█████████▌| 11665/12188 [1:25:12<1:00:20, 6.92s/it] {'loss': 0.2788, 'grad_norm': 0.7036898155565834, 'learning_rate': 4.8397012385519856e-08, 'epoch': 0.96} + 96%|█████████▌| 11665/12188 [1:25:12<1:00:20, 6.92s/it] 96%|█████████▌| 11666/12188 [1:25:20<1:01:18, 7.05s/it] {'loss': 0.2859, 'grad_norm': 0.7284766703525534, 'learning_rate': 4.8212764380816676e-08, 'epoch': 0.96} + 96%|█████████▌| 11666/12188 [1:25:20<1:01:18, 7.05s/it] 96%|█████████▌| 11667/12188 [1:25:27<1:01:28, 7.08s/it] {'loss': 0.2693, 'grad_norm': 0.7020367176690151, 'learning_rate': 4.802886606337209e-08, 'epoch': 0.96} + 96%|█████████▌| 11667/12188 [1:25:27<1:01:28, 7.08s/it] 96%|█████████▌| 11668/12188 [1:25:34<1:01:31, 7.10s/it] {'loss': 0.3287, 'grad_norm': 0.7558188880055008, 'learning_rate': 4.7845317446174044e-08, 'epoch': 0.96} + 96%|█████████▌| 11668/12188 [1:25:34<1:01:31, 7.10s/it] 96%|█████████▌| 11669/12188 [1:25:41<1:01:25, 7.10s/it] {'loss': 0.2954, 'grad_norm': 0.6382451327714997, 'learning_rate': 4.766211854218217e-08, 'epoch': 0.96} + 96%|█████████▌| 11669/12188 [1:25:41<1:01:25, 7.10s/it] 96%|█████████▌| 11670/12188 [1:25:48<1:02:22, 7.22s/it] {'loss': 0.2631, 'grad_norm': 0.6560022062144824, 'learning_rate': 4.747926936433611e-08, 'epoch': 0.96} + 96%|█████████▌| 11670/12188 [1:25:48<1:02:22, 7.22s/it] 96%|█████████▌| 11671/12188 [1:25:55<1:00:41, 7.04s/it] {'loss': 0.2823, 'grad_norm': 0.7164567233528878, 'learning_rate': 4.729676992554666e-08, 'epoch': 0.96} + 96%|█████████▌| 11671/12188 [1:25:55<1:00:41, 7.04s/it] 96%|█████████▌| 11672/12188 [1:26:02<1:01:06, 7.11s/it] {'loss': 0.3135, 'grad_norm': 0.6869894791510921, 'learning_rate': 4.711462023870239e-08, 'epoch': 0.96} + 96%|█████████▌| 11672/12188 [1:26:02<1:01:06, 7.11s/it] 96%|█████████▌| 11673/12188 [1:26:09<1:00:44, 7.08s/it] {'loss': 0.2947, 'grad_norm': 0.7402851855631276, 'learning_rate': 4.693282031666579e-08, 'epoch': 0.96} + 96%|█████████▌| 11673/12188 [1:26:09<1:00:44, 7.08s/it] 96%|█████████▌| 11674/12188 [1:26:16<59:21, 6.93s/it] {'loss': 0.2924, 'grad_norm': 0.7203406906172062, 'learning_rate': 4.6751370172276044e-08, 'epoch': 0.96} + 96%|█████████▌| 11674/12188 [1:26:16<59:21, 6.93s/it] 96%|█████████▌| 11675/12188 [1:26:23<59:40, 6.98s/it] {'loss': 0.2891, 'grad_norm': 0.7314495572041542, 'learning_rate': 4.657026981834623e-08, 'epoch': 0.96} + 96%|█████████▌| 11675/12188 [1:26:23<59:40, 6.98s/it] 96%|█████████▌| 11676/12188 [1:26:30<1:00:14, 7.06s/it] {'loss': 0.2981, 'grad_norm': 0.6725042827595055, 'learning_rate': 4.6389519267666107e-08, 'epoch': 0.96} + 96%|█████████▌| 11676/12188 [1:26:30<1:00:14, 7.06s/it] 96%|█████████▌| 11677/12188 [1:26:38<1:00:45, 7.13s/it] {'loss': 0.3122, 'grad_norm': 0.6991560295330987, 'learning_rate': 4.6209118532999365e-08, 'epoch': 0.96} + 96%|█████████▌| 11677/12188 [1:26:38<1:00:45, 7.13s/it] 96%|█████████▌| 11678/12188 [1:26:45<1:01:55, 7.28s/it] {'loss': 0.2646, 'grad_norm': 0.6365507635438395, 'learning_rate': 4.602906762708526e-08, 'epoch': 0.96} + 96%|█████████▌| 11678/12188 [1:26:45<1:01:55, 7.28s/it] 96%|█████████▌| 11679/12188 [1:26:53<1:02:19, 7.35s/it] {'loss': 0.3085, 'grad_norm': 0.6959512200875712, 'learning_rate': 4.584936656264027e-08, 'epoch': 0.96} + 96%|█████████▌| 11679/12188 [1:26:53<1:02:19, 7.35s/it] 96%|█████████▌| 11680/12188 [1:26:59<1:00:32, 7.15s/it] {'loss': 0.2844, 'grad_norm': 0.6695189285069918, 'learning_rate': 4.56700153523526e-08, 'epoch': 0.96} + 96%|█████████▌| 11680/12188 [1:26:59<1:00:32, 7.15s/it] 96%|█████████▌| 11681/12188 [1:27:06<59:21, 7.02s/it] {'loss': 0.2929, 'grad_norm': 0.7082767304295289, 'learning_rate': 4.549101400888933e-08, 'epoch': 0.96} + 96%|█████████▌| 11681/12188 [1:27:06<59:21, 7.02s/it] 96%|█████████▌| 11682/12188 [1:27:13<58:36, 6.95s/it] {'loss': 0.3221, 'grad_norm': 0.7046141083736669, 'learning_rate': 4.531236254489035e-08, 'epoch': 0.96} + 96%|█████████▌| 11682/12188 [1:27:13<58:36, 6.95s/it] 96%|█████████▌| 11683/12188 [1:27:22<1:03:30, 7.54s/it] {'loss': 0.2825, 'grad_norm': 0.6954344422908116, 'learning_rate': 4.513406097297224e-08, 'epoch': 0.96} + 96%|█████████▌| 11683/12188 [1:27:22<1:03:30, 7.54s/it] 96%|█████████▌| 11684/12188 [1:27:29<1:03:16, 7.53s/it] {'loss': 0.3293, 'grad_norm': 0.6960335718822714, 'learning_rate': 4.495610930572603e-08, 'epoch': 0.96} + 96%|█████████▌| 11684/12188 [1:27:29<1:03:16, 7.53s/it] 96%|█████████▌| 11685/12188 [1:27:37<1:02:29, 7.45s/it] {'loss': 0.2691, 'grad_norm': 0.7641231731629324, 'learning_rate': 4.4778507555718886e-08, 'epoch': 0.96} + 96%|█████████▌| 11685/12188 [1:27:37<1:02:29, 7.45s/it] 96%|█████████▌| 11686/12188 [1:27:44<1:01:26, 7.34s/it] {'loss': 0.3288, 'grad_norm': 0.6734310033930117, 'learning_rate': 4.46012557354919e-08, 'epoch': 0.96} + 96%|█████████▌| 11686/12188 [1:27:44<1:01:26, 7.34s/it] 96%|█████████▌| 11687/12188 [1:27:51<1:00:14, 7.21s/it] {'loss': 0.2775, 'grad_norm': 0.7301522260693947, 'learning_rate': 4.442435385756283e-08, 'epoch': 0.96} + 96%|█████████▌| 11687/12188 [1:27:51<1:00:14, 7.21s/it] 96%|█████████▌| 11688/12188 [1:27:58<1:01:46, 7.41s/it] {'loss': 0.2805, 'grad_norm': 0.9783776097619595, 'learning_rate': 4.4247801934423904e-08, 'epoch': 0.96} + 96%|█████████▌| 11688/12188 [1:27:58<1:01:46, 7.41s/it] 96%|█████████▌| 11689/12188 [1:28:05<1:00:09, 7.23s/it] {'loss': 0.3419, 'grad_norm': 0.7738829097936276, 'learning_rate': 4.407159997854349e-08, 'epoch': 0.96} + 96%|█████████▌| 11689/12188 [1:28:05<1:00:09, 7.23s/it] 96%|█████████▌| 11690/12188 [1:28:12<58:23, 7.04s/it] {'loss': 0.2952, 'grad_norm': 0.683254531203068, 'learning_rate': 4.3895748002364404e-08, 'epoch': 0.96} + 96%|█████████▌| 11690/12188 [1:28:12<58:23, 7.04s/it] 96%|█████████▌| 11691/12188 [1:28:19<58:14, 7.03s/it] {'loss': 0.3151, 'grad_norm': 0.7389100936505388, 'learning_rate': 4.372024601830449e-08, 'epoch': 0.96} + 96%|█████████▌| 11691/12188 [1:28:19<58:14, 7.03s/it] 96%|█████████▌| 11692/12188 [1:28:26<57:22, 6.94s/it] {'loss': 0.2904, 'grad_norm': 0.7015955839599358, 'learning_rate': 4.354509403875884e-08, 'epoch': 0.96} + 96%|█████████▌| 11692/12188 [1:28:26<57:22, 6.94s/it] 96%|█████████▌| 11693/12188 [1:28:33<58:43, 7.12s/it] {'loss': 0.266, 'grad_norm': 0.7115721217593185, 'learning_rate': 4.337029207609478e-08, 'epoch': 0.96} + 96%|█████████▌| 11693/12188 [1:28:33<58:43, 7.12s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 899, in process_image_unified + visual_processed = processor.preprocess(image, return_tensors="pt") + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 432, in preprocess + patches, image_grid_thw = self._preprocess( + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 252, in _preprocess + resized_height, resized_width = smart_resize( + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 69, in smart_resize + raise ValueError(f"height:{height} and width:{width} must be larger than factor:{factor}") +ValueError: height:21 and width:135 must be larger than factor:28 +[Try #0] Failed to fetch sample 2092006 in VC:s3://gui-agent/jedi/images/figma400k/figma400k_extracted/. Exception: height:21 and width:135 must be larger than factor:28 +Problematic sample: {'image': 'b740dccee641dd995e5ce727ca3882efdf31feffa6d5688fe120c85e9c186e93.png', 'conversations': [{'from': 'human', 'value': "\nThe visual attributes of this Button are:\nThe element is a circular button with a gray background and a black plus sign in the center. It has a minimalist design with no additional text or icons. The button is visually distinct due to its simple geometric shape and contrasting colors.\n\nThe spatial layout of this Button:\nThe button is located in the middle section of the interface, to the right of a green circular play button. It is part of a horizontal control panel that includes other interactive elements. The button is positioned between the play button and a three-dot menu icon.\n\nFunctional description of the Button:\nThe primary function of this button is likely to add or save the current item, such as a song or playlist, to a user's library or a specific list. Users can interact with it by clicking or tapping, which would typically result in the item being added to their collection."}, {'from': 'gpt', 'value': '[[0, 0, 1000, 1000]]', 'recipient': 'all', 'end_turn': True}]} + 96%|█████████▌| 11694/12188 [1:28:41<1:00:42, 7.37s/it] {'loss': 0.2979, 'grad_norm': 0.734747024464151, 'learning_rate': 4.319584014265743e-08, 'epoch': 0.96} + 96%|█████████▌| 11694/12188 [1:28:41<1:00:42, 7.37s/it] 96%|█████████▌| 11695/12188 [1:28:51<1:07:00, 8.16s/it] {'loss': 0.2971, 'grad_norm': 0.6540826183903322, 'learning_rate': 4.302173825076639e-08, 'epoch': 0.96} + 96%|█████████▌| 11695/12188 [1:28:51<1:07:00, 8.16s/it] 96%|█████████▌| 11696/12188 [1:29:00<1:09:45, 8.51s/it] {'loss': 0.3182, 'grad_norm': 0.6600167372893414, 'learning_rate': 4.28479864127157e-08, 'epoch': 0.96} + 96%|█████████▌| 11696/12188 [1:29:00<1:09:45, 8.51s/it] 96%|█████████▌| 11697/12188 [1:29:07<1:05:57, 8.06s/it] {'loss': 0.3027, 'grad_norm': 1.0941877191491607, 'learning_rate': 4.2674584640776676e-08, 'epoch': 0.96} + 96%|█████████▌| 11697/12188 [1:29:07<1:05:57, 8.06s/it] 96%|█████████▌| 11698/12188 [1:29:14<1:03:15, 7.75s/it] {'loss': 0.2867, 'grad_norm': 0.6807141322373244, 'learning_rate': 4.250153294719339e-08, 'epoch': 0.96} + 96%|█████████▌| 11698/12188 [1:29:14<1:03:15, 7.75s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f68f5b258a0> +[Try #0] Failed to fetch sample 4475518 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f68f5b258a0> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Recent changes'"}, {'from': 'gpt', 'value': '\nclick(x=0.865, y=0.253)\n'}]} + 96%|█████████▌| 11699/12188 [1:29:24<1:08:15, 8.38s/it] {'loss': 0.2718, 'grad_norm': 0.6336577913650467, 'learning_rate': 4.2328831344187196e-08, 'epoch': 0.96} + 96%|█████████▌| 11699/12188 [1:29:24<1:08:15, 8.38s/it] 96%|��████████▌| 11700/12188 [1:29:32<1:06:51, 8.22s/it] {'loss': 0.2999, 'grad_norm': 0.7122282431128393, 'learning_rate': 4.215647984395388e-08, 'epoch': 0.96} + 96%|█████████▌| 11700/12188 [1:29:32<1:06:51, 8.22s/it] 96%|█████████▌| 11701/12188 [1:29:39<1:03:48, 7.86s/it] {'loss': 0.2608, 'grad_norm': 0.892729979836736, 'learning_rate': 4.1984478458664265e-08, 'epoch': 0.96} + 96%|█████████▌| 11701/12188 [1:29:39<1:03:48, 7.86s/it] 96%|█████████▌| 11702/12188 [1:29:46<1:01:06, 7.55s/it] {'loss': 0.2839, 'grad_norm': 0.7161351788373583, 'learning_rate': 4.181282720046531e-08, 'epoch': 0.96} + 96%|█████████▌| 11702/12188 [1:29:46<1:01:06, 7.55s/it] 96%|█████████▌| 11703/12188 [1:29:53<58:37, 7.25s/it] {'loss': 0.3228, 'grad_norm': 0.8338173085214162, 'learning_rate': 4.164152608147842e-08, 'epoch': 0.96} + 96%|█████████▌| 11703/12188 [1:29:53<58:37, 7.25s/it] 96%|█████████▌| 11704/12188 [1:30:00<59:33, 7.38s/it] {'loss': 0.2708, 'grad_norm': 0.8041299720566782, 'learning_rate': 4.147057511380115e-08, 'epoch': 0.96} + 96%|█████████▌| 11704/12188 [1:30:00<59:33, 7.38s/it] 96%|█████████▌| 11705/12188 [1:30:07<58:31, 7.27s/it] {'loss': 0.3491, 'grad_norm': 0.7033765725553215, 'learning_rate': 4.129997430950494e-08, 'epoch': 0.96} + 96%|█████████▌| 11705/12188 [1:30:07<58:31, 7.27s/it] 96%|█████████▌| 11706/12188 [1:30:16<1:02:56, 7.83s/it] {'loss': 0.3111, 'grad_norm': 0.8489331221545499, 'learning_rate': 4.112972368063794e-08, 'epoch': 0.96} + 96%|█████████▌| 11706/12188 [1:30:16<1:02:56, 7.83s/it] 96%|█████████▌| 11707/12188 [1:30:24<1:01:22, 7.66s/it] {'loss': 0.2872, 'grad_norm': 0.7119270088009118, 'learning_rate': 4.095982323922332e-08, 'epoch': 0.96} + 96%|█████████▌| 11707/12188 [1:30:24<1:01:22, 7.66s/it] 96%|█████████▌| 11708/12188 [1:30:31<59:42, 7.46s/it] {'loss': 0.3322, 'grad_norm': 0.7587985239616071, 'learning_rate': 4.079027299725757e-08, 'epoch': 0.96} + 96%|█████████▌| 11708/12188 [1:30:31<59:42, 7.46s/it] 96%|█████████▌| 11709/12188 [1:30:37<57:56, 7.26s/it] {'loss': 0.2367, 'grad_norm': 0.6447430947896051, 'learning_rate': 4.0621072966716135e-08, 'epoch': 0.96} + 96%|█████████▌| 11709/12188 [1:30:37<57:56, 7.26s/it] 96%|█████████▌| 11710/12188 [1:30:45<58:41, 7.37s/it] {'loss': 0.2891, 'grad_norm': 0.6865200550454157, 'learning_rate': 4.045222315954611e-08, 'epoch': 0.96} + 96%|█████████▌| 11710/12188 [1:30:45<58:41, 7.37s/it] 96%|█████████▌| 11711/12188 [1:30:54<1:01:16, 7.71s/it] {'loss': 0.2817, 'grad_norm': 0.6628030365425597, 'learning_rate': 4.0283723587672405e-08, 'epoch': 0.96} + 96%|█████████▌| 11711/12188 [1:30:54<1:01:16, 7.71s/it] 96%|█████████▌| 11712/12188 [1:31:01<59:49, 7.54s/it] {'loss': 0.2917, 'grad_norm': 0.6692793229208751, 'learning_rate': 4.0115574262993284e-08, 'epoch': 0.96} + 96%|█████████▌| 11712/12188 [1:31:01<59:49, 7.54s/it] 96%|█████████▌| 11713/12188 [1:31:07<57:46, 7.30s/it] {'loss': 0.2852, 'grad_norm': 0.7507623413044415, 'learning_rate': 3.994777519738424e-08, 'epoch': 0.96} + 96%|█████████▌| 11713/12188 [1:31:07<57:46, 7.30s/it] 96%|█████████▌| 11714/12188 [1:31:17<1:03:11, 8.00s/it] {'loss': 0.3334, 'grad_norm': 0.7845739425008249, 'learning_rate': 3.9780326402694135e-08, 'epoch': 0.96} + 96%|█████████▌| 11714/12188 [1:31:17<1:03:11, 8.00s/it] 96%|█████████▌| 11715/12188 [1:31:24<1:01:22, 7.79s/it] {'loss': 0.3261, 'grad_norm': 0.7224175926486498, 'learning_rate': 3.961322789074795e-08, 'epoch': 0.96} + 96%|█████████▌| 11715/12188 [1:31:24<1:01:22, 7.79s/it] 96%|█████████▌| 11716/12188 [1:31:32<1:00:27, 7.69s/it] {'loss': 0.27, 'grad_norm': 0.8500115401195615, 'learning_rate': 3.944647967334625e-08, 'epoch': 0.96} + 96%|█████████▌| 11716/12188 [1:31:32<1:00:27, 7.69s/it] 96%|█████████▌| 11717/12188 [1:31:39<1:00:04, 7.65s/it] {'loss': 0.3516, 'grad_norm': 0.701303561725101, 'learning_rate': 3.928008176226461e-08, 'epoch': 0.96} + 96%|█████████▌| 11717/12188 [1:31:39<1:00:04, 7.65s/it] 96%|█████████▌| 11718/12188 [1:31:46<58:02, 7.41s/it] {'loss': 0.3242, 'grad_norm': 0.8123818800124006, 'learning_rate': 3.911403416925308e-08, 'epoch': 0.96} + 96%|█████████▌| 11718/12188 [1:31:46<58:02, 7.41s/it] 96%|█████████▌| 11719/12188 [1:31:56<1:04:26, 8.24s/it] {'loss': 0.3503, 'grad_norm': 0.7528800116670564, 'learning_rate': 3.894833690603839e-08, 'epoch': 0.96} + 96%|█████████▌| 11719/12188 [1:31:56<1:04:26, 8.24s/it] 96%|█████████▌| 11720/12188 [1:32:04<1:01:43, 7.91s/it] {'loss': 0.2631, 'grad_norm': 0.7247997644168037, 'learning_rate': 3.878298998432228e-08, 'epoch': 0.96} + 96%|█████████▌| 11720/12188 [1:32:04<1:01:43, 7.91s/it] 96%|█████████▌| 11721/12188 [1:32:11<59:30, 7.65s/it] {'loss': 0.2634, 'grad_norm': 0.7010302240638846, 'learning_rate': 3.8617993415779876e-08, 'epoch': 0.96} + 96%|█████████▌| 11721/12188 [1:32:11<59:30, 7.65s/it] 96%|█████████▌| 11722/12188 [1:32:18<58:15, 7.50s/it] {'loss': 0.3232, 'grad_norm': 0.7465370670149393, 'learning_rate': 3.8453347212064064e-08, 'epoch': 0.96} + 96%|█████████▌| 11722/12188 [1:32:18<58:15, 7.50s/it] 96%|█████████▌| 11723/12188 [1:32:25<58:20, 7.53s/it] {'loss': 0.2632, 'grad_norm': 0.7106447304857958, 'learning_rate': 3.82890513848011e-08, 'epoch': 0.96} + 96%|█████████▌| 11723/12188 [1:32:25<58:20, 7.53s/it] 96%|█████████▌| 11724/12188 [1:32:33<59:02, 7.64s/it] {'loss': 0.2822, 'grad_norm': 0.6538915332970158, 'learning_rate': 3.8125105945593935e-08, 'epoch': 0.96} + 96%|█████████▌| 11724/12188 [1:32:33<59:02, 7.64s/it] 96%|█████████▌| 11725/12188 [1:32:40<57:40, 7.47s/it] {'loss': 0.2884, 'grad_norm': 0.7473345668082163, 'learning_rate': 3.7961510906020516e-08, 'epoch': 0.96} + 96%|█████████▌| 11725/12188 [1:32:40<57:40, 7.47s/it] 96%|█████████▌| 11726/12188 [1:32:48<58:31, 7.60s/it] {'loss': 0.3188, 'grad_norm': 0.6783950191209044, 'learning_rate': 3.779826627763272e-08, 'epoch': 0.96} + 96%|█████████▌| 11726/12188 [1:32:48<58:31, 7.60s/it] 96%|█████████▌| 11727/12188 [1:32:57<1:00:04, 7.82s/it] {'loss': 0.2857, 'grad_norm': 0.7060792404062569, 'learning_rate': 3.763537207195855e-08, 'epoch': 0.96} + 96%|█████████▌| 11727/12188 [1:32:57<1:00:04, 7.82s/it] 96%|█████████▌| 11728/12188 [1:33:03<57:37, 7.52s/it] {'loss': 0.2851, 'grad_norm': 0.7548129935797843, 'learning_rate': 3.747282830050214e-08, 'epoch': 0.96} + 96%|█████████▌| 11728/12188 [1:33:03<57:37, 7.52s/it] 96%|█████████▌| 11729/12188 [1:33:10<56:25, 7.38s/it] {'loss': 0.2816, 'grad_norm': 0.8135253796606395, 'learning_rate': 3.731063497474152e-08, 'epoch': 0.96} + 96%|█████████▌| 11729/12188 [1:33:10<56:25, 7.38s/it] 96%|█████████▌| 11730/12188 [1:33:17<54:52, 7.19s/it] {'loss': 0.3085, 'grad_norm': 0.6785546383677722, 'learning_rate': 3.714879210613087e-08, 'epoch': 0.96} + 96%|█████████▌| 11730/12188 [1:33:17<54:52, 7.19s/it] 96%|█████████▋| 11731/12188 [1:33:24<53:42, 7.05s/it] {'loss': 0.2859, 'grad_norm': 0.6207256129285036, 'learning_rate': 3.698729970609882e-08, 'epoch': 0.96} + 96%|█████████▋| 11731/12188 [1:33:24<53:42, 7.05s/it] 96%|█████████▋| 11732/12188 [1:33:31<52:49, 6.95s/it] {'loss': 0.2846, 'grad_norm': 0.6677912238583815, 'learning_rate': 3.6826157786050144e-08, 'epoch': 0.96} + 96%|█████████▋| 11732/12188 [1:33:31<52:49, 6.95s/it] 96%|█████████▋| 11733/12188 [1:33:38<53:03, 7.00s/it] {'loss': 0.294, 'grad_norm': 0.6508187108988865, 'learning_rate': 3.666536635736406e-08, 'epoch': 0.96} + 96%|█████████▋| 11733/12188 [1:33:38<53:03, 7.00s/it] 96%|█████████▋| 11734/12188 [1:33:45<53:01, 7.01s/it] {'loss': 0.2841, 'grad_norm': 0.6966273977126326, 'learning_rate': 3.6504925431395946e-08, 'epoch': 0.96} + 96%|█████████▋| 11734/12188 [1:33:45<53:01, 7.01s/it] 96%|█████████▋| 11735/12188 [1:33:52<52:58, 7.02s/it] {'loss': 0.3075, 'grad_norm': 0.845028995198243, 'learning_rate': 3.634483501947561e-08, 'epoch': 0.96} + 96%|█████████▋| 11735/12188 [1:33:52<52:58, 7.02s/it] 96%|█████████▋| 11736/12188 [1:33:59<52:23, 6.96s/it] {'loss': 0.2955, 'grad_norm': 0.6776898405221016, 'learning_rate': 3.618509513290791e-08, 'epoch': 0.96} + 96%|█████████▋| 11736/12188 [1:33:59<52:23, 6.96s/it] 96%|█████████▋| 11737/12188 [1:34:06<53:33, 7.13s/it] {'loss': 0.2863, 'grad_norm': 0.6680946414971245, 'learning_rate': 3.602570578297382e-08, 'epoch': 0.96} + 96%|█████████▋| 11737/12188 [1:34:06<53:33, 7.13s/it] 96%|█████████▋| 11738/12188 [1:34:13<52:27, 6.99s/it] {'loss': 0.2827, 'grad_norm': 0.7678900626816927, 'learning_rate': 3.5866666980929334e-08, 'epoch': 0.96} + 96%|█████████▋| 11738/12188 [1:34:13<52:27, 6.99s/it] 96%|█████████▋| 11739/12188 [1:34:21<54:54, 7.34s/it] {'loss': 0.2538, 'grad_norm': 0.6375442256166426, 'learning_rate': 3.5707978738005464e-08, 'epoch': 0.96} + 96%|█████████▋| 11739/12188 [1:34:21<54:54, 7.34s/it] 96%|█████████▋| 11740/12188 [1:34:30<59:08, 7.92s/it] {'loss': 0.2987, 'grad_norm': 0.7872430560183962, 'learning_rate': 3.554964106540826e-08, 'epoch': 0.96} + 96%|█████████▋| 11740/12188 [1:34:30<59:08, 7.92s/it] 96%|█████████▋| 11741/12188 [1:34:38<59:25, 7.98s/it] {'loss': 0.3343, 'grad_norm': 0.786050070405375, 'learning_rate': 3.539165397431932e-08, 'epoch': 0.96} + 96%|█████████▋| 11741/12188 [1:34:38<59:25, 7.98s/it] 96%|█████████▋| 11742/12188 [1:34:45<56:52, 7.65s/it] {'loss': 0.2593, 'grad_norm': 0.6511413811686713, 'learning_rate': 3.5234017475895276e-08, 'epoch': 0.96} + 96%|█████████▋| 11742/12188 [1:34:45<56:52, 7.65s/it] 96%|█████████▋| 11743/12188 [1:34:52<54:44, 7.38s/it] {'loss': 0.2945, 'grad_norm': 0.7421111845474748, 'learning_rate': 3.5076731581268896e-08, 'epoch': 0.96} + 96%|█████████▋| 11743/12188 [1:34:52<54:44, 7.38s/it] 96%|█████████▋| 11744/12188 [1:34:59<53:22, 7.21s/it] {'loss': 0.2585, 'grad_norm': 0.6881609420169462, 'learning_rate': 3.491979630154685e-08, 'epoch': 0.96} + 96%|█████████▋| 11744/12188 [1:34:59<53:22, 7.21s/it] 96%|█████████▋| 11745/12188 [1:35:07<56:23, 7.64s/it] {'loss': 0.3024, 'grad_norm': 0.7763899879764847, 'learning_rate': 3.4763211647811377e-08, 'epoch': 0.96} + 96%|█████████▋| 11745/12188 [1:35:07<56:23, 7.64s/it] 96%|█████████▋| 11746/12188 [1:35:15<56:38, 7.69s/it] {'loss': 0.3032, 'grad_norm': 0.7134615615015013, 'learning_rate': 3.460697763112142e-08, 'epoch': 0.96} + 96%|█████████▋| 11746/12188 [1:35:15<56:38, 7.69s/it] 96%|█████████▋| 11747/12188 [1:35:22<55:12, 7.51s/it] {'loss': 0.2614, 'grad_norm': 0.6733449933860928, 'learning_rate': 3.44510942625087e-08, 'epoch': 0.96} + 96%|█████████▋| 11747/12188 [1:35:22<55:12, 7.51s/it] 96%|█████████▋| 11748/12188 [1:35:29<53:43, 7.33s/it] {'loss': 0.3339, 'grad_norm': 0.7919627584882984, 'learning_rate': 3.429556155298219e-08, 'epoch': 0.96} + 96%|█████████▋| 11748/12188 [1:35:29<53:43, 7.33s/it] 96%|█████████▋| 11749/12188 [1:35:36<52:28, 7.17s/it] {'loss': 0.3066, 'grad_norm': 0.7106890964790411, 'learning_rate': 3.414037951352478e-08, 'epoch': 0.96} + 96%|█████████▋| 11749/12188 [1:35:36<52:28, 7.17s/it] 96%|█████████▋| 11750/12188 [1:35:43<51:45, 7.09s/it] {'loss': 0.3207, 'grad_norm': 0.7372318076307929, 'learning_rate': 3.398554815509547e-08, 'epoch': 0.96} + 96%|█████████▋| 11750/12188 [1:35:43<51:45, 7.09s/it] 96%|█████████▋| 11751/12188 [1:35:51<53:46, 7.38s/it] {'loss': 0.2909, 'grad_norm': 0.6521759115305312, 'learning_rate': 3.38310674886283e-08, 'epoch': 0.96} + 96%|█████████▋| 11751/12188 [1:35:51<53:46, 7.38s/it] 96%|█████████▋| 11752/12188 [1:35:58<52:09, 7.18s/it] {'loss': 0.2929, 'grad_norm': 0.755242996914659, 'learning_rate': 3.3676937525032314e-08, 'epoch': 0.96} + 96%|█████████▋| 11752/12188 [1:35:58<52:09, 7.18s/it] 96%|█████████▋| 11753/12188 [1:36:05<52:16, 7.21s/it] {'loss': 0.3359, 'grad_norm': 0.7175926116138578, 'learning_rate': 3.35231582751927e-08, 'epoch': 0.96} + 96%|█████████▋| 11753/12188 [1:36:05<52:16, 7.21s/it] 96%|█████████▋| 11754/12188 [1:36:12<51:26, 7.11s/it] {'loss': 0.3411, 'grad_norm': 0.7023878600003186, 'learning_rate': 3.336972974996799e-08, 'epoch': 0.96} + 96%|█████████▋| 11754/12188 [1:36:12<51:26, 7.11s/it] 96%|█████████▋| 11755/12188 [1:36:19<51:06, 7.08s/it] {'loss': 0.2655, 'grad_norm': 0.7229942370058076, 'learning_rate': 3.321665196019286e-08, 'epoch': 0.96} + 96%|█████████▋| 11755/12188 [1:36:19<51:06, 7.08s/it] 96%|█████████▋| 11756/12188 [1:36:26<51:23, 7.14s/it] {'loss': 0.2891, 'grad_norm': 0.6818707484317494, 'learning_rate': 3.306392491667865e-08, 'epoch': 0.96} + 96%|█████████▋| 11756/12188 [1:36:26<51:23, 7.14s/it] 96%|█████████▋| 11757/12188 [1:36:33<50:29, 7.03s/it] {'loss': 0.3027, 'grad_norm': 0.752229842732902, 'learning_rate': 3.291154863021007e-08, 'epoch': 0.96} + 96%|█████████▋| 11757/12188 [1:36:33<50:29, 7.03s/it] 96%|█████████▋| 11758/12188 [1:36:41<52:15, 7.29s/it] {'loss': 0.2713, 'grad_norm': 0.7589336530707111, 'learning_rate': 3.27595231115474e-08, 'epoch': 0.96} + 96%|█████████▋| 11758/12188 [1:36:41<52:15, 7.29s/it] 96%|█████████▋| 11759/12188 [1:36:48<52:10, 7.30s/it] {'loss': 0.2705, 'grad_norm': 0.6206814716972496, 'learning_rate': 3.260784837142705e-08, 'epoch': 0.96} + 96%|█████████▋| 11759/12188 [1:36:48<52:10, 7.30s/it] 96%|█████████▋| 11760/12188 [1:36:55<51:10, 7.17s/it] {'loss': 0.3131, 'grad_norm': 0.6655009491105522, 'learning_rate': 3.2456524420559354e-08, 'epoch': 0.96} + 96%|█████████▋| 11760/12188 [1:36:55<51:10, 7.17s/it] 96%|█████████▋| 11761/12188 [1:37:02<51:01, 7.17s/it] {'loss': 0.3356, 'grad_norm': 0.7306038228578531, 'learning_rate': 3.230555126963131e-08, 'epoch': 0.96} + 96%|█████████▋| 11761/12188 [1:37:02<51:01, 7.17s/it] 97%|█████████▋| 11762/12188 [1:37:10<52:01, 7.33s/it] {'loss': 0.3105, 'grad_norm': 0.792511534438117, 'learning_rate': 3.215492892930383e-08, 'epoch': 0.97} + 97%|█████████▋| 11762/12188 [1:37:10<52:01, 7.33s/it] 97%|█████████▋| 11763/12188 [1:37:17<50:42, 7.16s/it] {'loss': 0.3007, 'grad_norm': 0.7171107216514528, 'learning_rate': 3.200465741021341e-08, 'epoch': 0.97} + 97%|█████████▋| 11763/12188 [1:37:17<50:42, 7.16s/it] 97%|█████████▋| 11764/12188 [1:37:23<49:35, 7.02s/it] {'loss': 0.3406, 'grad_norm': 0.8385163417990587, 'learning_rate': 3.185473672297323e-08, 'epoch': 0.97} + 97%|█████████▋| 11764/12188 [1:37:23<49:35, 7.02s/it] 97%|█████████▋| 11765/12188 [1:37:30<48:59, 6.95s/it] {'loss': 0.3404, 'grad_norm': 0.7705231704589811, 'learning_rate': 3.170516687816871e-08, 'epoch': 0.97} + 97%|█████████▋| 11765/12188 [1:37:30<48:59, 6.95s/it] 97%|█████████▋| 11766/12188 [1:37:38<50:20, 7.16s/it] {'loss': 0.2777, 'grad_norm': 0.6696917841498552, 'learning_rate': 3.1555947886363626e-08, 'epoch': 0.97} + 97%|█████████▋| 11766/12188 [1:37:38<50:20, 7.16s/it] 97%|█████████▋| 11767/12188 [1:37:45<49:58, 7.12s/it] {'loss': 0.3404, 'grad_norm': 0.8862816763197509, 'learning_rate': 3.1407079758095646e-08, 'epoch': 0.97} + 97%|█████████▋| 11767/12188 [1:37:45<49:58, 7.12s/it] 97%|█████████▋| 11768/12188 [1:37:55<57:02, 8.15s/it] {'loss': 0.3537, 'grad_norm': 0.6993490924722466, 'learning_rate': 3.125856250387638e-08, 'epoch': 0.97} + 97%|█████████▋| 11768/12188 [1:37:55<57:02, 8.15s/it] 97%|█████████▋| 11769/12188 [1:38:02<54:39, 7.83s/it] {'loss': 0.2755, 'grad_norm': 0.7393946356683163, 'learning_rate': 3.111039613419464e-08, 'epoch': 0.97} + 97%|█████████▋| 11769/12188 [1:38:02<54:39, 7.83s/it] 97%|█████████▋| 11770/12188 [1:38:10<53:19, 7.65s/it] {'loss': 0.3267, 'grad_norm': 0.677926395073854, 'learning_rate': 3.096258065951319e-08, 'epoch': 0.97} + 97%|█████████▋| 11770/12188 [1:38:10<53:19, 7.65s/it] 97%|█████████▋| 11771/12188 [1:38:20<58:44, 8.45s/it] {'loss': 0.2776, 'grad_norm': 0.662704497834901, 'learning_rate': 3.081511609027144e-08, 'epoch': 0.97} + 97%|█████████▋| 11771/12188 [1:38:20<58:44, 8.45s/it] 97%|█████████▋| 11772/12188 [1:38:27<55:04, 7.94s/it] {'loss': 0.2987, 'grad_norm': 0.729493825511382, 'learning_rate': 3.066800243688273e-08, 'epoch': 0.97} + 97%|█████████▋| 11772/12188 [1:38:27<55:04, 7.94s/it] 97%|█████████▋| 11773/12188 [1:38:34<52:38, 7.61s/it] {'loss': 0.2921, 'grad_norm': 0.6472644606268896, 'learning_rate': 3.052123970973542e-08, 'epoch': 0.97} + 97%|█████████▋| 11773/12188 [1:38:34<52:38, 7.61s/it] 97%|█████████▋| 11774/12188 [1:38:41<51:21, 7.44s/it] {'loss': 0.3166, 'grad_norm': 0.7860849048700611, 'learning_rate': 3.0374827919193994e-08, 'epoch': 0.97} + 97%|█████████▋| 11774/12188 [1:38:41<51:21, 7.44s/it] 97%|█████████▋| 11775/12188 [1:38:48<50:16, 7.30s/it] {'loss': 0.3118, 'grad_norm': 0.7124819771277073, 'learning_rate': 3.022876707559796e-08, 'epoch': 0.97} + 97%|█████████▋| 11775/12188 [1:38:48<50:16, 7.30s/it] 97%|█████████▋| 11776/12188 [1:38:55<49:42, 7.24s/it] {'loss': 0.2795, 'grad_norm': 0.7069588843598902, 'learning_rate': 3.008305718926241e-08, 'epoch': 0.97} + 97%|█████████▋| 11776/12188 [1:38:55<49:42, 7.24s/it] 97%|█████████▋| 11777/12188 [1:39:01<48:09, 7.03s/it] {'loss': 0.3299, 'grad_norm': 0.6803705925197974, 'learning_rate': 2.9937698270476324e-08, 'epoch': 0.97} + 97%|█████████▋| 11777/12188 [1:39:01<48:09, 7.03s/it] 97%|█████████▋| 11778/12188 [1:39:08<48:01, 7.03s/it] {'loss': 0.2965, 'grad_norm': 0.878665252328106, 'learning_rate': 2.979269032950427e-08, 'epoch': 0.97} + 97%|█████████▋| 11778/12188 [1:39:08<48:01, 7.03s/it] 97%|█████████▋| 11779/12188 [1:39:15<47:14, 6.93s/it] {'loss': 0.3234, 'grad_norm': 0.7169325707247026, 'learning_rate': 2.9648033376588058e-08, 'epoch': 0.97} + 97%|█████████▋| 11779/12188 [1:39:15<47:14, 6.93s/it] 97%|█████████▋| 11780/12188 [1:39:22<48:07, 7.08s/it] {'loss': 0.3053, 'grad_norm': 0.6283745634450767, 'learning_rate': 2.950372742194174e-08, 'epoch': 0.97} + 97%|█████████▋| 11780/12188 [1:39:22<48:07, 7.08s/it] 97%|█████████▋| 11781/12188 [1:39:30<49:31, 7.30s/it] {'loss': 0.2982, 'grad_norm': 0.7296631494727609, 'learning_rate': 2.9359772475757164e-08, 'epoch': 0.97} + 97%|█████████▋| 11781/12188 [1:39:30<49:31, 7.30s/it] 97%|█████████▋| 11782/12188 [1:39:38<50:54, 7.52s/it] {'loss': 0.2975, 'grad_norm': 0.69111747110214, 'learning_rate': 2.9216168548198975e-08, 'epoch': 0.97} + 97%|█████████▋| 11782/12188 [1:39:38<50:54, 7.52s/it] 97%|█████████▋| 11783/12188 [1:39:46<50:37, 7.50s/it] {'loss': 0.2798, 'grad_norm': 0.6549426266176633, 'learning_rate': 2.9072915649408505e-08, 'epoch': 0.97} + 97%|█████████▋| 11783/12188 [1:39:46<50:37, 7.50s/it] 97%|█████████▋| 11784/12188 [1:39:53<49:42, 7.38s/it] {'loss': 0.315, 'grad_norm': 0.6580096483406868, 'learning_rate': 2.893001378950322e-08, 'epoch': 0.97} + 97%|█████████▋| 11784/12188 [1:39:53<49:42, 7.38s/it] 97%|█████████▋| 11785/12188 [1:39:59<48:06, 7.16s/it] {'loss': 0.2773, 'grad_norm': 0.7350530230593778, 'learning_rate': 2.8787462978572823e-08, 'epoch': 0.97} + 97%|█████████▋| 11785/12188 [1:39:59<48:06, 7.16s/it] 97%|█████████▋| 11786/12188 [1:40:06<47:23, 7.07s/it] {'loss': 0.2877, 'grad_norm': 0.6964751251759107, 'learning_rate': 2.864526322668537e-08, 'epoch': 0.97} + 97%|█████████▋| 11786/12188 [1:40:06<47:23, 7.07s/it] 97%|█████████▋| 11787/12188 [1:40:13<47:30, 7.11s/it] {'loss': 0.2985, 'grad_norm': 0.6941564898636109, 'learning_rate': 2.850341454388228e-08, 'epoch': 0.97} + 97%|█████████▋| 11787/12188 [1:40:14<47:30, 7.11s/it] 97%|█████████▋| 11788/12188 [1:40:21<47:12, 7.08s/it] {'loss': 0.2882, 'grad_norm': 0.6885220531607419, 'learning_rate': 2.8361916940180534e-08, 'epoch': 0.97} + 97%|█████████▋| 11788/12188 [1:40:21<47:12, 7.08s/it] 97%|█████████▋| 11789/12188 [1:40:27<46:09, 6.94s/it] {'loss': 0.2971, 'grad_norm': 0.6921205814059361, 'learning_rate': 2.8220770425573253e-08, 'epoch': 0.97} + 97%|█████████▋| 11789/12188 [1:40:27<46:09, 6.94s/it] 97%|█████████▋| 11790/12188 [1:40:34<45:30, 6.86s/it] {'loss': 0.3006, 'grad_norm': 0.7052810723621976, 'learning_rate': 2.8079975010026904e-08, 'epoch': 0.97} + 97%|█████████▋| 11790/12188 [1:40:34<45:30, 6.86s/it][2025-08-18 16:50:09,621] [WARNING] [stage3.py:2118:step] 1 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time + 97%|█████████▋| 11791/12188 [1:40:44<52:47, 7.98s/it] {'loss': 0.302, 'grad_norm': 0.6798644373749868, 'learning_rate': 2.7939530703484652e-08, 'epoch': 0.97} + 97%|█████████▋| 11791/12188 [1:40:44<52:47, 7.98s/it] 97%|█████████▋| 11792/12188 [1:40:53<53:09, 8.05s/it] {'loss': 0.2712, 'grad_norm': 0.6866636672044208, 'learning_rate': 2.7799437515864668e-08, 'epoch': 0.97} + 97%|█████████▋| 11792/12188 [1:40:53<53:09, 8.05s/it] 97%|█████████▋| 11793/12188 [1:41:00<51:06, 7.76s/it] {'loss': 0.2817, 'grad_norm': 0.6877494260803598, 'learning_rate': 2.76596954570596e-08, 'epoch': 0.97} + 97%|█████████▋| 11793/12188 [1:41:00<51:06, 7.76s/it] 97%|█████████▋| 11794/12188 [1:41:07<49:24, 7.52s/it] {'loss': 0.2938, 'grad_norm': 0.7097302935883361, 'learning_rate': 2.752030453693877e-08, 'epoch': 0.97} + 97%|█████████▋| 11794/12188 [1:41:07<49:24, 7.52s/it] 97%|█████████▋| 11795/12188 [1:41:14<48:45, 7.44s/it] {'loss': 0.2852, 'grad_norm': 0.6919936903008422, 'learning_rate': 2.7381264765344862e-08, 'epoch': 0.97} + 97%|█████████▋| 11795/12188 [1:41:14<48:45, 7.44s/it] 97%|█████████▋| 11796/12188 [1:41:21<47:04, 7.20s/it] {'loss': 0.2965, 'grad_norm': 0.6832048400551579, 'learning_rate': 2.7242576152097245e-08, 'epoch': 0.97} + 97%|█████████▋| 11796/12188 [1:41:21<47:04, 7.20s/it] 97%|█████████▋| 11797/12188 [1:41:28<46:42, 7.17s/it] {'loss': 0.2951, 'grad_norm': 0.6679693334629742, 'learning_rate': 2.7104238706989194e-08, 'epoch': 0.97} + 97%|█████████▋| 11797/12188 [1:41:28<46:42, 7.17s/it] 97%|█████████▋| 11798/12188 [1:41:35<46:46, 7.20s/it] {'loss': 0.3043, 'grad_norm': 0.7568341298954862, 'learning_rate': 2.696625243979012e-08, 'epoch': 0.97} + 97%|█████████▋| 11798/12188 [1:41:35<46:46, 7.20s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f9223f2ce50> +[Try #0] Failed to fetch sample 4817762 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f9223f2ce50> +Problematic sample: {'image': '20240823_021250_before_screenshot_sub0.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Shop by category'"}, {'from': 'gpt', 'value': '\nclick(x=0.3395, y=0.1945)\n'}]} + 97%|█████████▋| 11799/12188 [1:41:42<46:03, 7.11s/it] {'loss': 0.2557, 'grad_norm': 0.6423290708238074, 'learning_rate': 2.6828617360244448e-08, 'epoch': 0.97} + 97%|█████████▋| 11799/12188 [1:41:42<46:03, 7.11s/it] 97%|█████████▋| 11800/12188 [1:41:49<46:59, 7.27s/it] {'loss': 0.2934, 'grad_norm': 0.6313931334607796, 'learning_rate': 2.669133347807218e-08, 'epoch': 0.97} + 97%|█████████▋| 11800/12188 [1:41:49<46:59, 7.27s/it] 97%|█████████▋| 11801/12188 [1:41:58<49:22, 7.65s/it] {'loss': 0.2972, 'grad_norm': 0.7950250005711987, 'learning_rate': 2.6554400802967785e-08, 'epoch': 0.97} + 97%|█████████▋| 11801/12188 [1:41:58<49:22, 7.65s/it] 97%|█████████▋| 11802/12188 [1:42:05<48:22, 7.52s/it] {'loss': 0.3381, 'grad_norm': 0.6798707153695068, 'learning_rate': 2.6417819344600747e-08, 'epoch': 0.97} + 97%|█████████▋| 11802/12188 [1:42:05<48:22, 7.52s/it] 97%|█████████▋| 11803/12188 [1:42:13<47:51, 7.46s/it] {'loss': 0.3027, 'grad_norm': 0.7757181834126667, 'learning_rate': 2.628158911261669e-08, 'epoch': 0.97} + 97%|█████████▋| 11803/12188 [1:42:13<47:51, 7.46s/it] 97%|█████████▋| 11804/12188 [1:42:20<47:27, 7.41s/it] {'loss': 0.3275, 'grad_norm': 0.8659731902012695, 'learning_rate': 2.6145710116636246e-08, 'epoch': 0.97} + 97%|█████████▋| 11804/12188 [1:42:20<47:27, 7.41s/it] 97%|█████████▋| 11805/12188 [1:42:27<47:37, 7.46s/it] {'loss': 0.2963, 'grad_norm': 0.7225607945120792, 'learning_rate': 2.6010182366254523e-08, 'epoch': 0.97} + 97%|█████████▋| 11805/12188 [1:42:27<47:37, 7.46s/it] 97%|█████████▋| 11806/12188 [1:42:35<47:34, 7.47s/it] {'loss': 0.2671, 'grad_norm': 0.953055951246137, 'learning_rate': 2.5875005871042192e-08, 'epoch': 0.97} + 97%|█████████▋| 11806/12188 [1:42:35<47:34, 7.47s/it] 97%|█████████▋| 11807/12188 [1:42:42<45:49, 7.22s/it] {'loss': 0.2927, 'grad_norm': 0.6310340971942096, 'learning_rate': 2.574018064054551e-08, 'epoch': 0.97} + 97%|█████████▋| 11807/12188 [1:42:42<45:49, 7.22s/it] 97%|█████████▋| 11808/12188 [1:42:49<45:42, 7.22s/it] {'loss': 0.2944, 'grad_norm': 1.5540254564076958, 'learning_rate': 2.5605706684285747e-08, 'epoch': 0.97} + 97%|█████████▋| 11808/12188 [1:42:49<45:42, 7.22s/it] 97%|█████████▋| 11809/12188 [1:42:59<50:25, 7.98s/it] {'loss': 0.3012, 'grad_norm': 0.6723836895555607, 'learning_rate': 2.5471584011758645e-08, 'epoch': 0.97} + 97%|█████████▋| 11809/12188 [1:42:59<50:25, 7.98s/it] 97%|█████████▋| 11810/12188 [1:43:08<52:21, 8.31s/it] {'loss': 0.3489, 'grad_norm': 0.7969112359996621, 'learning_rate': 2.533781263243662e-08, 'epoch': 0.97} + 97%|█████████▋| 11810/12188 [1:43:08<52:21, 8.31s/it] 97%|█████████▋| 11811/12188 [1:43:15<49:50, 7.93s/it] {'loss': 0.311, 'grad_norm': 0.7377409913445764, 'learning_rate': 2.5204392555765456e-08, 'epoch': 0.97} + 97%|█████████▋| 11811/12188 [1:43:15<49:50, 7.93s/it] 97%|█████████▋| 11812/12188 [1:43:23<49:34, 7.91s/it] {'loss': 0.3056, 'grad_norm': 0.6498146189159109, 'learning_rate': 2.5071323791167058e-08, 'epoch': 0.97} + 97%|█████████▋| 11812/12188 [1:43:23<49:34, 7.91s/it] 97%|█████████▋| 11813/12188 [1:43:30<48:11, 7.71s/it] {'loss': 0.282, 'grad_norm': 0.8224342196354298, 'learning_rate': 2.4938606348040017e-08, 'epoch': 0.97} + 97%|█████████▋| 11813/12188 [1:43:30<48:11, 7.71s/it] 97%|█████████▋| 11814/12188 [1:43:36<46:10, 7.41s/it] {'loss': 0.3027, 'grad_norm': 0.8382311203285364, 'learning_rate': 2.4806240235754618e-08, 'epoch': 0.97} + 97%|█████████▋| 11814/12188 [1:43:36<46:10, 7.41s/it] 97%|█████████▋| 11815/12188 [1:43:44<46:41, 7.51s/it] {'loss': 0.2973, 'grad_norm': 0.7443111582025659, 'learning_rate': 2.4674225463659495e-08, 'epoch': 0.97} + 97%|█████████▋| 11815/12188 [1:43:44<46:41, 7.51s/it] 97%|█████████▋| 11816/12188 [1:43:54<50:24, 8.13s/it] {'loss': 0.2945, 'grad_norm': 0.6406130701652595, 'learning_rate': 2.454256204107719e-08, 'epoch': 0.97} + 97%|█████████▋| 11816/12188 [1:43:54<50:24, 8.13s/it] 97%|█████████▋| 11817/12188 [1:44:01<48:51, 7.90s/it] {'loss': 0.2844, 'grad_norm': 0.634781590829692, 'learning_rate': 2.4411249977305264e-08, 'epoch': 0.97} + 97%|█████████▋| 11817/12188 [1:44:01<48:51, 7.90s/it] 97%|█████████▋| 11818/12188 [1:44:08<46:09, 7.48s/it] {'loss': 0.3094, 'grad_norm': 0.6858034939825924, 'learning_rate': 2.4280289281617407e-08, 'epoch': 0.97} + 97%|█████████▋| 11818/12188 [1:44:08<46:09, 7.48s/it] 97%|█████████▋| 11819/12188 [1:44:16<46:53, 7.62s/it] {'loss': 0.2855, 'grad_norm': 0.7267791161192556, 'learning_rate': 2.414967996326123e-08, 'epoch': 0.97} + 97%|█████████▋| 11819/12188 [1:44:16<46:53, 7.62s/it] 97%|█████████▋| 11820/12188 [1:44:23<46:50, 7.64s/it] {'loss': 0.2873, 'grad_norm': 0.73138126155266, 'learning_rate': 2.401942203146046e-08, 'epoch': 0.97} + 97%|█████████▋| 11820/12188 [1:44:23<46:50, 7.64s/it] 97%|█████████▋| 11821/12188 [1:44:31<45:59, 7.52s/it] {'loss': 0.2882, 'grad_norm': 0.7296401303195521, 'learning_rate': 2.3889515495413297e-08, 'epoch': 0.97} + 97%|█████████▋| 11821/12188 [1:44:31<45:59, 7.52s/it] 97%|███��█████▋| 11822/12188 [1:44:38<45:42, 7.49s/it] {'loss': 0.2779, 'grad_norm': 0.7037825890315239, 'learning_rate': 2.3759960364294067e-08, 'epoch': 0.97} + 97%|█████████▋| 11822/12188 [1:44:38<45:42, 7.49s/it] 97%|█████████▋| 11823/12188 [1:44:45<44:47, 7.36s/it] {'loss': 0.2897, 'grad_norm': 1.0557735220195552, 'learning_rate': 2.363075664725156e-08, 'epoch': 0.97} + 97%|█████████▋| 11823/12188 [1:44:45<44:47, 7.36s/it] 97%|█████████▋| 11824/12188 [1:44:52<44:13, 7.29s/it] {'loss': 0.2951, 'grad_norm': 0.6618072490951737, 'learning_rate': 2.3501904353409598e-08, 'epoch': 0.97} + 97%|█████████▋| 11824/12188 [1:44:52<44:13, 7.29s/it] 97%|█████████▋| 11825/12188 [1:44:59<43:38, 7.21s/it] {'loss': 0.3155, 'grad_norm': 0.750495658840942, 'learning_rate': 2.3373403491867562e-08, 'epoch': 0.97} + 97%|█████████▋| 11825/12188 [1:44:59<43:38, 7.21s/it] 97%|█████████▋| 11826/12188 [1:45:07<44:21, 7.35s/it] {'loss': 0.2712, 'grad_norm': 0.6969650192109016, 'learning_rate': 2.3245254071700418e-08, 'epoch': 0.97} + 97%|█████████▋| 11826/12188 [1:45:07<44:21, 7.35s/it] 97%|█████████▋| 11827/12188 [1:45:13<42:56, 7.14s/it] {'loss': 0.3321, 'grad_norm': 0.7281143513017664, 'learning_rate': 2.3117456101958148e-08, 'epoch': 0.97} + 97%|█████████▋| 11827/12188 [1:45:13<42:56, 7.14s/it] 97%|█████████▋| 11828/12188 [1:45:21<43:49, 7.30s/it] {'loss': 0.3057, 'grad_norm': 0.7061728915646422, 'learning_rate': 2.2990009591664642e-08, 'epoch': 0.97} + 97%|█████████▋| 11828/12188 [1:45:21<43:49, 7.30s/it] 97%|█████████▋| 11829/12188 [1:45:30<46:42, 7.81s/it] {'loss': 0.3193, 'grad_norm': 0.713821284079565, 'learning_rate': 2.286291454982048e-08, 'epoch': 0.97} + 97%|█████████▋| 11829/12188 [1:45:30<46:42, 7.81s/it] 97%|█████████▋| 11830/12188 [1:45:37<45:00, 7.54s/it] {'loss': 0.2879, 'grad_norm': 0.7063627080034044, 'learning_rate': 2.2736170985401262e-08, 'epoch': 0.97} + 97%|█████████▋| 11830/12188 [1:45:37<45:00, 7.54s/it] 97%|█████████▋| 11831/12188 [1:45:44<43:14, 7.27s/it] {'loss': 0.2996, 'grad_norm': 0.7348015274340627, 'learning_rate': 2.260977890735705e-08, 'epoch': 0.97} + 97%|█████████▋| 11831/12188 [1:45:44<43:14, 7.27s/it] 97%|█████████▋| 11832/12188 [1:45:51<43:27, 7.32s/it] {'loss': 0.3075, 'grad_norm': 0.7338880385275606, 'learning_rate': 2.2483738324612924e-08, 'epoch': 0.97} + 97%|█████████▋| 11832/12188 [1:45:51<43:27, 7.32s/it] 97%|█████████▋| 11833/12188 [1:45:58<42:06, 7.12s/it] {'loss': 0.325, 'grad_norm': 0.7949081672537549, 'learning_rate': 2.2358049246070658e-08, 'epoch': 0.97} + 97%|█████████▋| 11833/12188 [1:45:58<42:06, 7.12s/it] 97%|█████████▋| 11834/12188 [1:46:04<40:47, 6.91s/it] {'loss': 0.2743, 'grad_norm': 0.7143312147230727, 'learning_rate': 2.2232711680605368e-08, 'epoch': 0.97} + 97%|█████████▋| 11834/12188 [1:46:04<40:47, 6.91s/it] 97%|█████████▋| 11835/12188 [1:46:11<40:07, 6.82s/it] {'loss': 0.3284, 'grad_norm': 0.7166171716232623, 'learning_rate': 2.210772563706942e-08, 'epoch': 0.97} + 97%|█████████▋| 11835/12188 [1:46:11<40:07, 6.82s/it] 97%|█████████▋| 11836/12188 [1:46:18<40:52, 6.97s/it] {'loss': 0.3187, 'grad_norm': 0.6776384505198192, 'learning_rate': 2.1983091124287426e-08, 'epoch': 0.97} + 97%|█████████▋| 11836/12188 [1:46:18<40:52, 6.97s/it] 97%|█████████▋| 11837/12188 [1:46:25<41:24, 7.08s/it] {'loss': 0.2815, 'grad_norm': 0.6712364330677335, 'learning_rate': 2.185880815106234e-08, 'epoch': 0.97} + 97%|█████████▋| 11837/12188 [1:46:25<41:24, 7.08s/it] 97%|█████████▋| 11838/12188 [1:46:32<41:06, 7.05s/it] {'loss': 0.3005, 'grad_norm': 0.6325749718510343, 'learning_rate': 2.1734876726169918e-08, 'epoch': 0.97} + 97%|█████████▋| 11838/12188 [1:46:32<41:06, 7.05s/it] 97%|█████████▋| 11839/12188 [1:46:39<40:34, 6.98s/it] {'loss': 0.3002, 'grad_norm': 0.7162504021261499, 'learning_rate': 2.1611296858362052e-08, 'epoch': 0.97} + 97%|█████████▋| 11839/12188 [1:46:39<40:34, 6.98s/it] 97%|█████████▋| 11840/12188 [1:46:50<47:52, 8.25s/it] {'loss': 0.2981, 'grad_norm': 0.7025030932943198, 'learning_rate': 2.1488068556366205e-08, 'epoch': 0.97} + 97%|█████████▋| 11840/12188 [1:46:51<47:52, 8.25s/it] 97%|█████████▋| 11841/12188 [1:46:58<47:07, 8.15s/it] {'loss': 0.2923, 'grad_norm': 0.6701182497615539, 'learning_rate': 2.1365191828884857e-08, 'epoch': 0.97} + 97%|█████████▋| 11841/12188 [1:46:58<47:07, 8.15s/it] 97%|█████████▋| 11842/12188 [1:47:06<45:13, 7.84s/it] {'loss': 0.3068, 'grad_norm': 0.6949333518451069, 'learning_rate': 2.1242666684594405e-08, 'epoch': 0.97} + 97%|█████████▋| 11842/12188 [1:47:06<45:13, 7.84s/it] 97%|█████████▋| 11843/12188 [1:47:13<45:07, 7.85s/it] {'loss': 0.3137, 'grad_norm': 0.724055933209844, 'learning_rate': 2.1120493132147924e-08, 'epoch': 0.97} + 97%|█████████▋| 11843/12188 [1:47:13<45:07, 7.85s/it] 97%|█████████▋| 11844/12188 [1:47:20<43:13, 7.54s/it] {'loss': 0.3044, 'grad_norm': 0.736835639427235, 'learning_rate': 2.0998671180172957e-08, 'epoch': 0.97} + 97%|█████████▋| 11844/12188 [1:47:20<43:13, 7.54s/it] 97%|█████████▋| 11845/12188 [1:47:27<41:34, 7.27s/it] {'loss': 0.3083, 'grad_norm': 0.7729704185804539, 'learning_rate': 2.0877200837272626e-08, 'epoch': 0.97} + 97%|█████████▋| 11845/12188 [1:47:27<41:34, 7.27s/it] 97%|█████████▋| 11846/12188 [1:47:34<41:07, 7.22s/it] {'loss': 0.2932, 'grad_norm': 0.6845704625059981, 'learning_rate': 2.075608211202451e-08, 'epoch': 0.97} + 97%|█████████▋| 11846/12188 [1:47:34<41:07, 7.22s/it] 97%|█████████▋| 11847/12188 [1:47:43<43:18, 7.62s/it] {'loss': 0.3025, 'grad_norm': 0.7183856810931386, 'learning_rate': 2.0635315012982325e-08, 'epoch': 0.97} + 97%|█████████▋| 11847/12188 [1:47:43<43:18, 7.62s/it] 97%|█████████▋| 11848/12188 [1:47:50<42:09, 7.44s/it] {'loss': 0.2691, 'grad_norm': 0.6862724611896376, 'learning_rate': 2.051489954867425e-08, 'epoch': 0.97} + 97%|█████████▋| 11848/12188 [1:47:50<42:09, 7.44s/it] 97%|█████████▋| 11849/12188 [1:47:57<41:34, 7.36s/it] {'loss': 0.3105, 'grad_norm': 0.6903545141620331, 'learning_rate': 2.039483572760348e-08, 'epoch': 0.97} + 97%|█████████▋| 11849/12188 [1:47:57<41:34, 7.36s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7fea850aefc0> +[Try #0] Failed to fetch sample 4692136 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7fea850aefc0> +Problematic sample: {'image': '20240823_021250_before_screenshot_sub0.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Picture 1 of 18'"}, {'from': 'gpt', 'value': '\nclick(x=0.245, y=0.4295)\n'}]} + 97%|█████████▋| 11850/12188 [1:48:05<43:30, 7.72s/it] {'loss': 0.3304, 'grad_norm': 0.7362216071036439, 'learning_rate': 2.02751235582499e-08, 'epoch': 0.97} + 97%|█████████▋| 11850/12188 [1:48:05<43:30, 7.72s/it] 97%|█████████▋| 11851/12188 [1:48:12<41:34, 7.40s/it] {'loss': 0.2775, 'grad_norm': 0.7299063139015908, 'learning_rate': 2.015576304906619e-08, 'epoch': 0.97} + 97%|█████████▋| 11851/12188 [1:48:12<41:34, 7.40s/it] 97%|█████████▋| 11852/12188 [1:48:19<41:00, 7.32s/it] {'loss': 0.3159, 'grad_norm': 0.7277331529635913, 'learning_rate': 2.003675420848117e-08, 'epoch': 0.97} + 97%|█████████▋| 11852/12188 [1:48:19<41:00, 7.32s/it] 97%|█████████▋| 11853/12188 [1:48:27<41:13, 7.38s/it] {'loss': 0.2908, 'grad_norm': 0.7092858702308572, 'learning_rate': 1.991809704490033e-08, 'epoch': 0.97} + 97%|█████████▋| 11853/12188 [1:48:27<41:13, 7.38s/it] 97%|█████████▋| 11854/12188 [1:48:33<40:05, 7.20s/it] {'loss': 0.3239, 'grad_norm': 0.6408394100622983, 'learning_rate': 1.9799791566701975e-08, 'epoch': 0.97} + 97%|█████████▋| 11854/12188 [1:48:33<40:05, 7.20s/it] 97%|█████████▋| 11855/12188 [1:48:40<39:17, 7.08s/it] {'loss': 0.3467, 'grad_norm': 0.6606955613585772, 'learning_rate': 1.9681837782241086e-08, 'epoch': 0.97} + 97%|█████████▋| 11855/12188 [1:48:40<39:17, 7.08s/it] 97%|█████████▋| 11856/12188 [1:48:47<39:31, 7.14s/it] {'loss': 0.3047, 'grad_norm': 0.7239352704709403, 'learning_rate': 1.9564235699847666e-08, 'epoch': 0.97} + 97%|█████████▋| 11856/12188 [1:48:47<39:31, 7.14s/it] 97%|█████████▋| 11857/12188 [1:48:55<39:48, 7.22s/it] {'loss': 0.2792, 'grad_norm': 0.7289802219042683, 'learning_rate': 1.944698532782563e-08, 'epoch': 0.97} + 97%|█████████▋| 11857/12188 [1:48:55<39:48, 7.22s/it] 97%|█████████▋| 11858/12188 [1:49:01<38:33, 7.01s/it] {'loss': 0.3305, 'grad_norm': 0.7528507610630722, 'learning_rate': 1.9330086674456128e-08, 'epoch': 0.97} + 97%|█████████▋| 11858/12188 [1:49:01<38:33, 7.01s/it] 97%|█████████▋| 11859/12188 [1:49:08<38:32, 7.03s/it] {'loss': 0.3245, 'grad_norm': 0.8855606704493777, 'learning_rate': 1.921353974799367e-08, 'epoch': 0.97} + 97%|█████████▋| 11859/12188 [1:49:08<38:32, 7.03s/it] 97%|█████████▋| 11860/12188 [1:49:16<39:09, 7.16s/it] {'loss': 0.2694, 'grad_norm': 0.7432036240150038, 'learning_rate': 1.9097344556668894e-08, 'epoch': 0.97} + 97%|█████████▋| 11860/12188 [1:49:16<39:09, 7.16s/it] 97%|█████████▋| 11861/12188 [1:49:23<39:25, 7.23s/it] {'loss': 0.3029, 'grad_norm': 0.9759948089754424, 'learning_rate': 1.8981501108686907e-08, 'epoch': 0.97} + 97%|█████████▋| 11861/12188 [1:49:23<39:25, 7.23s/it] 97%|█████████▋| 11862/12188 [1:49:31<40:08, 7.39s/it] {'loss': 0.3005, 'grad_norm': 0.7392673878992336, 'learning_rate': 1.8866009412228937e-08, 'epoch': 0.97} + 97%|█████████▋| 11862/12188 [1:49:31<40:08, 7.39s/it] 97%|█████████▋| 11863/12188 [1:49:39<40:46, 7.53s/it] {'loss': 0.2909, 'grad_norm': 0.6722309356001488, 'learning_rate': 1.8750869475450682e-08, 'epoch': 0.97} + 97%|█████████▋| 11863/12188 [1:49:39<40:46, 7.53s/it] 97%|█████████▋| 11864/12188 [1:49:47<41:05, 7.61s/it] {'loss': 0.2656, 'grad_norm': 0.649022324284623, 'learning_rate': 1.8636081306482866e-08, 'epoch': 0.97} + 97%|█████████▋| 11864/12188 [1:49:47<41:05, 7.61s/it] 97%|█████████▋| 11865/12188 [1:49:54<39:49, 7.40s/it] {'loss': 0.2749, 'grad_norm': 0.7518362773598753, 'learning_rate': 1.852164491343178e-08, 'epoch': 0.97} + 97%|█████████▋| 11865/12188 [1:49:54<39:49, 7.40s/it] 97%|█████████▋| 11866/12188 [1:50:01<39:09, 7.30s/it] {'loss': 0.294, 'grad_norm': 1.308787271222141, 'learning_rate': 1.8407560304378736e-08, 'epoch': 0.97} + 97%|█████████▋| 11866/12188 [1:50:01<39:09, 7.30s/it] 97%|█████████▋| 11867/12188 [1:50:08<38:40, 7.23s/it] {'loss': 0.3123, 'grad_norm': 0.682007190443904, 'learning_rate': 1.8293827487380623e-08, 'epoch': 0.97} + 97%|█████████▋| 11867/12188 [1:50:08<38:40, 7.23s/it] 97%|█████████▋| 11868/12188 [1:50:15<38:11, 7.16s/it] {'loss': 0.2839, 'grad_norm': 0.7345528240983369, 'learning_rate': 1.818044647046824e-08, 'epoch': 0.97} + 97%|█████████▋| 11868/12188 [1:50:15<38:11, 7.16s/it] 97%|█████████▋| 11869/12188 [1:50:22<37:47, 7.11s/it] {'loss': 0.2807, 'grad_norm': 0.9072050917313449, 'learning_rate': 1.8067417261649066e-08, 'epoch': 0.97} + 97%|█████████▋| 11869/12188 [1:50:22<37:47, 7.11s/it] 97%|█████████▋| 11870/12188 [1:50:29<37:42, 7.12s/it] {'loss': 0.299, 'grad_norm': 0.7269971130173388, 'learning_rate': 1.795473986890506e-08, 'epoch': 0.97} + 97%|█████████▋| 11870/12188 [1:50:29<37:42, 7.12s/it] 97%|█████████▋| 11871/12188 [1:50:36<37:02, 7.01s/it] {'loss': 0.3046, 'grad_norm': 0.7753376939622861, 'learning_rate': 1.7842414300192624e-08, 'epoch': 0.97} + 97%|█████████▋| 11871/12188 [1:50:36<37:02, 7.01s/it] 97%|█████████▋| 11872/12188 [1:50:43<37:23, 7.10s/it] {'loss': 0.3205, 'grad_norm': 0.710847867469426, 'learning_rate': 1.7730440563444307e-08, 'epoch': 0.97} + 97%|█████████▋| 11872/12188 [1:50:43<37:23, 7.10s/it] 97%|█████████▋| 11873/12188 [1:50:50<37:58, 7.23s/it] {'loss': 0.286, 'grad_norm': 0.6639027792085632, 'learning_rate': 1.7618818666568226e-08, 'epoch': 0.97} + 97%|█████████▋| 11873/12188 [1:50:51<37:58, 7.23s/it] 97%|█████████▋| 11874/12188 [1:50:58<38:19, 7.32s/it] {'loss': 0.2698, 'grad_norm': 0.7043084657715095, 'learning_rate': 1.7507548617445857e-08, 'epoch': 0.97} + 97%|█████████▋| 11874/12188 [1:50:58<38:19, 7.32s/it] 97%|█████████▋| 11875/12188 [1:51:05<37:23, 7.17s/it] {'loss': 0.2869, 'grad_norm': 0.6661939979300832, 'learning_rate': 1.7396630423935356e-08, 'epoch': 0.97} + 97%|█████████▋| 11875/12188 [1:51:05<37:23, 7.17s/it] 97%|█████████▋| 11876/12188 [1:51:12<36:53, 7.09s/it] {'loss': 0.2918, 'grad_norm': 0.6625707946913971, 'learning_rate': 1.7286064093869902e-08, 'epoch': 0.97} + 97%|█████████▋| 11876/12188 [1:51:12<36:53, 7.09s/it] 97%|█████████▋| 11877/12188 [1:51:19<36:57, 7.13s/it] {'loss': 0.342, 'grad_norm': 1.0589602031384298, 'learning_rate': 1.7175849635057694e-08, 'epoch': 0.97} + 97%|█████████▋| 11877/12188 [1:51:19<36:57, 7.13s/it] 97%|█████████▋| 11878/12188 [1:51:28<39:07, 7.57s/it] {'loss': 0.2773, 'grad_norm': 0.694215180512439, 'learning_rate': 1.7065987055280842e-08, 'epoch': 0.97} + 97%|█████████▋| 11878/12188 [1:51:28<39:07, 7.57s/it] 97%|█████████▋| 11879/12188 [1:51:35<38:26, 7.46s/it] {'loss': 0.2839, 'grad_norm': 0.6830049281664957, 'learning_rate': 1.6956476362298692e-08, 'epoch': 0.97} + 97%|█████████▋| 11879/12188 [1:51:35<38:26, 7.46s/it] 97%|█████████▋| 11880/12188 [1:51:44<40:33, 7.90s/it] {'loss': 0.3307, 'grad_norm': 0.747012593601823, 'learning_rate': 1.6847317563843946e-08, 'epoch': 0.97} + 97%|██████��██▋| 11880/12188 [1:51:44<40:33, 7.90s/it] 97%|█████████▋| 11881/12188 [1:51:52<41:17, 8.07s/it] {'loss': 0.2917, 'grad_norm': 0.693662803699445, 'learning_rate': 1.6738510667625997e-08, 'epoch': 0.97} + 97%|█████████▋| 11881/12188 [1:51:52<41:17, 8.07s/it] 97%|█████████▋| 11882/12188 [1:51:59<39:45, 7.80s/it] {'loss': 0.2687, 'grad_norm': 1.3968380746803541, 'learning_rate': 1.6630055681327582e-08, 'epoch': 0.97} + 97%|█████████▋| 11882/12188 [1:51:59<39:45, 7.80s/it] 97%|█████████▋| 11883/12188 [1:52:06<37:50, 7.44s/it] {'loss': 0.2786, 'grad_norm': 0.6498836135349566, 'learning_rate': 1.6521952612608693e-08, 'epoch': 0.97} + 97%|█████████▋| 11883/12188 [1:52:06<37:50, 7.44s/it] 98%|█████████▊| 11884/12188 [1:52:13<36:32, 7.21s/it] {'loss': 0.316, 'grad_norm': 0.6485501978093866, 'learning_rate': 1.6414201469102664e-08, 'epoch': 0.98} + 98%|█████████▊| 11884/12188 [1:52:13<36:32, 7.21s/it] 98%|█████████▊| 11885/12188 [1:52:19<35:46, 7.08s/it] {'loss': 0.2715, 'grad_norm': 0.7565043794395446, 'learning_rate': 1.630680225841952e-08, 'epoch': 0.98} + 98%|█████████▊| 11885/12188 [1:52:19<35:46, 7.08s/it] 98%|█████████▊| 11886/12188 [1:52:28<37:41, 7.49s/it] {'loss': 0.3093, 'grad_norm': 0.6786743808355639, 'learning_rate': 1.6199754988142635e-08, 'epoch': 0.98} + 98%|█████████▊| 11886/12188 [1:52:28<37:41, 7.49s/it] 98%|█████████▊| 11887/12188 [1:52:35<36:24, 7.26s/it] {'loss': 0.3051, 'grad_norm': 0.7226845063221663, 'learning_rate': 1.609305966583208e-08, 'epoch': 0.98} + 98%|█████████▊| 11887/12188 [1:52:35<36:24, 7.26s/it] 98%|█████████▊| 11888/12188 [1:52:41<35:24, 7.08s/it] {'loss': 0.2729, 'grad_norm': 0.6862979913173561, 'learning_rate': 1.5986716299022375e-08, 'epoch': 0.98} + 98%|█████████▊| 11888/12188 [1:52:41<35:24, 7.08s/it] 98%|█████████▊| 11889/12188 [1:52:48<34:25, 6.91s/it] {'loss': 0.306, 'grad_norm': 0.7532617629796952, 'learning_rate': 1.5880724895223077e-08, 'epoch': 0.98} + 98%|█████████▊| 11889/12188 [1:52:48<34:25, 6.91s/it] 98%|█████████▊| 11890/12188 [1:52:55<34:19, 6.91s/it] {'loss': 0.2653, 'grad_norm': 0.599283698298367, 'learning_rate': 1.577508546191986e-08, 'epoch': 0.98} + 98%|█████████▊| 11890/12188 [1:52:55<34:19, 6.91s/it] 98%|█████████▊| 11891/12188 [1:53:05<38:56, 7.87s/it] {'loss': 0.3352, 'grad_norm': 0.735857331745072, 'learning_rate': 1.5669798006572313e-08, 'epoch': 0.98} + 98%|█████████▊| 11891/12188 [1:53:05<38:56, 7.87s/it] 98%|█████████▊| 11892/12188 [1:53:12<38:08, 7.73s/it] {'loss': 0.2741, 'grad_norm': 0.6395743180627776, 'learning_rate': 1.5564862536615598e-08, 'epoch': 0.98} + 98%|█████████▊| 11892/12188 [1:53:12<38:08, 7.73s/it] 98%|█████████▊| 11893/12188 [1:53:19<36:41, 7.46s/it] {'loss': 0.2893, 'grad_norm': 0.7774362390220685, 'learning_rate': 1.5460279059459903e-08, 'epoch': 0.98} + 98%|█████████▊| 11893/12188 [1:53:19<36:41, 7.46s/it] 98%|█████████▊| 11894/12188 [1:53:26<35:48, 7.31s/it] {'loss': 0.2947, 'grad_norm': 0.6647476288199389, 'learning_rate': 1.535604758249154e-08, 'epoch': 0.98} + 98%|█████████▊| 11894/12188 [1:53:26<35:48, 7.31s/it] 98%|█████████▊| 11895/12188 [1:53:33<35:36, 7.29s/it] {'loss': 0.3188, 'grad_norm': 0.721279766329504, 'learning_rate': 1.5252168113070177e-08, 'epoch': 0.98} + 98%|█████████▊| 11895/12188 [1:53:33<35:36, 7.29s/it] 98%|█████████▊| 11896/12188 [1:53:42<37:12, 7.65s/it] {'loss': 0.3331, 'grad_norm': 0.8596061643527629, 'learning_rate': 1.5148640658532164e-08, 'epoch': 0.98} + 98%|█████████▊| 11896/12188 [1:53:42<37:12, 7.65s/it] 98%|█████████▊| 11897/12188 [1:53:49<37:03, 7.64s/it] {'loss': 0.2716, 'grad_norm': 0.7918838832898236, 'learning_rate': 1.5045465226188882e-08, 'epoch': 0.98} + 98%|█████████▊| 11897/12188 [1:53:49<37:03, 7.64s/it] 98%|█████████▊| 11898/12188 [1:53:56<35:29, 7.34s/it] {'loss': 0.2935, 'grad_norm': 0.6571730556806472, 'learning_rate': 1.4942641823325056e-08, 'epoch': 0.98} + 98%|█████████▊| 11898/12188 [1:53:56<35:29, 7.34s/it] 98%|█████████▊| 11899/12188 [1:54:05<38:18, 7.95s/it] {'loss': 0.2781, 'grad_norm': 0.6996832564461056, 'learning_rate': 1.4840170457203206e-08, 'epoch': 0.98} + 98%|█████████▊| 11899/12188 [1:54:05<38:18, 7.95s/it] 98%|█████████▊| 11900/12188 [1:54:13<37:42, 7.86s/it] {'loss': 0.2797, 'grad_norm': 0.6672509644622373, 'learning_rate': 1.4738051135059217e-08, 'epoch': 0.98} + 98%|█████████▊| 11900/12188 [1:54:13<37:42, 7.86s/it] 98%|█████████▊| 11901/12188 [1:54:21<37:46, 7.90s/it] {'loss': 0.3351, 'grad_norm': 0.7774826549561362, 'learning_rate': 1.463628386410454e-08, 'epoch': 0.98} + 98%|█████████▊| 11901/12188 [1:54:21<37:46, 7.90s/it] 98%|█████████▊| 11902/12188 [1:54:28<36:03, 7.57s/it] {'loss': 0.2728, 'grad_norm': 0.6896030820946668, 'learning_rate': 1.4534868651526202e-08, 'epoch': 0.98} + 98%|█████████▊| 11902/12188 [1:54:28<36:03, 7.57s/it] 98%|█████████▊| 11903/12188 [1:54:35<34:51, 7.34s/it] {'loss': 0.2932, 'grad_norm': 0.6851748888167124, 'learning_rate': 1.4433805504485699e-08, 'epoch': 0.98} + 98%|█████████▊| 11903/12188 [1:54:35<34:51, 7.34s/it] 98%|█████████▊| 11904/12188 [1:54:42<34:52, 7.37s/it] {'loss': 0.3022, 'grad_norm': 0.7521558602731356, 'learning_rate': 1.4333094430119542e-08, 'epoch': 0.98} + 98%|█████████▊| 11904/12188 [1:54:42<34:52, 7.37s/it] 98%|█████████▊| 11905/12188 [1:54:49<33:57, 7.20s/it] {'loss': 0.2788, 'grad_norm': 0.7693870064057333, 'learning_rate': 1.4232735435540378e-08, 'epoch': 0.98} + 98%|█████████▊| 11905/12188 [1:54:49<33:57, 7.20s/it] 98%|█████████▊| 11906/12188 [1:54:56<33:53, 7.21s/it] {'loss': 0.3195, 'grad_norm': 0.6875294579881008, 'learning_rate': 1.4132728527835315e-08, 'epoch': 0.98} + 98%|█████████▊| 11906/12188 [1:54:56<33:53, 7.21s/it] 98%|█████████▊| 11907/12188 [1:55:03<33:03, 7.06s/it] {'loss': 0.3285, 'grad_norm': 0.716008739718903, 'learning_rate': 1.4033073714065926e-08, 'epoch': 0.98} + 98%|█████████▊| 11907/12188 [1:55:03<33:03, 7.06s/it] 98%|█████████▊| 11908/12188 [1:55:09<32:19, 6.93s/it] {'loss': 0.3072, 'grad_norm': 0.730305742780468, 'learning_rate': 1.3933771001271023e-08, 'epoch': 0.98} + 98%|█████████▊| 11908/12188 [1:55:09<32:19, 6.93s/it] 98%|█████████▊| 11909/12188 [1:55:17<32:58, 7.09s/it] {'loss': 0.321, 'grad_norm': 0.6913017064994512, 'learning_rate': 1.3834820396462223e-08, 'epoch': 0.98} + 98%|█████████▊| 11909/12188 [1:55:17<32:58, 7.09s/it] 98%|█████████▊| 11910/12188 [1:55:24<33:17, 7.19s/it] {'loss': 0.2629, 'grad_norm': 0.7132467200308904, 'learning_rate': 1.3736221906627822e-08, 'epoch': 0.98} + 98%|█████████▊| 11910/12188 [1:55:24<33:17, 7.19s/it] 98%|█████████▊| 11911/12188 [1:55:32<33:52, 7.34s/it] {'loss': 0.2833, 'grad_norm': 0.8067259420011141, 'learning_rate': 1.363797553873003e-08, 'epoch': 0.98} + 98%|█████████▊| 11911/12188 [1:55:32<33:52, 7.34s/it] 98%|█████████▊| 11912/12188 [1:55:39<33:47, 7.35s/it] {'loss': 0.2882, 'grad_norm': 0.6693309232113416, 'learning_rate': 1.3540081299707187e-08, 'epoch': 0.98} + 98%|█████████▊| 11912/12188 [1:55:39<33:47, 7.35s/it] 98%|█████████▊| 11913/12188 [1:55:47<33:31, 7.31s/it] {'loss': 0.2663, 'grad_norm': 0.6897247375285968, 'learning_rate': 1.3442539196472647e-08, 'epoch': 0.98} + 98%|█████████▊| 11913/12188 [1:55:47<33:31, 7.31s/it] 98%|█████████▊| 11914/12188 [1:55:53<32:51, 7.19s/it] {'loss': 0.3048, 'grad_norm': 0.7115647347036778, 'learning_rate': 1.334534923591424e-08, 'epoch': 0.98} + 98%|█████████▊| 11914/12188 [1:55:53<32:51, 7.19s/it] 98%|█████████▊| 11915/12188 [1:56:00<32:22, 7.12s/it] {'loss': 0.2825, 'grad_norm': 0.671923898282353, 'learning_rate': 1.324851142489536e-08, 'epoch': 0.98} + 98%|█████████▊| 11915/12188 [1:56:00<32:22, 7.12s/it] 98%|█████████▊| 11916/12188 [1:56:08<33:09, 7.31s/it] {'loss': 0.319, 'grad_norm': 0.7182536778310277, 'learning_rate': 1.3152025770255539e-08, 'epoch': 0.98} + 98%|█████████▊| 11916/12188 [1:56:08<33:09, 7.31s/it] 98%|█████████▊| 11917/12188 [1:56:15<32:37, 7.22s/it] {'loss': 0.2768, 'grad_norm': 0.6592258713577072, 'learning_rate': 1.3055892278807103e-08, 'epoch': 0.98} + 98%|█████████▊| 11917/12188 [1:56:15<32:37, 7.22s/it] 98%|█████████▊| 11918/12188 [1:56:23<33:04, 7.35s/it] {'loss': 0.2849, 'grad_norm': 0.6237131233517783, 'learning_rate': 1.2960110957339623e-08, 'epoch': 0.98} + 98%|█████████▊| 11918/12188 [1:56:23<33:04, 7.35s/it] 98%|█████████▊| 11919/12188 [1:56:30<33:09, 7.39s/it] {'loss': 0.2963, 'grad_norm': 0.6782626166591507, 'learning_rate': 1.2864681812616575e-08, 'epoch': 0.98} + 98%|█████████▊| 11919/12188 [1:56:30<33:09, 7.39s/it] 98%|█████████▊| 11920/12188 [1:56:37<32:42, 7.32s/it] {'loss': 0.3034, 'grad_norm': 0.7235991068597268, 'learning_rate': 1.276960485137757e-08, 'epoch': 0.98} + 98%|█████████▊| 11920/12188 [1:56:37<32:42, 7.32s/it] 98%|█████████▊| 11921/12188 [1:56:45<32:32, 7.31s/it] {'loss': 0.2736, 'grad_norm': 0.8088868405843934, 'learning_rate': 1.2674880080336682e-08, 'epoch': 0.98} + 98%|█████████▊| 11921/12188 [1:56:45<32:32, 7.31s/it] 98%|█████████▊| 11922/12188 [1:56:51<31:31, 7.11s/it] {'loss': 0.2816, 'grad_norm': 0.7451232339891718, 'learning_rate': 1.258050750618245e-08, 'epoch': 0.98} + 98%|█████████▊| 11922/12188 [1:56:51<31:31, 7.11s/it] 98%|█████████▊| 11923/12188 [1:56:58<30:56, 7.00s/it] {'loss': 0.2838, 'grad_norm': 0.6553796872125283, 'learning_rate': 1.2486487135580094e-08, 'epoch': 0.98} + 98%|█████████▊| 11923/12188 [1:56:58<30:56, 7.00s/it] 98%|█████████▊| 11924/12188 [1:57:06<31:31, 7.17s/it] {'loss': 0.2937, 'grad_norm': 0.6840434626314753, 'learning_rate': 1.2392818975168752e-08, 'epoch': 0.98} + 98%|█████████▊| 11924/12188 [1:57:06<31:31, 7.17s/it] 98%|█████████▊| 11925/12188 [1:57:14<33:04, 7.55s/it] {'loss': 0.2823, 'grad_norm': 0.7340115411264888, 'learning_rate': 1.2299503031563687e-08, 'epoch': 0.98} + 98%|█████████▊| 11925/12188 [1:57:14<33:04, 7.55s/it] 98%|█████████▊| 11926/12188 [1:57:22<32:55, 7.54s/it] {'loss': 0.3214, 'grad_norm': 0.684876338656813, 'learning_rate': 1.2206539311354071e-08, 'epoch': 0.98} + 98%|█████████▊| 11926/12188 [1:57:22<32:55, 7.54s/it] 98%|█████████▊| 11927/12188 [1:57:28<31:41, 7.28s/it] {'loss': 0.2608, 'grad_norm': 0.646075022662251, 'learning_rate': 1.211392782110521e-08, 'epoch': 0.98} + 98%|█████████▊| 11927/12188 [1:57:28<31:41, 7.28s/it] 98%|█████████▊| 11928/12188 [1:57:35<30:35, 7.06s/it] {'loss': 0.2984, 'grad_norm': 0.7047747812061557, 'learning_rate': 1.2021668567357425e-08, 'epoch': 0.98} + 98%|█████████▊| 11928/12188 [1:57:35<30:35, 7.06s/it] 98%|█████████▊| 11929/12188 [1:57:42<30:39, 7.10s/it] {'loss': 0.2912, 'grad_norm': 0.7009713740119918, 'learning_rate': 1.1929761556625507e-08, 'epoch': 0.98} + 98%|█████████▊| 11929/12188 [1:57:42<30:39, 7.10s/it] 98%|█████████▊| 11930/12188 [1:57:49<30:13, 7.03s/it] {'loss': 0.3169, 'grad_norm': 0.6749379387003258, 'learning_rate': 1.183820679539982e-08, 'epoch': 0.98} + 98%|█████████▊| 11930/12188 [1:57:49<30:13, 7.03s/it] 98%|█████████▊| 11931/12188 [1:57:56<30:26, 7.11s/it] {'loss': 0.3031, 'grad_norm': 0.7146555679068053, 'learning_rate': 1.1747004290145747e-08, 'epoch': 0.98} + 98%|█████████▊| 11931/12188 [1:57:56<30:26, 7.11s/it] 98%|█████████▊| 11932/12188 [1:58:03<30:04, 7.05s/it] {'loss': 0.2681, 'grad_norm': 0.7059949214720915, 'learning_rate': 1.1656154047303691e-08, 'epoch': 0.98} + 98%|█████████▊| 11932/12188 [1:58:03<30:04, 7.05s/it] 98%|█████████▊| 11933/12188 [1:58:11<31:26, 7.40s/it] {'loss': 0.3109, 'grad_norm': 0.6873247249752971, 'learning_rate': 1.1565656073290188e-08, 'epoch': 0.98} + 98%|█████████▊| 11933/12188 [1:58:11<31:26, 7.40s/it] 98%|█████████▊| 11934/12188 [1:58:19<31:02, 7.33s/it] {'loss': 0.2942, 'grad_norm': 1.058665671893197, 'learning_rate': 1.147551037449568e-08, 'epoch': 0.98} + 98%|█████████▊| 11934/12188 [1:58:19<31:02, 7.33s/it] 98%|█████████▊| 11935/12188 [1:58:25<30:16, 7.18s/it] {'loss': 0.2881, 'grad_norm': 0.6718592271944607, 'learning_rate': 1.138571695728563e-08, 'epoch': 0.98} + 98%|█████████▊| 11935/12188 [1:58:25<30:16, 7.18s/it] 98%|█████████▊| 11936/12188 [1:58:32<29:42, 7.07s/it] {'loss': 0.3064, 'grad_norm': 0.7807521644984842, 'learning_rate': 1.1296275828001635e-08, 'epoch': 0.98} + 98%|█████████▊| 11936/12188 [1:58:32<29:42, 7.07s/it] 98%|█████████▊| 11937/12188 [1:58:40<30:16, 7.24s/it] {'loss': 0.3195, 'grad_norm': 0.825639078247704, 'learning_rate': 1.1207186992959197e-08, 'epoch': 0.98} + 98%|█████████▊| 11937/12188 [1:58:40<30:16, 7.24s/it] 98%|█████████▊| 11938/12188 [1:58:48<30:48, 7.39s/it] {'loss': 0.259, 'grad_norm': 0.6697042407924105, 'learning_rate': 1.1118450458450503e-08, 'epoch': 0.98} + 98%|█████████▊| 11938/12188 [1:58:48<30:48, 7.39s/it] 98%|█████████▊| 11939/12188 [1:58:54<29:42, 7.16s/it] {'loss': 0.2757, 'grad_norm': 0.756147488587098, 'learning_rate': 1.1030066230741099e-08, 'epoch': 0.98} + 98%|█████████▊| 11939/12188 [1:58:54<29:42, 7.16s/it] 98%|█████████▊| 11940/12188 [1:59:01<29:43, 7.19s/it] {'loss': 0.2929, 'grad_norm': 0.7753499303092624, 'learning_rate': 1.094203431607377e-08, 'epoch': 0.98} + 98%|█████████▊| 11940/12188 [1:59:01<29:43, 7.19s/it] 98%|█████████▊| 11941/12188 [1:59:09<29:27, 7.15s/it] {'loss': 0.2955, 'grad_norm': 0.6547270857625173, 'learning_rate': 1.0854354720664095e-08, 'epoch': 0.98} + 98%|█████████▊| 11941/12188 [1:59:09<29:27, 7.15s/it] 98%|█████████▊| 11942/12188 [1:59:15<29:03, 7.09s/it] {'loss': 0.2754, 'grad_norm': 0.6847846113631998, 'learning_rate': 1.0767027450703793e-08, 'epoch': 0.98} + 98%|█████████▊| 11942/12188 [1:59:15<29:03, 7.09s/it] 98%|█████████▊| 11943/12188 [1:59:23<29:59, 7.35s/it] {'loss': 0.2773, 'grad_norm': 0.7213576333290136, 'learning_rate': 1.0680052512360706e-08, 'epoch': 0.98} + 98%|█████████▊| 11943/12188 [1:59:23<29:59, 7.35s/it] 98%|█████████▊| 11944/12188 [1:59:31<30:40, 7.54s/it] {'loss': 0.2972, 'grad_norm': 0.6706684170922074, 'learning_rate': 1.0593429911776588e-08, 'epoch': 0.98} + 98%|█████████▊| 11944/12188 [1:59:31<30:40, 7.54s/it] 98%|█████████▊| 11945/12188 [1:59:39<30:34, 7.55s/it] {'loss': 0.2933, 'grad_norm': 0.6840976505097629, 'learning_rate': 1.0507159655067656e-08, 'epoch': 0.98} + 98%|█████████▊| 11945/12188 [1:59:39<30:34, 7.55s/it] 98%|█████████▊| 11946/12188 [1:59:49<33:56, 8.42s/it] {'loss': 0.2998, 'grad_norm': 0.6629915021282341, 'learning_rate': 1.0421241748327371e-08, 'epoch': 0.98} + 98%|█████████▊| 11946/12188 [1:59:49<33:56, 8.42s/it] 98%|█████████▊| 11947/12188 [1:59:57<32:25, 8.07s/it] {'loss': 0.3421, 'grad_norm': 0.7706916899804701, 'learning_rate': 1.0335676197622546e-08, 'epoch': 0.98} + 98%|█████████▊| 11947/12188 [1:59:57<32:25, 8.07s/it] 98%|█████████▊| 11948/12188 [2:00:04<31:27, 7.86s/it] {'loss': 0.2923, 'grad_norm': 0.8293156183036704, 'learning_rate': 1.025046300899557e-08, 'epoch': 0.98} + 98%|█████████▊| 11948/12188 [2:00:04<31:27, 7.86s/it]W0818 17:09:59.278000 115979 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 17:09:59.278000 115979 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 17:09:59.278000 115979 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 17:09:59.278000 115979 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 17:09:59.418000 46457 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 17:09:59.418000 46457 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 17:09:59.418000 46457 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 17:09:59.418000 46457 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 17:09:59.632000 84449 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 17:09:59.632000 84449 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 17:09:59.632000 84449 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 17:09:59.632000 84449 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 17:10:00.183000 50629 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 17:10:00.183000 50629 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 17:10:00.183000 50629 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 17:10:00.183000 50629 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 17:10:03.257000 8501 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 17:10:03.257000 8501 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 17:10:03.257000 8501 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 17:10:03.257000 8501 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 17:10:59.942000 49836 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 17:10:59.942000 49836 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 17:10:59.942000 49836 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 17:10:59.942000 49836 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 17:11:00.061000 119316 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 17:11:00.061000 119316 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 17:11:00.061000 119316 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 17:11:00.061000 119316 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 17:11:01.025000 88658 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 17:11:01.025000 88658 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 17:11:01.025000 88658 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 17:11:01.025000 88658 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 17:11:01.042000 54778 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 17:11:01.042000 54778 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 17:11:01.042000 54778 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 17:11:01.042000 54778 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 17:11:01.584000 115515 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 17:11:01.584000 115515 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 17:11:01.584000 115515 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 17:11:01.584000 115515 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 17:11:01.638000 75842 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 17:11:01.638000 75842 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 17:11:01.638000 75842 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 17:11:01.638000 75842 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 17:11:03.368000 11937 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 17:11:03.368000 11937 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 17:11:03.368000 11937 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 17:11:03.368000 11937 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 17:11:29.304000 50718 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 17:11:29.304000 50718 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 17:11:29.304000 50718 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 17:11:29.304000 50718 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 17:11:29.670000 40858 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 17:11:29.670000 40858 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 17:11:29.670000 40858 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 17:11:29.670000 40858 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 17:11:29.853000 116318 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 17:11:29.853000 116318 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 17:11:29.853000 116318 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 17:11:29.853000 116318 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 17:11:30.087000 55610 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 17:11:30.087000 55610 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 17:11:30.087000 55610 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 17:11:30.087000 55610 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 17:11:30.145000 76729 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 17:11:30.145000 76729 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 17:11:30.145000 76729 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 17:11:30.145000 76729 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 17:11:30.268000 120097 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 17:11:30.268000 120097 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 17:11:30.268000 120097 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 17:11:30.268000 120097 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 17:11:31.348000 89443 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 17:11:31.348000 89443 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 17:11:31.348000 89443 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 17:11:31.348000 89443 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 17:11:31.754000 12658 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 17:11:31.754000 12658 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 17:11:31.754000 12658 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 17:11:31.754000 12658 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +[2025-08-18 17:11:51,861] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:11:51,862] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:11:51,863] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:11:51,867] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:11:51,882] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:11:51,882] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:11:51,882] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:11:51,882] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:11:51,889] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:11:51,889] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:11:51,890] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:11:51,891] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:11:51,901] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:11:51,900] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:11:51,900] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:11:51,900] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:11:51,924] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:11:51,929] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:11:51,916] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:11:51,916] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:11:51,916] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:11:51,919] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:11:51,920] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:11:51,921] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:11:51,935] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:11:51,920] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:11:51,939] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:11:51,940] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:11:51,940] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:11:51,941] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +W0818 17:12:29.600000 52748 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 17:12:29.600000 52748 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 17:12:29.600000 52748 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 17:12:29.600000 52748 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 17:12:30.766000 118306 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 17:12:30.766000 118306 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 17:12:30.766000 118306 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 17:12:30.766000 118306 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 17:12:30.867000 78569 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 17:12:30.867000 78569 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 17:12:30.867000 78569 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 17:12:30.867000 78569 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 17:12:31.618000 122168 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 17:12:31.618000 122168 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 17:12:31.618000 122168 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 17:12:31.618000 122168 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 17:12:31.634000 91661 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 17:12:31.634000 91661 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 17:12:31.634000 91661 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 17:12:31.634000 91661 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 17:12:31.905000 42736 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 17:12:31.905000 42736 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 17:12:31.905000 42736 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 17:12:31.905000 42736 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 17:12:34.263000 14806 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 17:12:34.263000 14806 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 17:12:34.263000 14806 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 17:12:34.263000 14806 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 17:12:36.520000 48677 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 17:12:36.520000 48677 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 17:12:36.520000 48677 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 17:12:36.520000 48677 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +[2025-08-18 17:12:49,397] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:12:49,398] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:12:49,399] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:12:49,399] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:12:49,399] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:12:49,399] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:12:49,399] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:12:49,400] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:12:50,612] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:12:50,617] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:12:50,623] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:12:50,623] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:12:50,624] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:12:50,625] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:12:50,626] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:12:50,626] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:12:51,303] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:12:51,303] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:12:51,312] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:12:51,312] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:12:51,312] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:12:51,313] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:12:51,315] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:12:51,315] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:12:52,366] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 17:12:52,366] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 17:12:52,366] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 17:12:52,366] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 17:12:52,366] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 17:12:52,366] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 17:12:52,366] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 17:12:52,366] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 17:12:52,795] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 17:12:52,922] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 17:12:52,923] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 17:12:52,926] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 17:12:52,927] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 17:12:52,928] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 17:12:52,928] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 17:12:52,928] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 17:12:53,516] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 17:12:53,516] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 17:12:53,516] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 17:12:53,516] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 17:12:53,516] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 17:12:53,516] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 17:12:53,516] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 17:12:53,516] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 17:12:54,012] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 17:12:54,092] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 17:12:54,092] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 17:12:54,092] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 17:12:54,092] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 17:12:54,093] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 17:12:54,093] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 17:12:54,094] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 17:12:54,094] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 17:12:54,152] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 17:12:54,161] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 17:12:54,161] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 17:12:54,165] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 17:12:54,166] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 17:12:54,166] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 17:12:54,168] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 17:12:54,493] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:12:54,494] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:12:54,495] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:12:54,499] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:12:54,504] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:12:54,504] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:12:54,510] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:12:54,510] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:12:54,537] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:12:54,538] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:12:54,538] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:12:54,538] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:12:54,539] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:12:54,539] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:12:54,539] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:12:54,539] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:12:54,543] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 17:12:54,671] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 17:12:54,673] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 17:12:54,674] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 17:12:54,675] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 17:12:54,675] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 17:12:54,681] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 17:12:54,681] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 17:12:54,808] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:12:54,816] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:12:54,835] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:12:54,835] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:12:54,836] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:12:54,837] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:12:54,837] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:12:54,837] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:12:57,786] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:12:57,817] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:12:57,820] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:12:57,820] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:12:57,824] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:12:57,825] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:12:57,825] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:12:57,825] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:13:02,334] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 17:13:02,324] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 17:13:02,324] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 17:13:02,324] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 17:13:02,324] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 17:13:02,324] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 17:13:02,324] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 17:13:02,334] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 17:13:02,334] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 17:13:02,334] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 17:13:02,334] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 17:13:02,334] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 17:13:02,335] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 17:13:02,335] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 17:13:02,345] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 17:13:02,768] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 17:13:02,766] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 17:13:02,819] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 17:13:02,872] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 17:13:02,874] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 17:13:02,883] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 17:13:02,888] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 17:13:02,889] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 17:13:02,890] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 17:13:02,892] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 17:13:02,893] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 17:13:02,903] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 17:13:02,912] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 17:13:02,913] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 17:13:02,913] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 17:13:02,914] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 17:13:02,915] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 17:13:02,953] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 17:13:02,962] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 17:13:02,963] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 17:13:02,966] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 17:13:02,967] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 17:13:02,967] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 17:13:02,968] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 17:13:03,415] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 17:13:03,416] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 17:13:03,416] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 17:13:03,424] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 17:13:03,426] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 17:13:03,433] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 17:13:03,434] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 17:13:03,434] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 17:13:04,002] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 17:13:04,090] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:13:04,092] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:13:04,092] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:13:04,092] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:13:04,095] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:13:04,095] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:13:04,095] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:13:04,095] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 17:13:04,227] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 17:13:04,229] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 17:13:04,229] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 17:13:04,232] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 17:13:04,236] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 17:13:04,237] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 17:13:04,242] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 17:13:09,434] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 17:13:09,434] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 17:13:09,434] [INFO] [comm.py:683:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl +[2025-08-18 17:13:09,434] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 17:13:09,434] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 17:13:09,434] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 17:13:09,434] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 17:13:09,437] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 17:13:09,437] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 17:13:09,920] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 17:13:10,063] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 17:13:10,067] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 17:13:10,068] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 17:13:10,069] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 17:13:10,069] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 17:13:10,070] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 17:13:10,070] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. + Loading checkpoint shards: 0%| | 0/2 [00:00 before Client(conf_path) +Rank 0: --> after Client(conf_path) +Rank 0: Loading datasets: /mnt/petrelfs/liuzhaoyang/workspace/GUIAgent/internvl_chat/data/internvl_meta/meta/meta_250816_1.json +Rank 0: Loading guienv +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl with all sampling strategy +Rank 0: Loaded 327972 samples from VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl +Rank 0: Loading omniact +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl with all sampling strategy +Rank 0: Loaded 6720 samples from VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl +Rank 0: Loading ricoig16k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16133 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl +Rank 0: Loading ricosca +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl with all sampling strategy +Rank 0: Loaded 173212 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl +Rank 0: Loading seeclick +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl with all sampling strategy +Rank 0: Loaded 271121 samples from VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl +Rank 0: Loading ui_refexp +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl with all sampling strategy +Rank 0: Loaded 15624 samples from VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl +Rank 0: Loading webui350k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 57389 samples from VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl +Rank 0: Loading widget_captioning +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl with all sampling strategy +Rank 0: Loaded 101426 samples from VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl +Rank 0: Loading aitw-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl +Rank 0: Loading aitw-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl +Rank 0: Loading aitw-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl +Rank 0: Loading amex-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl +Rank 0: Loading amex-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl +Rank 0: Loading amex-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl +Rank 0: Loading android_control +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 149428 samples from VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl +Rank 0: Loading coat +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl with all sampling strategy +Rank 0: Loaded 11833 samples from VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl +Rank 0: Loading guiact-web-multi-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl +Rank 0: Loading guiact-web-multi-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl +Rank 0: Loading guiact-web-multi-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl +Rank 0: Loading guiact-web-single +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl with all sampling strategy +Rank 0: Loaded 67396 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl +Rank 0: Loading guide +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl with all sampling strategy +Rank 0: Loaded 13544 samples from VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl +Rank 0: Loading gui-odyssey-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl +Rank 0: Loading gui-odyssey-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl +Rank 0: Loading gui-odyssey-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl +Rank 0: Loading mind2web-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l1_202507011.jsonl +Rank 0: Loading mind2web-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l2_202507011.jsonl +Rank 0: Loading mind2web-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l3_202507011.jsonl +Rank 0: Loading miniwob-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l1_202507011.jsonl +Rank 0: Loading miniwob-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l2_202507011.jsonl +Rank 0: Loading miniwob-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l3_202507011.jsonl +Rank 0: Loading aguvis_android_control-v2 +Rank 0: Skipping aguvis_android_control-v2 due to repeat_time=0 +Rank 0: Loading aguvis_coat-v2 +Rank 0: Skipping aguvis_coat-v2 due to repeat_time=0 +Rank 0: Loading aguvis_docvqa_grounding +Rank 0: Skipping aguvis_docvqa_grounding due to repeat_time=0 +Rank 0: Loading aguvis_guiact-web-multi +Rank 0: Skipping aguvis_guiact-web-multi due to repeat_time=0 +Rank 0: Loading aguvis_guiact-web-single-v2 +Rank 0: Skipping aguvis_guiact-web-single-v2 due to repeat_time=0 +Rank 0: Loading aguvis_guide_si_10k-v2 +Rank 0: Skipping aguvis_guide_si_10k-v2 due to repeat_time=0 +Rank 0: Loading aguvis_guienv +Rank 0: Skipping aguvis_guienv due to repeat_time=0 +Rank 0: Loading aguvis_mind2web_train_v1.0.1 +Rank 0: Skipping aguvis_mind2web_train_v1.0.1 due to repeat_time=0 +Rank 0: Loading aguvis_omniact +Rank 0: Skipping aguvis_omniact due to repeat_time=0 +Rank 0: Loading aguvis_osatlas_ui_tars_cleaned +Rank 0: Skipping aguvis_osatlas_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading aguvis_ricoig16k +Rank 0: Skipping aguvis_ricoig16k due to repeat_time=0 +Rank 0: Loading aguvis_ricosca +Rank 0: Skipping aguvis_ricosca due to repeat_time=0 +Rank 0: Loading aguvis_seeclick_mi_ui_tars_cleaned +Rank 0: Skipping aguvis_seeclick_mi_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading aguvis_seeclick_ui_tars_cleaned_fixed +Rank 0: Skipping aguvis_seeclick_ui_tars_cleaned_fixed due to repeat_time=0 +Rank 0: Loading aguvis_ui_refexp +Rank 0: Skipping aguvis_ui_refexp due to repeat_time=0 +Rank 0: Loading aguvis_webui350k +Rank 0: Skipping aguvis_webui350k due to repeat_time=0 +Rank 0: Loading aguvis_widget_captioning +Rank 0: Skipping aguvis_widget_captioning due to repeat_time=0 +Rank 0: Loading icon_caption_icon_v0222_description +Rank 0: Skipping icon_caption_icon_v0222_description due to repeat_time=0 +Rank 0: Loading icon_grounding_icon_v0222_grounding +Rank 0: Skipping icon_grounding_icon_v0222_grounding due to repeat_time=0 +Rank 0: Loading refusal_component_final_1.5m +Rank 0: Skipping refusal_component_final_1.5m due to repeat_time=0 +Rank 0: Loading refusal_component_library_snap_icon_data_grounding +Rank 0: Skipping refusal_component_library_snap_icon_data_grounding due to repeat_time=0 +Rank 0: Loading refusal_component_v1_130k +Rank 0: Skipping refusal_component_v1_130k due to repeat_time=0 +Rank 0: Loading refusal_guienv +Rank 0: Skipping refusal_guienv due to repeat_time=0 +Rank 0: Loading refusal_icon_v0222_grounding +Rank 0: Skipping refusal_icon_v0222_grounding due to repeat_time=0 +Rank 0: Loading refusal_osatlas_ui_tars_cleaned +Rank 0: Skipping refusal_osatlas_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading refusal_ricosca +Rank 0: Skipping refusal_ricosca due to repeat_time=0 +Rank 0: Loading refusal_seeclick_mi_ui_tars_cleaned +Rank 0: Skipping refusal_seeclick_mi_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading refusal_seeclick_ui_tars_cleaned_fixed +Rank 0: Skipping refusal_seeclick_ui_tars_cleaned_fixed due to repeat_time=0 +Rank 0: Loading refusal_training_data_icon_grounded_merged +Rank 0: Skipping refusal_training_data_icon_grounded_merged due to repeat_time=0 +Rank 0: Loading component_generated_component_final_1.5m_cleaned_split +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_final_1.5m_cleaned_split_20250713.jsonl with random:10% sampling strategy +Rank 0: Loaded 3987 samples from VC:s3://gui-agent/jedi/annotations_250713/component_final_1.5m_cleaned_split_20250713.jsonl +Rank 0: Loading component_generated_component_library_snap_icon_data_description +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_description_conversations_20250713.jsonl with random:50% sampling strategy +Rank 0: Loaded 11061 samples from VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_description_conversations_20250713.jsonl +Rank 0: Loading component_generated_component_library_snap_icon_data_grounding +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_grounding_conversations_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 4424 samples from VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_grounding_conversations_20250713.jsonl +Rank 0: Loading component_generated_component_v1_130k +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_v1_130k_20250713.jsonl with random:20% sampling strategy +W0818 19:35:00.223000 118687 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 19:35:00.223000 118687 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 19:35:00.223000 118687 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 19:35:00.223000 118687 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 19:35:00.281000 124553 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 19:35:00.281000 124553 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 19:35:00.281000 124553 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 19:35:00.281000 124553 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 19:35:00.311000 87630 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 19:35:00.311000 87630 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 19:35:00.311000 87630 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 19:35:00.311000 87630 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 19:35:01.482000 19970 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 19:35:01.482000 19970 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 19:35:01.482000 19970 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 19:35:01.482000 19970 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 19:35:02.006000 117980 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 19:35:02.006000 117980 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 19:35:02.006000 117980 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 19:35:02.006000 117980 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 19:35:06.061000 123319 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 19:35:06.061000 123319 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 19:35:06.061000 123319 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 19:35:06.061000 123319 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 19:35:07.105000 120513 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 19:35:07.105000 120513 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 19:35:07.105000 120513 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 19:35:07.105000 120513 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +[2025-08-18 19:35:30,118] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:30,132] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:30,147] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:30,147] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:30,173] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:30,173] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:30,177] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:30,179] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:30,213] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:30,179] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:30,213] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:30,202] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:30,224] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:30,202] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:30,203] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:30,203] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:30,230] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:30,232] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:30,232] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:30,232] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:30,233] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:30,727] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:30,728] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:30,728] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:30,728] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:30,730] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:30,730] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:30,730] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:30,730] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:30,711] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:30,711] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:30,712] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:30,712] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:30,715] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:30,716] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:30,716] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:30,716] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:32,469] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:32,469] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:32,470] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:32,470] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:32,472] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:32,472] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:32,472] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:32,472] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:36,177] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 19:35:36,145] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 19:35:36,145] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 19:35:36,145] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 19:35:36,145] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 19:35:36,146] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 19:35:36,146] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 19:35:36,147] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 19:35:36,356] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 19:35:36,356] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 19:35:36,356] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 19:35:36,357] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 19:35:36,357] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 19:35:36,357] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 19:35:36,357] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 19:35:36,359] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 19:35:36,434] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 19:35:36,434] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 19:35:36,434] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 19:35:36,434] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 19:35:36,434] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 19:35:36,434] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 19:35:36,434] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 19:35:36,435] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 19:35:36,618] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 19:35:36,650] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 19:35:36,713] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 19:35:36,720] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 19:35:36,721] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 19:35:36,711] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 19:35:36,712] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 19:35:36,714] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 19:35:36,714] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 19:35:36,715] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 19:35:36,730] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 19:35:36,716] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 19:35:36,730] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 19:35:36,718] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 19:35:36,732] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 19:35:36,732] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 19:35:36,758] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 19:35:36,760] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 19:35:36,761] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 19:35:36,764] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 19:35:36,765] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 19:35:36,765] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 19:35:36,768] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 19:35:36,860] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 19:35:36,932] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 19:35:36,981] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 19:35:36,982] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 19:35:36,990] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 19:35:36,991] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 19:35:36,994] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 19:35:36,995] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 19:35:36,996] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 19:35:37,062] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 19:35:37,067] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 19:35:37,068] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 19:35:37,069] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 19:35:37,070] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 19:35:37,074] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 19:35:37,074] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 19:35:39,226] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 19:35:39,227] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 19:35:39,228] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 19:35:39,229] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 19:35:39,229] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 19:35:39,230] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 19:35:39,232] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 19:35:39,233] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 19:35:39,233] [INFO] [comm.py:683:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl +[2025-08-18 19:35:39,725] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 19:35:39,842] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 19:35:39,871] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 19:35:39,871] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 19:35:39,872] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 19:35:39,872] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 19:35:39,873] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 19:35:39,874] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 19:35:42,212] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:42,220] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:42,221] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:42,236] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:42,236] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:42,246] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:42,247] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:42,247] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:42,947] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:42,948] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:42,948] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:42,948] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:42,951] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:42,951] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:42,951] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:42,951] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 19:35:49,172] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 19:35:49,172] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 19:35:49,172] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 19:35:49,173] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 19:35:49,173] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 19:35:49,176] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 19:35:49,176] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 19:35:49,176] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 19:35:50,245] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 19:35:50,481] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 19:35:50,482] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 19:35:50,482] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 19:35:50,503] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 19:35:50,504] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 19:35:50,505] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 19:35:50,505] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 19:35:52,263] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 19:35:52,263] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 19:35:52,263] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 19:35:52,263] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 19:35:52,263] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 19:35:52,263] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 19:35:52,264] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 19:35:52,264] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 19:35:52,753] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 19:35:52,919] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 19:35:52,923] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 19:35:52,924] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 19:35:52,925] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 19:35:52,925] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 19:35:52,926] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 19:35:52,934] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 19:35:55,512] [INFO] [partition_parameters.py:348:__exit__] finished initializing model - num_params = 825, num_elems = 4.07B + Loading checkpoint shards: 0%| | 0/2 [00:00 before Client(conf_path) +Rank 0: --> after Client(conf_path) +Rank 0: Loading datasets: /mnt/petrelfs/liuzhaoyang/workspace/GUIAgent/internvl_chat/data/internvl_meta/meta/meta_250816_1.json +Rank 0: Loading guienv +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl with all sampling strategy +Rank 0: Loaded 327972 samples from VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl +Rank 0: Loading omniact +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl with all sampling strategy +Rank 0: Loaded 6720 samples from VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl +Rank 0: Loading ricoig16k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16133 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl +Rank 0: Loading ricosca +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl with all sampling strategy +Rank 0: Loaded 173212 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl +Rank 0: Loading seeclick +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl with all sampling strategy +Rank 0: Loaded 271121 samples from VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl +Rank 0: Loading ui_refexp +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl with all sampling strategy +Rank 0: Loaded 15624 samples from VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl +Rank 0: Loading webui350k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 57389 samples from VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl +Rank 0: Loading widget_captioning +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl with all sampling strategy +Rank 0: Loaded 101426 samples from VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl +Rank 0: Loading aitw-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl +Rank 0: Loading aitw-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl +Rank 0: Loading aitw-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl +Rank 0: Loading amex-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl +Rank 0: Loading amex-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl +Rank 0: Loading amex-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl +Rank 0: Loading android_control +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 149428 samples from VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl +Rank 0: Loading coat +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl with all sampling strategy +Rank 0: Loaded 11833 samples from VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl +Rank 0: Loading guiact-web-multi-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl +Rank 0: Loading guiact-web-multi-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl +Rank 0: Loading guiact-web-multi-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl +Rank 0: Loading guiact-web-single +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl with all sampling strategy +Rank 0: Loaded 67396 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl +Rank 0: Loading guide +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl with all sampling strategy +Rank 0: Loaded 13544 samples from VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl +Rank 0: Loading gui-odyssey-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl +Rank 0: Loading gui-odyssey-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl +Rank 0: Loading gui-odyssey-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl +Rank 0: Loading mind2web-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l1_202507011.jsonl +Rank 0: Loading mind2web-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l2_202507011.jsonl +Rank 0: Loading mind2web-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l3_202507011.jsonl +Rank 0: Loading miniwob-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l1_202507011.jsonl +Rank 0: Loading miniwob-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l2_202507011.jsonl +Rank 0: Loading miniwob-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l3_202507011.jsonl +Rank 0: Loading aguvis_android_control-v2 +Rank 0: Skipping aguvis_android_control-v2 due to repeat_time=0 +Rank 0: Loading aguvis_coat-v2 +Rank 0: Skipping aguvis_coat-v2 due to repeat_time=0 +Rank 0: Loading aguvis_docvqa_grounding +Rank 0: Skipping aguvis_docvqa_grounding due to repeat_time=0 +Rank 0: Loading aguvis_guiact-web-multi +Rank 0: Skipping aguvis_guiact-web-multi due to repeat_time=0 +Rank 0: Loading aguvis_guiact-web-single-v2 +Rank 0: Skipping aguvis_guiact-web-single-v2 due to repeat_time=0 +Rank 0: Loading aguvis_guide_si_10k-v2 +Rank 0: Skipping aguvis_guide_si_10k-v2 due to repeat_time=0 +Rank 0: Loading aguvis_guienv +Rank 0: Skipping aguvis_guienv due to repeat_time=0 +Rank 0: Loading aguvis_mind2web_train_v1.0.1 +Rank 0: Skipping aguvis_mind2web_train_v1.0.1 due to repeat_time=0 +Rank 0: Loading aguvis_omniact +Rank 0: Skipping aguvis_omniact due to repeat_time=0 +Rank 0: Loading aguvis_osatlas_ui_tars_cleaned +Rank 0: Skipping aguvis_osatlas_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading aguvis_ricoig16k +Rank 0: Skipping aguvis_ricoig16k due to repeat_time=0 +Rank 0: Loading aguvis_ricosca +Rank 0: Skipping aguvis_ricosca due to repeat_time=0 +Rank 0: Loading aguvis_seeclick_mi_ui_tars_cleaned +Rank 0: Skipping aguvis_seeclick_mi_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading aguvis_seeclick_ui_tars_cleaned_fixed +Rank 0: Skipping aguvis_seeclick_ui_tars_cleaned_fixed due to repeat_time=0 +Rank 0: Loading aguvis_ui_refexp +Rank 0: Skipping aguvis_ui_refexp due to repeat_time=0 +Rank 0: Loading aguvis_webui350k +Rank 0: Skipping aguvis_webui350k due to repeat_time=0 +Rank 0: Loading aguvis_widget_captioning +Rank 0: Skipping aguvis_widget_captioning due to repeat_time=0 +Rank 0: Loading icon_caption_icon_v0222_description +Rank 0: Skipping icon_caption_icon_v0222_description due to repeat_time=0 +Rank 0: Loading icon_grounding_icon_v0222_grounding +Rank 0: Skipping icon_grounding_icon_v0222_grounding due to repeat_time=0 +Rank 0: Loading refusal_component_final_1.5m +Rank 0: Skipping refusal_component_final_1.5m due to repeat_time=0 +Rank 0: Loading refusal_component_library_snap_icon_data_grounding +Rank 0: Skipping refusal_component_library_snap_icon_data_grounding due to repeat_time=0 +Rank 0: Loading refusal_component_v1_130k +Rank 0: Skipping refusal_component_v1_130k due to repeat_time=0 +Rank 0: Loading refusal_guienv +Rank 0: Skipping refusal_guienv due to repeat_time=0 +Rank 0: Loading refusal_icon_v0222_grounding +Rank 0: Skipping refusal_icon_v0222_grounding due to repeat_time=0 +Rank 0: Loading refusal_osatlas_ui_tars_cleaned +Rank 0: Skipping refusal_osatlas_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading refusal_ricosca +Rank 0: Skipping refusal_ricosca due to repeat_time=0 +Rank 0: Loading refusal_seeclick_mi_ui_tars_cleaned +Rank 0: Skipping refusal_seeclick_mi_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading refusal_seeclick_ui_tars_cleaned_fixed +Rank 0: Skipping refusal_seeclick_ui_tars_cleaned_fixed due to repeat_time=0 +Rank 0: Loading refusal_training_data_icon_grounded_merged +Rank 0: Skipping refusal_training_data_icon_grounded_merged due to repeat_time=0 +Rank 0: Loading component_generated_component_final_1.5m_cleaned_split +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_final_1.5m_cleaned_split_20250713.jsonl with random:10% sampling strategy +Rank 0: Loaded 3987 samples from VC:s3://gui-agent/jedi/annotations_250713/component_final_1.5m_cleaned_split_20250713.jsonl +Rank 0: Loading component_generated_component_library_snap_icon_data_description +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_description_conversations_20250713.jsonl with random:50% sampling strategy +Rank 0: Loaded 11061 samples from VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_description_conversations_20250713.jsonl +Rank 0: Loading component_generated_component_library_snap_icon_data_grounding +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_grounding_conversations_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 4424 samples from VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_grounding_conversations_20250713.jsonl +Rank 0: Loading component_generated_component_v1_130k +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_v1_130k_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 26376 samples from VC:s3://gui-agent/jedi/annotations_250713/component_v1_130k_20250713.jsonl +Rank 0: Loading component_rule-based_doc_data_new +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/doc_data_new_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 3153 samples from VC:s3://gui-agent/jedi/annotations_250713/doc_data_new_20250713.jsonl +Rank 0: Loading component_rule-based_doc_scroll_data_new +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/doc_scroll_data_new_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 603 samples from VC:s3://gui-agent/jedi/annotations_250713/doc_scroll_data_new_20250713.jsonl +Rank 0: Loading component_rule-based_ethercalc_v1 +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/ethercalc_v1_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 2012 samples from VC:s3://gui-agent/jedi/annotations_250713/ethercalc_v1_20250713.jsonl +Rank 0: Loading component_rule-based_slide_v1_17k +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/slide_v1_17k_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 2363 samples from VC:s3://gui-agent/jedi/annotations_250713/slide_v1_17k_20250713.jsonl +Rank 0: Loading icon_caption_ios_app_data +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/ios_app_data_conversations-images_pure_color_background_20250713.jsonl with all sampling strategy +Rank 0: Loaded 49498 samples from VC:s3://gui-agent/jedi/annotations_250713/ios_app_data_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_caption_mac_app_data +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/mac_app_data_conversations-images_pure_color_background_20250713.jsonl with all sampling strategy +Rank 0: Loaded 18083 samples from VC:s3://gui-agent/jedi/annotations_250713/mac_app_data_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_caption_training_data_icon +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_pure_color_background_20250713.jsonl with random:50% sampling strategy +Rank 0: Loaded 75874 samples from VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_grounding_training_data_icon_grounded_merged +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_grounded_merged_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 5466 samples from VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_grounded_merged_20250713.jsonl +Rank 0: Loading layout_layout200k_training_data_qwen25 +Rank 0: Skipping layout_layout200k_training_data_qwen25 due to repeat_time=0 +Rank 0: Loading layout_layout200k_grounding_training_data_qwen25 +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/layout200k_grounding_training_data_qwen25_20250713.jsonl with random:10% sampling strategy +Rank 0: Loaded 158612 samples from VC:s3://gui-agent/jedi/annotations_250713/layout200k_grounding_training_data_qwen25_20250713.jsonl +Rank 0: Loading layout_layout400k_claude_training_data_qwen25_split +Rank 0: Skipping layout_layout400k_claude_training_data_qwen25_split due to repeat_time=0 +Rank 0: Loading layout_layout400k_claude_grounding_training_data_qwen25_split +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/layout400k_claude_grounding_training_data_qwen25_split_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 7540 samples from VC:s3://gui-agent/jedi/annotations_250713/layout400k_claude_grounding_training_data_qwen25_split_20250713.jsonl +Rank 0: Loading layout_os_layout_v1 +Rank 0: Skipping layout_os_layout_v1 due to repeat_time=0 +Rank 0: Loading layout_os_layout_v1_grounding +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/os_layout_v1_grounding_training_data_qwen25_split_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 7857 samples from VC:s3://gui-agent/jedi/annotations_250713/os_layout_v1_grounding_training_data_qwen25_split_20250713.jsonl +Rank 0: Loading mind2web_raw_image +Rank 0: Loading VC:s3://gui-agent/mind2web_train/navigation_20250705.jsonl with all sampling strategy +Rank 0: Loaded 5740 samples from VC:s3://gui-agent/mind2web_train/navigation_20250705.jsonl +Rank 0: Loading ws_android_navigation_20250328 +Rank 0: Skipping ws_android_navigation_20250328 due to repeat_time=0 +Rank 0: Loading ws_android_navigation_20250407 +Rank 0: Skipping ws_android_navigation_20250407 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_w_history_20250328 +Rank 0: Skipping ws_web_navigation_w_history_20250328 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_wo_history_20250328 +Rank 0: Skipping ws_web_navigation_wo_history_20250328 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_20250421 +Rank 0: Skipping ws_web_navigation_20250421 due to repeat_time=0 +Rank 0: Loading ws_ubuntu_navigation_20250328 +Rank 0: Skipping ws_ubuntu_navigation_20250328 due to repeat_time=0 +Rank 0: Loading ws_android_navigation_20250505 +Rank 0: Skipping ws_android_navigation_20250505 due to repeat_time=0 +Rank 0: Loading internal_android_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 48814 samples from VC:s3://gui-agent/data_20250612/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19042 samples from VC:s3://gui-agent/data_20250612/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8363 samples from VC:s3://gui-agent/data_20250612/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 26412 samples from VC:s3://gui-agent/data_20250612/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 57522 samples from VC:s3://gui-agent/data_20250612/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 1342 samples from VC:s3://gui-agent/data_20250624/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 15766 samples from VC:s3://gui-agent/data_20250624/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19280 samples from VC:s3://gui-agent/data_20250630/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 3560 samples from VC:s3://gui-agent/data_20250630/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 9258 samples from VC:s3://gui-agent/data_20250630/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 420 samples from VC:s3://gui-agent/data_20250707/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 8898 samples from VC:s3://gui-agent/data_20250707/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 21026 samples from VC:s3://gui-agent/data_20250707/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 4490 samples from VC:s3://gui-agent/data_20250707/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 22154 samples from VC:s3://gui-agent/data_20250714/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/web/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 11614 samples from VC:s3://gui-agent/data_20250714/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 16767 samples from VC:s3://gui-agent/data_20250714/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 48814 samples from VC:s3://gui-agent/data_20250612/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 11970 samples from VC:s3://gui-agent/data_20250612/android/planning_20250811.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19042 samples from VC:s3://gui-agent/data_20250612/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 4926 samples from VC:s3://gui-agent/data_20250612/mac/planning_20250811.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8363 samples from VC:s3://gui-agent/data_20250612/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3716 samples from VC:s3://gui-agent/data_20250612/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 26412 samples from VC:s3://gui-agent/data_20250612/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 4420 samples from VC:s3://gui-agent/data_20250612/windows/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 115044 samples from VC:s3://gui-agent/data_20250612/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2684 samples from VC:s3://gui-agent/data_20250624/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 15766 samples from VC:s3://gui-agent/data_20250624/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7402 samples from VC:s3://gui-agent/data_20250624/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19280 samples from VC:s3://gui-agent/data_20250630/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3746 samples from VC:s3://gui-agent/data_20250630/android/planning_20250811.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 3560 samples from VC:s3://gui-agent/data_20250630/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 2469 samples from VC:s3://gui-agent/data_20250630/mac/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 18516 samples from VC:s3://gui-agent/data_20250630/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 420 samples from VC:s3://gui-agent/data_20250707/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 243 samples from VC:s3://gui-agent/data_20250707/mac/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 8898 samples from VC:s3://gui-agent/data_20250707/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 1188 samples from VC:s3://gui-agent/data_20250707/windows/planning_20250811.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8979 samples from VC:s3://gui-agent/data_20250707/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 1599 samples from VC:s3://gui-agent/data_20250707/android/planning_20250811.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 21026 samples from VC:s3://gui-agent/data_20250707/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2657 samples from VC:s3://gui-agent/data_20250707/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 22154 samples from VC:s3://gui-agent/data_20250714/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2383 samples from VC:s3://gui-agent/data_20250714/windows/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/web/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 23229 samples from VC:s3://gui-agent/data_20250714/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 16767 samples from VC:s3://gui-agent/data_20250714/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2389 samples from VC:s3://gui-agent/data_20250714/ubuntu/planning_20250811.jsonl +Rank 0: Loading private_aig_share_0815_logo_oral_operation_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_oral_operation_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_oral_operation_d240924_v1.jsonl +Rank 0: Loading private_aig_share_0815_logo_region_caption_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_region_caption_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_region_caption_d240924_v1.jsonl +Rank 0: Loading private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2.jsonl with all sampling strategy +Rank 0: Loaded 20293 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2.jsonl +Rank 0: Loading private_ui_phone_comment_20240606_json_d20241023_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_comment_20240606_json_d20241023_v2.jsonl with all sampling strategy +Rank 0: Loaded 1055 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_comment_20240606_json_d20241023_v2.jsonl +Rank 0: Loading private_ui_internal_aig_json_d241126 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_json_d241126.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 6837 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_json_d241126.jsonl +Rank 0: Loading private_ui_internal_aig_xml_d241126 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_xml_d241126.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 6873 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_xml_d241126.jsonl +Rank 0: Loading OS_Altas_androidworld_grounding_d241120_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/OS_Altas_androidworld_grounding_d241120_v1.jsonl with all sampling strategy +W0818 20:27:52.550000 105171 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 20:27:52.550000 105171 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:27:52.550000 105171 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 20:27:52.550000 105171 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:27:52.527000 128453 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 20:27:52.527000 128453 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:27:52.527000 128453 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 20:27:52.527000 128453 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:27:52.559000 31842 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 20:27:52.559000 31842 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:27:52.559000 31842 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 20:27:52.559000 31842 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:27:52.794000 80335 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 20:27:52.794000 80335 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:27:52.794000 80335 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 20:27:52.794000 80335 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:27:56.075000 44647 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 20:27:56.075000 44647 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:27:56.075000 44647 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 20:27:56.075000 44647 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +[2025-08-18 20:28:12,786] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:28:12,780] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:28:12,810] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:28:12,837] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:28:12,837] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:28:12,838] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:28:12,838] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:28:12,839] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:28:12,839] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:28:12,846] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:28:12,847] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:28:12,844] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:28:12,844] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:28:12,854] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:28:12,855] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:28:12,856] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:28:12,856] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:28:12,856] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:28:12,856] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:28:12,836] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:28:12,859] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:28:12,864] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:28:12,864] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:28:12,865] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:28:12,884] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:28:12,884] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:28:12,885] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:28:12,885] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:28:12,860] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:28:12,887] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:28:12,888] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:28:12,888] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:28:12,861] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:28:12,862] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:28:12,862] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:28:12,866] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:28:12,882] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:28:12,882] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:28:12,883] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:28:12,885] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:28:12,885] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:28:12,885] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:28:12,885] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:28:16,646] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:28:16,652] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:28:16,652] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:28:16,653] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:28:16,653] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:28:16,653] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:28:16,654] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:28:16,654] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:28:16,655] [INFO] [comm.py:652:init_distributed] cdb=None + [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:28:16,636] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:28:16,658] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:28:16,650] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:28:16,650] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:28:16,650] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:28:16,650] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:28:16,652] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:28:16,658] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:28:16,658] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:28:16,658] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:28:16,659] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:28:16,659] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:28:16,659] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:28:16,659] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:28:16,658] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:28:16,658] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:28:16,659] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:28:16,659] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:28:16,659] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:28:16,659] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:28:16,659] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:28:16,661] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:28:17,096] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:28:17,109] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:28:17,107] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:28:17,148] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:28:17,138] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:28:17,152] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:28:17,228] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:28:17,232] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:28:17,232] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:28:17,234] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:28:17,234] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:28:17,245] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:28:17,238] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:28:17,245] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:28:17,254] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:28:17,254] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:28:17,255] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:28:17,255] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:28:17,257] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:28:17,273] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:28:17,282] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:28:17,282] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:28:17,285] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:28:17,287] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:28:17,261] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:28:17,262] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:28:17,291] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:28:17,274] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:28:17,265] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:28:17,265] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:28:17,281] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:28:17,297] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +ce None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:28:17,302] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:28:17,303] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to mo[2025-08-18 20:28:17,290] [INFO] [config.py:733:__init__] Config mesh_deviYou are attempting to usYou are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU wYou are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +ce None world_size = 64 +[2025-08-18 20:28:17,300] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:28:17,300] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:28:17,300] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:28:17,300] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:28:17,296] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:28:20,592] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:28:20,593] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:28:20,593] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:28:20,593] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:28:20,594] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:28:20,594] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:28:20,594] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:28:20,595] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:28:26,798] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:28:26,798] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:28:26,798] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:28:26,798] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:28:26,799] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:28:26,799] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:28:26,799] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:28:26,799] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:28:27,228] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:28:27,311] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:28:27,326] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:28:27,331] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:28:27,331] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:28:27,336] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:28:27,336] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:28:27,336] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:28:31,816] [INFO] [partition_parameters.py:348:__exit__] finished initializing model - num_params = 825, num_elems = 4.07B + Loading checkpoint shards: 0%| | 0/2 [00:00 before Client(conf_path) +Rank 0: --> after Client(conf_path) +Rank 0: Loading datasets: /mnt/petrelfs/liuzhaoyang/workspace/GUIAgent/internvl_chat/data/internvl_meta/meta/meta_250816_1.json +Rank 0: Loading guienv +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl with all sampling strategy +Rank 0: Loaded 327972 samples from VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl +Rank 0: Loading omniact +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl with all sampling strategy +Rank 0: Loaded 6720 samples from VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl +Rank 0: Loading ricoig16k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16133 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl +Rank 0: Loading ricosca +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl with all sampling strategy +Rank 0: Loaded 173212 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl +Rank 0: Loading seeclick +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl with all sampling strategy +W0818 20:31:23.121000 114600 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 20:31:23.121000 114600 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:31:23.121000 114600 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 20:31:23.121000 114600 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:31:23.173000 89718 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 20:31:23.173000 89718 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:31:23.173000 89718 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 20:31:23.173000 89718 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:31:23.210000 53471 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 20:31:23.210000 53471 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:31:23.210000 53471 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 20:31:23.210000 53471 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:31:23.243000 16091 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 20:31:23.243000 16091 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:31:23.243000 16091 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 20:31:23.243000 16091 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:31:23.365000 14193 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 20:31:23.365000 14193 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:31:23.365000 14193 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 20:31:23.365000 14193 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:31:23.458000 40824 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 20:31:23.458000 40824 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:31:23.458000 40824 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 20:31:23.458000 40824 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:31:23.963000 9250 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 20:31:23.963000 9250 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:31:23.963000 9250 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 20:31:23.963000 9250 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +[2025-08-18 20:31:35,564] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:31:35,568] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:31:35,618] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:31:35,628] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:31:35,628] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:31:35,625] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:31:35,631] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:31:35,633] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:31:35,634] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:31:35,636] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:31:35,645] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:31:35,636] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:31:35,645] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:31:35,645] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:31:35,645] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:31:35,645] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:31:35,640] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:31:35,640] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:31:35,641] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:31:35,641] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:31:35,641] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:31:35,641] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:31:35,653] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:31:35,653] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:31:35,664] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:31:35,666] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:31:35,666] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:31:35,667] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:31:35,667] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:31:35,667] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:31:35,667] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:31:35,667] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:31:35,667] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:31:35,669] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:31:35,669] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:31:35,670] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:31:35,670] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:31:36,316] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:31:36,317] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:31:36,375] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:31:36,375] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:31:36,375] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:31:36,375] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:31:36,376] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:31:36,376] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:31:38,760] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:31:38,760] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:31:38,771] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:31:38,760] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:31:38,760] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:31:38,761] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:31:38,761] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:31:38,762] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:31:38,767] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:31:38,760] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:31:38,760] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:31:38,760] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:31:38,760] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:31:38,760] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:31:38,761] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:31:38,766] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:31:38,766] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:31:38,766] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:31:38,766] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:31:38,767] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:31:38,767] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:31:38,767] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:31:38,767] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:31:38,767] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:31:38,767] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:31:38,767] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:31:38,767] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:31:38,767] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:31:38,767] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:31:39,184] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:31:39,184] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:31:39,185] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:31:39,185] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:31:39,185] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:31:39,186] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:31:39,176] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:31:39,188] [INFO] [comm.py:652:init_distributed] cdb=None +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:31:39,197] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:31:39,216] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:31:39,230] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:31:39,239] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:31:39,243] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:31:39,293] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:31:39,298] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:31:39,299] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:31:39,301] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:31:39,301] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:31:39,302] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:31:39,304] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:31:39,326] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:31:39,332] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:31:39,332] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:31:39,334] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:31:39,334] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:31:39,334] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:31:39,344] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:31:39,359] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:31:39,359] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:31:39,359] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:31:39,360] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:31:39,361] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:31:39,363] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:31:39,365] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:31:39,373] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:31:39,380] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:31:39,379] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:31:39,381] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:31:39,390] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:31:39,383] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:31:39,385] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:31:39,386] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:31:39,386] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:31:39,387] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to moYou are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. + on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:31:39,625] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:31:39,719] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:31:39,726] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:31:39,727] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:31:39,727] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:31:39,739] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:31:39,739] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:31:39,740] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:31:39,740] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:31:39,760] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:31:39,769] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:31:39,771] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:31:39,771] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:31:39,773] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:31:39,773] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:31:39,774] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:31:44,043] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:31:44,043] [INFO] [comm.py:683:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl +[2025-08-18 20:31:44,043] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:31:44,043] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:31:44,043] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:31:44,043] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:31:44,043] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:31:44,043] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:31:44,044] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:31:44,540] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:31:44,616] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:31:44,626] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:31:44,635] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:31:44,642] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:31:44,651] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:31:44,651] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:31:44,652] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. + Loading checkpoint shards: 0%| | 0/2 [00:00 before Client(conf_path) +Rank 0: --> after Client(conf_path) +Rank 0: Loading datasets: /mnt/petrelfs/liuzhaoyang/workspace/GUIAgent/internvl_chat/data/internvl_meta/meta/meta_250816_1.json +Rank 0: Loading guienv +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl with all sampling strategy +Rank 0: Loaded 327972 samples from VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl +Rank 0: Loading omniact +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl with all sampling strategy +Rank 0: Loaded 6720 samples from VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl +Rank 0: Loading ricoig16k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16133 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl +Rank 0: Loading ricosca +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl with all sampling strategy +Rank 0: Loaded 173212 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl +Rank 0: Loading seeclick +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl with all sampling strategy +Rank 0: Loaded 271121 samples from VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl +Rank 0: Loading ui_refexp +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl with all sampling strategy +Rank 0: Loaded 15624 samples from VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl +Rank 0: Loading webui350k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 57389 samples from VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl +Rank 0: Loading widget_captioning +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl with all sampling strategy +Rank 0: Loaded 101426 samples from VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl +Rank 0: Loading aitw-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl +Rank 0: Loading aitw-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl +Rank 0: Loading aitw-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl +Rank 0: Loading amex-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl +Rank 0: Loading amex-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl +Rank 0: Loading amex-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl +Rank 0: Loading android_control +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 149428 samples from VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl +Rank 0: Loading coat +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl with all sampling strategy +Rank 0: Loaded 11833 samples from VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl +Rank 0: Loading guiact-web-multi-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl +Rank 0: Loading guiact-web-multi-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl +Rank 0: Loading guiact-web-multi-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl +Rank 0: Loading guiact-web-single +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl with all sampling strategy +Rank 0: Loaded 67396 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl +Rank 0: Loading guide +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl with all sampling strategy +Rank 0: Loaded 13544 samples from VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl +Rank 0: Loading gui-odyssey-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl +Rank 0: Loading gui-odyssey-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl +Rank 0: Loading gui-odyssey-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl +Rank 0: Loading mind2web-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l1_202507011.jsonl with all sampling strategy +W0818 20:33:27.996000 21275 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 20:33:27.996000 21275 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:33:27.996000 21275 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 20:33:27.996000 21275 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +*** +W0818 20:33:28.133000 86009 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 20:33:28.133000 86009 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:33:28.133000 86009 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 20:33:28.133000 86009 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:33:28.254000 12200 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 20:33:28.254000 12200 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:33:28.254000 12200 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 20:33:28.254000 12200 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:33:28.314000 43806 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 20:33:28.314000 43806 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:33:28.314000 43806 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 20:33:28.314000 43806 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:33:28.454000 92746 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 20:33:28.454000 92746 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:33:28.454000 92746 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 20:33:28.454000 92746 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:33:30.529000 121863 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 20:33:30.529000 121863 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:33:30.529000 121863 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 20:33:30.529000 121863 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +[2025-08-18 20:33:41,968] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:42,012] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:42,022] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:42,028] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:42,028] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:42,032] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:42,032] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:42,035] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:42,035] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:42,035] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:42,035] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:42,035] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:42,036] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:42,040] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:42,043] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:42,043] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:42,053] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:42,053] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:42,053] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:42,053] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:42,044] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:42,044] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:42,048] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:42,048] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:42,048] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:42,050] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:42,050] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:42,056] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:42,056] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:42,057] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:42,057] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:42,070] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:42,070] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:42,063] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:42,070] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:42,071] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:42,071] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:42,063] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:42,063] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:42,063] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:42,087] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:42,088] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:42,088] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:42,089] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:42,089] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:42,089] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:42,089] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:42,089] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:44,011] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:44,012] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:44,012] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:44,016] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:44,018] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:44,018] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:44,018] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:44,019] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:33:45,103] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:33:45,107] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:33:45,105] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:33:45,105] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:33:45,105] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:33:45,106] [INFO] [comm.py:652:init_distributed] cdb=None + [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:33:45,116] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:33:45,116] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:33:45,118] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:33:45,106] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:33:45,106] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:33:45,106] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:33:45,107] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:33:45,111] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:33:45,109] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:33:45,108] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:33:45,111] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:33:45,111] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:33:45,111] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:33:45,111] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:33:45,112] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:33:45,112] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:33:45,112] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:33:45,125] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:33:45,127] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:33:45,528] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:33:45,534] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:33:45,551] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:33:45,566] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:33:45,582] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:33:45,589] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:33:45,645] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:33:45,653] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:33:45,658] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:33:45,659] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:33:45,662] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to mo[2025-08-18 20:33:45,666] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:33:45,664] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:33:45,670] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:33:45,671] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:33:45,672] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:33:45,672] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:33:45,693] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:33:45,694] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:33:45,694] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:33:45,694] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:33:45,696] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:33:45,696] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:33:45,697] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:33:45,698] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:33:45,709] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:33:45,721] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:33:45,724] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:33:45,726] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:33:45,742] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:33:45,731] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:33:45,733] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to mo[2025-08-18 20:33:45,735] [INFO] [config.py:733:__init__] Config mesh_deviYou are attempting to usYou are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to moYou are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:33:45,739] [INFO] [config.py:733:__init__] Config mesh_devi[2025-08-18 20:33:45,750] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:33:45,750You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:33:45,752] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:33:45,752] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:33:47,087] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:33:47,087] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:33:47,088] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:33:47,088] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:33:47,089] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:33:47,089] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:33:47,089] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:33:47,094] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:33:47,517] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:33:47,635] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:33:47,642] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:33:47,644] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:33:47,645] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:33:47,655] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:33:47,655] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:33:47,660] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:33:49,986] [INFO] [partition_parameters.py:348:__exit__] finished initializing model - num_params = 825, num_elems = 4.07B + Loading checkpoint shards: 0%| | 0/2 [00:00 before Client(conf_path) +Rank 0: --> after Client(conf_path) +Rank 0: Loading datasets: /mnt/petrelfs/liuzhaoyang/workspace/GUIAgent/internvl_chat/data/internvl_meta/meta/meta_250816_1.json +Rank 0: Loading guienv +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl with all sampling strategy +Rank 0: Loaded 327972 samples from VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl +Rank 0: Loading omniact +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl with all sampling strategy +Rank 0: Loaded 6720 samples from VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl +Rank 0: Loading ricoig16k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16133 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl +Rank 0: Loading ricosca +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl with all sampling strategy +Rank 0: Loaded 173212 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl +Rank 0: Loading seeclick +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl with all sampling strategy +W0818 20:34:53.933000 23929 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 20:34:53.933000 23929 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:34:53.933000 23929 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 20:34:53.933000 23929 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:34:53.929000 88350 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 20:34:53.929000 88350 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:34:53.929000 88350 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 20:34:53.929000 88350 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:34:53.968000 95172 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 20:34:53.968000 95172 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:34:53.968000 95172 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 20:34:53.968000 95172 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:34:53.982000 46104 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 20:34:53.982000 46104 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:34:53.982000 46104 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 20:34:53.982000 46104 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:34:54.054000 62369 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 20:34:54.054000 62369 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:34:54.054000 62369 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 20:34:54.054000 62369 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:34:54.108000 124714 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 20:34:54.108000 124714 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:34:54.108000 124714 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 20:34:54.108000 124714 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:34:54.117000 123543 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 20:34:54.117000 123543 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:34:54.117000 123543 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 20:34:54.117000 123543 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:34:54.468000 20772 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 20:34:54.468000 20772 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:34:54.468000 20772 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 20:34:54.468000 20772 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:35:23.466000 101463 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 20:35:23.466000 101463 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:35:23.466000 101463 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 20:35:23.466000 101463 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:35:23.479000 50426 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 20:35:23.479000 50426 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:35:23.479000 50426 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 20:35:23.479000 50426 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:35:23.647000 64666 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 20:35:23.647000 64666 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:35:23.647000 64666 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 20:35:23.647000 64666 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:35:23.780000 90474 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 20:35:23.780000 90474 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:35:23.780000 90474 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 20:35:23.780000 90474 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:35:23.912000 125761 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 20:35:23.912000 125761 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:35:23.912000 125761 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 20:35:23.912000 125761 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:35:24.347000 22847 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 20:35:24.347000 22847 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:35:24.347000 22847 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 20:35:24.347000 22847 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:35:24.500000 26082 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 20:35:24.500000 26082 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:35:24.500000 26082 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 20:35:24.500000 26082 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:35:24.814000 126856 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 20:35:24.814000 126856 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:35:24.814000 126856 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 20:35:24.814000 126856 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +[2025-08-18 20:35:33,631] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:35:33,690] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:35:33,690] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:35:33,692] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:35:33,692] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:35:33,699] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:35:33,699] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:35:33,699] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:35:33,699] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:35:33,703] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:35:33,705] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:35:33,705] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:35:33,705] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:35:33,706] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:35:33,706] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:35:33,706] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:35:33,708] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:35:33,723] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:35:33,716] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:35:33,716] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:35:33,717] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:35:33,717] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:35:33,718] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:35:33,718] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:35:33,718] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:35:33,718] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:35:33,728] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:35:33,728] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:35:33,728] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:35:33,729] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:35:33,729] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:35:34,358] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:35:34,358] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:35:34,365] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:35:34,367] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:35:34,367] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:35:34,383] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:35:34,384] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:35:34,385] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:35:34,574] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:35:34,575] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:35:34,575] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:35:34,575] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:35:34,575] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:35:34,575] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:35:34,575] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:35:34,575] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:35:34,600] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:35:34,632] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:35:34,632] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:35:34,632] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:35:34,636] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:35:34,636] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:35:34,636] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:35:34,636] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:35:36,635] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:35:36,628] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:35:36,629] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:35:36,629] [INFO] [comm.py:652:init_distributed] cdb=None +ing TorchBackend in DeepSpeed with backend nccl +[2025-08-18 20:35:36,632] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:35:36,632] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:35:36,634] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:35:36,632] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:35:36,632] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:35:36,632] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:35:36,643] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:35:36,643] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:35:36,643] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:35:36,644] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:35:36,636] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:35:36,637] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:35:36,637] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:35:36,638] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:35:36,639] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:35:36,641] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:35:36,642] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:35:36,642] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:35:36,642] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:35:36,643] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:35:37,034] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:35:37,035] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:35:37,039] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:35:37,041] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:35:37,041] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:35:37,041] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:35:37,042] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:35:37,042] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:35:37,049] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:35:37,061] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:35:37,079] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:35:37,082] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:35:37,169] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:35:37,169] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:35:37,175] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:35:37,178] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:35:37,178] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:35:37,178] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:35:37,179] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:35:37,185] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:35:37,197] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:35:37,188You are attempting to use Flash Attention 2.0 with a model not initialized[2025-08-18 20:35:37,190] [INFO] [comm.py:652:init_distributed] cdb=None +with `model.to('cuda')`. +[2025-08-18 20:35:37,190] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:35:37,191] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +ce None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +ce None world_size = 64 +[2025-08-18 20:35:37,199] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:35:37,199] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:35:37,199] [INFO] [comm.py:652:init_distributed] cdb=None +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:35:37,215] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:35:37,217] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:35:37,220] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:35:37,220] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:35:37,220] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:35:37,223] [INFO] [comm.py:652:init_distributed] cdb=None +d on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:35:37,226] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:35:37,227] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:35:37,234] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:35:37,236] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:35:37,254] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:35:37,254] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:35:37,515] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:35:37,647] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:35:37,648] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:35:37,651] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:35:37,653] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:35:37,658] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:35:37,662] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:35:37,666] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:35:37,680] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:35:37,675] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:35:37,815] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:35:37,815] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:35:37,816] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:35:37,816] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:35:37,816] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to moYou are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:35:37,834] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:35:37,835] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +e Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:35:37,822] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:35:37,823] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +ce None world_size = 64 +[2025-08-18 20:35:37,836] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:35:37,841] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:35:38,082] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:35:38,083] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:35:38,085] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:35:38,089] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:35:38,089] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:35:38,090] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:35:38,093] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:35:42,042] [INFO] [partition_parameters.py:348:__exit__] finished initializing model - num_params = 825, num_elems = 4.07B + Loading checkpoint shards: 0%| | 0/2 [00:00 before Client(conf_path) +Rank 0: --> after Client(conf_path) +Rank 0: Loading datasets: /mnt/petrelfs/liuzhaoyang/workspace/GUIAgent/internvl_chat/data/internvl_meta/meta/meta_250816_1.json +Rank 0: Loading guienv +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl with all sampling strategy +Rank 0: Loaded 327972 samples from VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl +Rank 0: Loading omniact +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl with all sampling strategy +Rank 0: Loaded 6720 samples from VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl +Rank 0: Loading ricoig16k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16133 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl +Rank 0: Loading ricosca +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl with all sampling strategy +Rank 0: Loaded 173212 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl +Rank 0: Loading seeclick +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl with all sampling strategy +Rank 0: Loaded 271121 samples from VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl +Rank 0: Loading ui_refexp +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl with all sampling strategy +Rank 0: Loaded 15624 samples from VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl +Rank 0: Loading webui350k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 57389 samples from VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl +Rank 0: Loading widget_captioning +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl with all sampling strategy +Rank 0: Loaded 101426 samples from VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl +Rank 0: Loading aitw-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl +Rank 0: Loading aitw-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl +Rank 0: Loading aitw-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl +Rank 0: Loading amex-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl +Rank 0: Loading amex-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl +Rank 0: Loading amex-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl +Rank 0: Loading android_control +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 149428 samples from VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl +Rank 0: Loading coat +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl with all sampling strategy +Rank 0: Loaded 11833 samples from VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl +Rank 0: Loading guiact-web-multi-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl +Rank 0: Loading guiact-web-multi-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl +Rank 0: Loading guiact-web-multi-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl +Rank 0: Loading guiact-web-single +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl with all sampling strategy +Rank 0: Loaded 67396 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl +Rank 0: Loading guide +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl with all sampling strategy +Rank 0: Loaded 13544 samples from VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl +Rank 0: Loading gui-odyssey-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl +Rank 0: Loading gui-odyssey-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl +Rank 0: Loading gui-odyssey-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl +Rank 0: Loading mind2web-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l1_202507011.jsonl +Rank 0: Loading mind2web-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l2_202507011.jsonl +Rank 0: Loading mind2web-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l3_202507011.jsonl +Rank 0: Loading miniwob-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l1_202507011.jsonl +Rank 0: Loading miniwob-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l2_202507011.jsonl +Rank 0: Loading miniwob-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l3_202507011.jsonl +Rank 0: Loading aguvis_android_control-v2 +Rank 0: Skipping aguvis_android_control-v2 due to repeat_time=0 +Rank 0: Loading aguvis_coat-v2 +Rank 0: Skipping aguvis_coat-v2 due to repeat_time=0 +Rank 0: Loading aguvis_docvqa_grounding +Rank 0: Skipping aguvis_docvqa_grounding due to repeat_time=0 +Rank 0: Loading aguvis_guiact-web-multi +Rank 0: Skipping aguvis_guiact-web-multi due to repeat_time=0 +Rank 0: Loading aguvis_guiact-web-single-v2 +Rank 0: Skipping aguvis_guiact-web-single-v2 due to repeat_time=0 +Rank 0: Loading aguvis_guide_si_10k-v2 +Rank 0: Skipping aguvis_guide_si_10k-v2 due to repeat_time=0 +Rank 0: Loading aguvis_guienv +Rank 0: Skipping aguvis_guienv due to repeat_time=0 +Rank 0: Loading aguvis_mind2web_train_v1.0.1 +Rank 0: Skipping aguvis_mind2web_train_v1.0.1 due to repeat_time=0 +Rank 0: Loading aguvis_omniact +Rank 0: Skipping aguvis_omniact due to repeat_time=0 +Rank 0: Loading aguvis_osatlas_ui_tars_cleaned +Rank 0: Skipping aguvis_osatlas_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading aguvis_ricoig16k +Rank 0: Skipping aguvis_ricoig16k due to repeat_time=0 +Rank 0: Loading aguvis_ricosca +Rank 0: Skipping aguvis_ricosca due to repeat_time=0 +Rank 0: Loading aguvis_seeclick_mi_ui_tars_cleaned +Rank 0: Skipping aguvis_seeclick_mi_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading aguvis_seeclick_ui_tars_cleaned_fixed +Rank 0: Skipping aguvis_seeclick_ui_tars_cleaned_fixed due to repeat_time=0 +Rank 0: Loading aguvis_ui_refexp +Rank 0: Skipping aguvis_ui_refexp due to repeat_time=0 +Rank 0: Loading aguvis_webui350k +Rank 0: Skipping aguvis_webui350k due to repeat_time=0 +Rank 0: Loading aguvis_widget_captioning +Rank 0: Skipping aguvis_widget_captioning due to repeat_time=0 +Rank 0: Loading icon_caption_icon_v0222_description +Rank 0: Skipping icon_caption_icon_v0222_description due to repeat_time=0 +Rank 0: Loading icon_grounding_icon_v0222_grounding +Rank 0: Skipping icon_grounding_icon_v0222_grounding due to repeat_time=0 +Rank 0: Loading refusal_component_final_1.5m +Rank 0: Skipping refusal_component_final_1.5m due to repeat_time=0 +Rank 0: Loading refusal_component_library_snap_icon_data_grounding +Rank 0: Skipping refusal_component_library_snap_icon_data_grounding due to repeat_time=0 +Rank 0: Loading refusal_component_v1_130k +Rank 0: Skipping refusal_component_v1_130k due to repeat_time=0 +Rank 0: Loading refusal_guienv +Rank 0: Skipping refusal_guienv due to repeat_time=0 +Rank 0: Loading refusal_icon_v0222_grounding +Rank 0: Skipping refusal_icon_v0222_grounding due to repeat_time=0 +Rank 0: Loading refusal_osatlas_ui_tars_cleaned +Rank 0: Skipping refusal_osatlas_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading refusal_ricosca +Rank 0: Skipping refusal_ricosca due to repeat_time=0 +Rank 0: Loading refusal_seeclick_mi_ui_tars_cleaned +Rank 0: Skipping refusal_seeclick_mi_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading refusal_seeclick_ui_tars_cleaned_fixed +Rank 0: Skipping refusal_seeclick_ui_tars_cleaned_fixed due to repeat_time=0 +Rank 0: Loading refusal_training_data_icon_grounded_merged +Rank 0: Skipping refusal_training_data_icon_grounded_merged due to repeat_time=0 +Rank 0: Loading component_generated_component_final_1.5m_cleaned_split +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_final_1.5m_cleaned_split_20250713.jsonl with random:10% sampling strategy +Rank 0: Loaded 3987 samples from VC:s3://gui-agent/jedi/annotations_250713/component_final_1.5m_cleaned_split_20250713.jsonl +Rank 0: Loading component_generated_component_library_snap_icon_data_description +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_description_conversations_20250713.jsonl with random:50% sampling strategy +Rank 0: Loaded 11061 samples from VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_description_conversations_20250713.jsonl +Rank 0: Loading component_generated_component_library_snap_icon_data_grounding +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_grounding_conversations_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 4424 samples from VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_grounding_conversations_20250713.jsonl +Rank 0: Loading component_generated_component_v1_130k +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_v1_130k_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 26376 samples from VC:s3://gui-agent/jedi/annotations_250713/component_v1_130k_20250713.jsonl +Rank 0: Loading component_rule-based_doc_data_new +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/doc_data_new_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 3153 samples from VC:s3://gui-agent/jedi/annotations_250713/doc_data_new_20250713.jsonl +Rank 0: Loading component_rule-based_doc_scroll_data_new +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/doc_scroll_data_new_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 603 samples from VC:s3://gui-agent/jedi/annotations_250713/doc_scroll_data_new_20250713.jsonl +Rank 0: Loading component_rule-based_ethercalc_v1 +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/ethercalc_v1_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 2012 samples from VC:s3://gui-agent/jedi/annotations_250713/ethercalc_v1_20250713.jsonl +Rank 0: Loading component_rule-based_slide_v1_17k +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/slide_v1_17k_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 2363 samples from VC:s3://gui-agent/jedi/annotations_250713/slide_v1_17k_20250713.jsonl +Rank 0: Loading icon_caption_ios_app_data +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/ios_app_data_conversations-images_pure_color_background_20250713.jsonl with all sampling strategy +Rank 0: Loaded 49498 samples from VC:s3://gui-agent/jedi/annotations_250713/ios_app_data_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_caption_mac_app_data +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/mac_app_data_conversations-images_pure_color_background_20250713.jsonl with all sampling strategy +Rank 0: Loaded 18083 samples from VC:s3://gui-agent/jedi/annotations_250713/mac_app_data_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_caption_training_data_icon +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_pure_color_background_20250713.jsonl with random:50% sampling strategy +Rank 0: Loaded 75874 samples from VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_grounding_training_data_icon_grounded_merged +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_grounded_merged_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 5466 samples from VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_grounded_merged_20250713.jsonl +Rank 0: Loading layout_layout200k_training_data_qwen25 +Rank 0: Skipping layout_layout200k_training_data_qwen25 due to repeat_time=0 +Rank 0: Loading layout_layout200k_grounding_training_data_qwen25 +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/layout200k_grounding_training_data_qwen25_20250713.jsonl with random:10% sampling strategy +Rank 0: Loaded 158612 samples from VC:s3://gui-agent/jedi/annotations_250713/layout200k_grounding_training_data_qwen25_20250713.jsonl +Rank 0: Loading layout_layout400k_claude_training_data_qwen25_split +Rank 0: Skipping layout_layout400k_claude_training_data_qwen25_split due to repeat_time=0 +Rank 0: Loading layout_layout400k_claude_grounding_training_data_qwen25_split +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/layout400k_claude_grounding_training_data_qwen25_split_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 7540 samples from VC:s3://gui-agent/jedi/annotations_250713/layout400k_claude_grounding_training_data_qwen25_split_20250713.jsonl +Rank 0: Loading layout_os_layout_v1 +Rank 0: Skipping layout_os_layout_v1 due to repeat_time=0 +Rank 0: Loading layout_os_layout_v1_grounding +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/os_layout_v1_grounding_training_data_qwen25_split_20250713.jsonl with random:30% sampling strategy +W0818 20:39:26.028000 3522 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 20:39:26.028000 3522 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:39:26.028000 3522 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 20:39:26.028000 3522 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +*** +W0818 20:39:26.195000 107035 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 20:39:26.195000 107035 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:39:26.195000 107035 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 20:39:26.195000 107035 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:39:26.512000 2355 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 20:39:26.512000 2355 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:39:26.512000 2355 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 20:39:26.512000 2355 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:39:26.774000 71754 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 20:39:26.774000 71754 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:39:26.774000 71754 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 20:39:26.774000 71754 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:39:33.053000 1394 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 20:39:33.053000 1394 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:39:33.053000 1394 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 20:39:33.053000 1394 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:39:35.274000 50615 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 20:39:35.274000 50615 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:39:35.274000 50615 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 20:39:35.274000 50615 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +[2025-08-18 20:39:53,493] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:39:53,515] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:39:53,529] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:39:53,529] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:39:53,554] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:39:53,554] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:39:53,554] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:39:53,554] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:39:53,559] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:39:53,559] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:39:53,560] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:39:53,560] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:39:53,564] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:39:53,564] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:39:53,564] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:39:53,564] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:39:53,564] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:39:53,564] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:39:53,564] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:39:53,578] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:39:53,575] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:39:53,576] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:39:53,576] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:39:53,577] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:39:53,577] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:39:53,577] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:39:53,577] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:39:53,577] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:39:53,602] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:39:53,604] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:39:53,604] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:39:53,604] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:39:53,604] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:39:53,605] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:39:53,605] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:39:54,428] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:39:54,428] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:39:54,429] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:39:54,429] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:39:54,430] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:39:54,431] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:39:54,432] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:39:54,432] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:39:56,631] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:39:56,631] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:39:56,632] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:39:56,632] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:39:56,632] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:39:56,632] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:39:56,632] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:39:56,633] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:39:56,640] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:39:56,640] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:39:56,640] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:39:56,640] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:39:56,640] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:39:56,641] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:39:56,641] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:39:56,641] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:39:56,838] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:39:56,838] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:39:56,838] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:39:56,838] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:39:56,838] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:39:56,838] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:39:56,838] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:39:56,841] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:39:57,056] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:39:57,063] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:39:57,067] [INFO] [config.py:733:__init__] Config mesh_devYou are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:39:57,072] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:39:57,078] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:39:57,077] [INFO] [comm.py:652:init_distributed] cdb=None +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:39:57,103] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:39:57,109] [INFO] [comm.py:652:init_distributed] cdb=None +ice None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:39:57,142] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:39:57,185] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:39:57,196] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:39:57,199] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:39:57,199] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:39:57,200] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:39:57,202] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:39:57,203] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:39:57,205] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:39:57,205] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:39:57,215] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:39:57,217] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:39:57,218] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:39:57,218] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:39:57,252] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:39:57,267] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:39:57,268] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:39:57,269] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:39:57,270] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:39:57,271] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:39:57,272] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:39:57,272] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:39:57,379] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:39:57,384] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:39:57,386] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:39:57,386] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:39:57,388] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:39:57,389] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:39:57,390] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:39:57,525] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:39:57,652] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:39:57,660] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:39:57,662] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:39:57,664] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:39:57,666] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:39:57,670] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:39:57,670] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:40:10,783] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:40:10,787] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:40:10,792] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:40:10,792] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:40:10,796] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:40:10,802] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:40:10,802] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:40:10,802] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:40:16,971] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:40:17,000] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:40:17,000] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:40:17,000] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:40:17,001] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:40:17,001] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:40:17,001] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:40:17,001] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:40:17,900] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:40:17,900] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:40:17,900] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:40:17,900] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:40:17,901] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:40:17,901] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:40:17,901] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:40:17,901] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:40:18,396] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:40:18,521] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:40:18,533] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:40:18,537] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:40:18,537] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:40:18,539] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:40:18,539] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:40:18,540] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:40:26,041] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:40:26,041] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:40:26,041] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:40:26,042] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:40:26,042] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:40:26,043] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:40:26,043] [INFO] [comm.py:683:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl +[2025-08-18 20:40:26,043] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:40:26,056] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:40:26,537] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:40:26,675] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:40:26,676] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:40:26,678] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:40:26,684] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:40:26,684] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:40:26,685] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:40:26,687] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. + Loading checkpoint shards: 0%| | 0/2 [00:00 before Client(conf_path) +Rank 0: --> after Client(conf_path) +Rank 0: Loading datasets: /mnt/petrelfs/liuzhaoyang/workspace/GUIAgent/internvl_chat/data/internvl_meta/meta/meta_250816_1.json +Rank 0: Loading guienv +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl with all sampling strategy +Rank 0: Loaded 327972 samples from VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl +Rank 0: Loading omniact +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl with all sampling strategy +Rank 0: Loaded 6720 samples from VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl +Rank 0: Loading ricoig16k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16133 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl +Rank 0: Loading ricosca +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl with all sampling strategy +Rank 0: Loaded 173212 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl +Rank 0: Loading seeclick +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl with all sampling strategy +Rank 0: Loaded 271121 samples from VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl +Rank 0: Loading ui_refexp +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl with all sampling strategy +Rank 0: Loaded 15624 samples from VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl +Rank 0: Loading webui350k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 57389 samples from VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl +Rank 0: Loading widget_captioning +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl with all sampling strategy +Rank 0: Loaded 101426 samples from VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl +Rank 0: Loading aitw-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl +Rank 0: Loading aitw-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl +Rank 0: Loading aitw-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl +Rank 0: Loading amex-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl +Rank 0: Loading amex-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl +Rank 0: Loading amex-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl +Rank 0: Loading android_control +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 149428 samples from VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl +Rank 0: Loading coat +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl with all sampling strategy +Rank 0: Loaded 11833 samples from VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl +Rank 0: Loading guiact-web-multi-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl +Rank 0: Loading guiact-web-multi-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl +Rank 0: Loading guiact-web-multi-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl +Rank 0: Loading guiact-web-single +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl with all sampling strategy +Rank 0: Loaded 67396 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl +Rank 0: Loading guide +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl with all sampling strategy +Rank 0: Loaded 13544 samples from VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl +Rank 0: Loading gui-odyssey-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl +Rank 0: Loading gui-odyssey-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl +Rank 0: Loading gui-odyssey-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl +Rank 0: Loading mind2web-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l1_202507011.jsonl +Rank 0: Loading mind2web-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l2_202507011.jsonl +Rank 0: Loading mind2web-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l3_202507011.jsonl +Rank 0: Loading miniwob-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l1_202507011.jsonl +Rank 0: Loading miniwob-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l2_202507011.jsonl +Rank 0: Loading miniwob-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l3_202507011.jsonl +Rank 0: Loading aguvis_android_control-v2 +Rank 0: Skipping aguvis_android_control-v2 due to repeat_time=0 +Rank 0: Loading aguvis_coat-v2 +Rank 0: Skipping aguvis_coat-v2 due to repeat_time=0 +Rank 0: Loading aguvis_docvqa_grounding +Rank 0: Skipping aguvis_docvqa_grounding due to repeat_time=0 +Rank 0: Loading aguvis_guiact-web-multi +Rank 0: Skipping aguvis_guiact-web-multi due to repeat_time=0 +Rank 0: Loading aguvis_guiact-web-single-v2 +Rank 0: Skipping aguvis_guiact-web-single-v2 due to repeat_time=0 +Rank 0: Loading aguvis_guide_si_10k-v2 +Rank 0: Skipping aguvis_guide_si_10k-v2 due to repeat_time=0 +Rank 0: Loading aguvis_guienv +Rank 0: Skipping aguvis_guienv due to repeat_time=0 +Rank 0: Loading aguvis_mind2web_train_v1.0.1 +Rank 0: Skipping aguvis_mind2web_train_v1.0.1 due to repeat_time=0 +Rank 0: Loading aguvis_omniact +Rank 0: Skipping aguvis_omniact due to repeat_time=0 +Rank 0: Loading aguvis_osatlas_ui_tars_cleaned +Rank 0: Skipping aguvis_osatlas_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading aguvis_ricoig16k +Rank 0: Skipping aguvis_ricoig16k due to repeat_time=0 +Rank 0: Loading aguvis_ricosca +Rank 0: Skipping aguvis_ricosca due to repeat_time=0 +Rank 0: Loading aguvis_seeclick_mi_ui_tars_cleaned +Rank 0: Skipping aguvis_seeclick_mi_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading aguvis_seeclick_ui_tars_cleaned_fixed +Rank 0: Skipping aguvis_seeclick_ui_tars_cleaned_fixed due to repeat_time=0 +Rank 0: Loading aguvis_ui_refexp +Rank 0: Skipping aguvis_ui_refexp due to repeat_time=0 +Rank 0: Loading aguvis_webui350k +Rank 0: Skipping aguvis_webui350k due to repeat_time=0 +Rank 0: Loading aguvis_widget_captioning +Rank 0: Skipping aguvis_widget_captioning due to repeat_time=0 +Rank 0: Loading icon_caption_icon_v0222_description +Rank 0: Skipping icon_caption_icon_v0222_description due to repeat_time=0 +Rank 0: Loading icon_grounding_icon_v0222_grounding +Rank 0: Skipping icon_grounding_icon_v0222_grounding due to repeat_time=0 +Rank 0: Loading refusal_component_final_1.5m +Rank 0: Skipping refusal_component_final_1.5m due to repeat_time=0 +Rank 0: Loading refusal_component_library_snap_icon_data_grounding +Rank 0: Skipping refusal_component_library_snap_icon_data_grounding due to repeat_time=0 +Rank 0: Loading refusal_component_v1_130k +Rank 0: Skipping refusal_component_v1_130k due to repeat_time=0 +Rank 0: Loading refusal_guienv +Rank 0: Skipping refusal_guienv due to repeat_time=0 +Rank 0: Loading refusal_icon_v0222_grounding +Rank 0: Skipping refusal_icon_v0222_grounding due to repeat_time=0 +Rank 0: Loading refusal_osatlas_ui_tars_cleaned +Rank 0: Skipping refusal_osatlas_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading refusal_ricosca +Rank 0: Skipping refusal_ricosca due to repeat_time=0 +Rank 0: Loading refusal_seeclick_mi_ui_tars_cleaned +Rank 0: Skipping refusal_seeclick_mi_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading refusal_seeclick_ui_tars_cleaned_fixed +Rank 0: Skipping refusal_seeclick_ui_tars_cleaned_fixed due to repeat_time=0 +Rank 0: Loading refusal_training_data_icon_grounded_merged +Rank 0: Skipping refusal_training_data_icon_grounded_merged due to repeat_time=0 +Rank 0: Loading component_generated_component_final_1.5m_cleaned_split +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_final_1.5m_cleaned_split_20250713.jsonl with random:10% sampling strategy +Rank 0: Loaded 3987 samples from VC:s3://gui-agent/jedi/annotations_250713/component_final_1.5m_cleaned_split_20250713.jsonl +Rank 0: Loading component_generated_component_library_snap_icon_data_description +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_description_conversations_20250713.jsonl with random:50% sampling strategy +Rank 0: Loaded 11061 samples from VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_description_conversations_20250713.jsonl +Rank 0: Loading component_generated_component_library_snap_icon_data_grounding +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_grounding_conversations_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 4424 samples from VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_grounding_conversations_20250713.jsonl +Rank 0: Loading component_generated_component_v1_130k +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_v1_130k_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 26376 samples from VC:s3://gui-agent/jedi/annotations_250713/component_v1_130k_20250713.jsonl +Rank 0: Loading component_rule-based_doc_data_new +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/doc_data_new_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 3153 samples from VC:s3://gui-agent/jedi/annotations_250713/doc_data_new_20250713.jsonl +Rank 0: Loading component_rule-based_doc_scroll_data_new +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/doc_scroll_data_new_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 603 samples from VC:s3://gui-agent/jedi/annotations_250713/doc_scroll_data_new_20250713.jsonl +Rank 0: Loading component_rule-based_ethercalc_v1 +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/ethercalc_v1_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 2012 samples from VC:s3://gui-agent/jedi/annotations_250713/ethercalc_v1_20250713.jsonl +Rank 0: Loading component_rule-based_slide_v1_17k +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/slide_v1_17k_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 2363 samples from VC:s3://gui-agent/jedi/annotations_250713/slide_v1_17k_20250713.jsonl +Rank 0: Loading icon_caption_ios_app_data +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/ios_app_data_conversations-images_pure_color_background_20250713.jsonl with all sampling strategy +Rank 0: Loaded 49498 samples from VC:s3://gui-agent/jedi/annotations_250713/ios_app_data_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_caption_mac_app_data +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/mac_app_data_conversations-images_pure_color_background_20250713.jsonl with all sampling strategy +Rank 0: Loaded 18083 samples from VC:s3://gui-agent/jedi/annotations_250713/mac_app_data_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_caption_training_data_icon +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_pure_color_background_20250713.jsonl with random:50% sampling strategy +Rank 0: Loaded 75874 samples from VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_grounding_training_data_icon_grounded_merged +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_grounded_merged_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 5466 samples from VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_grounded_merged_20250713.jsonl +Rank 0: Loading layout_layout200k_training_data_qwen25 +Rank 0: Skipping layout_layout200k_training_data_qwen25 due to repeat_time=0 +Rank 0: Loading layout_layout200k_grounding_training_data_qwen25 +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/layout200k_grounding_training_data_qwen25_20250713.jsonl with random:10% sampling strategy +Rank 0: Loaded 158612 samples from VC:s3://gui-agent/jedi/annotations_250713/layout200k_grounding_training_data_qwen25_20250713.jsonl +Rank 0: Loading layout_layout400k_claude_training_data_qwen25_split +Rank 0: Skipping layout_layout400k_claude_training_data_qwen25_split due to repeat_time=0 +Rank 0: Loading layout_layout400k_claude_grounding_training_data_qwen25_split +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/layout400k_claude_grounding_training_data_qwen25_split_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 7540 samples from VC:s3://gui-agent/jedi/annotations_250713/layout400k_claude_grounding_training_data_qwen25_split_20250713.jsonl +Rank 0: Loading layout_os_layout_v1 +Rank 0: Skipping layout_os_layout_v1 due to repeat_time=0 +Rank 0: Loading layout_os_layout_v1_grounding +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/os_layout_v1_grounding_training_data_qwen25_split_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 7857 samples from VC:s3://gui-agent/jedi/annotations_250713/os_layout_v1_grounding_training_data_qwen25_split_20250713.jsonl +Rank 0: Loading mind2web_raw_image +Rank 0: Loading VC:s3://gui-agent/mind2web_train/navigation_20250705.jsonl with all sampling strategy +Rank 0: Loaded 5740 samples from VC:s3://gui-agent/mind2web_train/navigation_20250705.jsonl +Rank 0: Loading ws_android_navigation_20250328 +Rank 0: Skipping ws_android_navigation_20250328 due to repeat_time=0 +Rank 0: Loading ws_android_navigation_20250407 +Rank 0: Skipping ws_android_navigation_20250407 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_w_history_20250328 +Rank 0: Skipping ws_web_navigation_w_history_20250328 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_wo_history_20250328 +Rank 0: Skipping ws_web_navigation_wo_history_20250328 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_20250421 +Rank 0: Skipping ws_web_navigation_20250421 due to repeat_time=0 +Rank 0: Loading ws_ubuntu_navigation_20250328 +Rank 0: Skipping ws_ubuntu_navigation_20250328 due to repeat_time=0 +Rank 0: Loading ws_android_navigation_20250505 +Rank 0: Skipping ws_android_navigation_20250505 due to repeat_time=0 +Rank 0: Loading internal_android_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 48814 samples from VC:s3://gui-agent/data_20250612/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19042 samples from VC:s3://gui-agent/data_20250612/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8363 samples from VC:s3://gui-agent/data_20250612/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 26412 samples from VC:s3://gui-agent/data_20250612/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 57522 samples from VC:s3://gui-agent/data_20250612/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 1342 samples from VC:s3://gui-agent/data_20250624/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 15766 samples from VC:s3://gui-agent/data_20250624/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19280 samples from VC:s3://gui-agent/data_20250630/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 3560 samples from VC:s3://gui-agent/data_20250630/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 9258 samples from VC:s3://gui-agent/data_20250630/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 420 samples from VC:s3://gui-agent/data_20250707/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 8898 samples from VC:s3://gui-agent/data_20250707/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 21026 samples from VC:s3://gui-agent/data_20250707/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 4490 samples from VC:s3://gui-agent/data_20250707/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 22154 samples from VC:s3://gui-agent/data_20250714/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/web/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 11614 samples from VC:s3://gui-agent/data_20250714/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 16767 samples from VC:s3://gui-agent/data_20250714/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 48814 samples from VC:s3://gui-agent/data_20250612/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 11970 samples from VC:s3://gui-agent/data_20250612/android/planning_20250811.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19042 samples from VC:s3://gui-agent/data_20250612/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 4926 samples from VC:s3://gui-agent/data_20250612/mac/planning_20250811.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8363 samples from VC:s3://gui-agent/data_20250612/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3716 samples from VC:s3://gui-agent/data_20250612/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 26412 samples from VC:s3://gui-agent/data_20250612/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 4420 samples from VC:s3://gui-agent/data_20250612/windows/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 115044 samples from VC:s3://gui-agent/data_20250612/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2684 samples from VC:s3://gui-agent/data_20250624/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 15766 samples from VC:s3://gui-agent/data_20250624/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7402 samples from VC:s3://gui-agent/data_20250624/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19280 samples from VC:s3://gui-agent/data_20250630/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3746 samples from VC:s3://gui-agent/data_20250630/android/planning_20250811.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 3560 samples from VC:s3://gui-agent/data_20250630/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 2469 samples from VC:s3://gui-agent/data_20250630/mac/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 18516 samples from VC:s3://gui-agent/data_20250630/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 420 samples from VC:s3://gui-agent/data_20250707/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 243 samples from VC:s3://gui-agent/data_20250707/mac/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 8898 samples from VC:s3://gui-agent/data_20250707/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 1188 samples from VC:s3://gui-agent/data_20250707/windows/planning_20250811.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8979 samples from VC:s3://gui-agent/data_20250707/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 1599 samples from VC:s3://gui-agent/data_20250707/android/planning_20250811.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 21026 samples from VC:s3://gui-agent/data_20250707/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2657 samples from VC:s3://gui-agent/data_20250707/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 22154 samples from VC:s3://gui-agent/data_20250714/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2383 samples from VC:s3://gui-agent/data_20250714/windows/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/web/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 23229 samples from VC:s3://gui-agent/data_20250714/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 16767 samples from VC:s3://gui-agent/data_20250714/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2389 samples from VC:s3://gui-agent/data_20250714/ubuntu/planning_20250811.jsonl +Rank 0: Loading private_aig_share_0815_logo_oral_operation_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_oral_operation_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_oral_operation_d240924_v1.jsonl +Rank 0: Loading private_aig_share_0815_logo_region_caption_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_region_caption_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_region_caption_d240924_v1.jsonl +Rank 0: Loading private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2.jsonl with all sampling strategy +Rank 0: Loaded 20293 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2.jsonl +Rank 0: Loading private_ui_phone_comment_20240606_json_d20241023_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_comment_20240606_json_d20241023_v2.jsonl with all sampling strategy +Rank 0: Loaded 1055 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_comment_20240606_json_d20241023_v2.jsonl +Rank 0: Loading private_ui_internal_aig_json_d241126 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_json_d241126.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 6837 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_json_d241126.jsonl +Rank 0: Loading private_ui_internal_aig_xml_d241126 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_xml_d241126.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 6873 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_xml_d241126.jsonl +Rank 0: Loading OS_Altas_androidworld_grounding_d241120_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/OS_Altas_androidworld_grounding_d241120_v1.jsonl with all sampling strategy +W0818 20:43:27.615000 109454 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 20:43:27.615000 109454 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:43:27.615000 109454 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 20:43:27.615000 109454 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:43:27.632000 116804 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 20:43:27.632000 116804 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:43:27.632000 116804 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 20:43:27.632000 116804 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:43:27.683000 39878 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 20:43:27.683000 39878 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:43:27.683000 39878 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 20:43:27.683000 39878 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:43:27.695000 10772 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 20:43:27.695000 10772 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:43:27.695000 10772 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 20:43:27.695000 10772 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:43:29.862000 83211 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 20:43:29.862000 83211 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:43:29.862000 83211 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 20:43:29.862000 83211 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:43:35.826000 68245 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 20:43:35.826000 68245 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 20:43:35.826000 68245 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 20:43:35.826000 68245 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +[2025-08-18 20:43:47,571] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:43:47,597] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:43:47,599] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:43:47,599] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:43:47,599] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:43:47,605] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:43:47,613] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:43:47,613] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:43:47,614] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:43:47,614] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:43:47,614] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:43:47,626] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:43:47,634] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:43:47,634] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:43:47,634] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:43:47,634] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:43:47,634] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:43:47,634] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:43:47,639] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:43:47,642] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:43:47,652] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:43:47,653] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:43:47,653] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:43:47,653] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:43:47,653] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:43:47,656] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:43:47,657] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:43:47,658] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:43:47,658] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:43:47,658] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:43:47,658] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:43:47,658] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:43:47,781] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:43:47,782] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:43:47,782] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:43:47,782] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:43:47,783] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:43:47,786] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:43:47,786] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:43:47,786] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:43:50,639] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:43:50,640] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:43:50,640] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:43:50,640] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:43:50,640] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:43:50,640] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:43:50,640] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:43:50,640] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:43:50,640] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:43:50,640] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:43:50,640] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:43:50,641] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:43:50,641] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:43:50,641] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:43:50,642] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:43:50,639] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:43:50,641] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:43:50,641] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:43:50,641] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:43:50,641] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:43:50,641] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:43:50,641] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:43:50,639] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:43:50,640] [INFO] [comm.py:683:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl +[2025-08-18 20:43:50,640] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:43:50,640] [INFO] [comm.py:652:init_distributed] cdb=None +it_distributed] cdb=None +[2025-08-18 20:43:50,644] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:43:50,645] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:43:50,646] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:43:50,646] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:43:50,646] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:43:50,646] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:43:50,769] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:43:50,769] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:43:50,769] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:43:50,770] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:43:50,770] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:43:50,776] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:43:50,776] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:43:50,778] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:43:51,070] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:43:51,074] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:43:51,094] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:43:51,102] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:43:51,116] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:43:51,193] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:43:51,193] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +ce None world_size = 64 +[2025-08-18 20:43:51,198] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:43:51,202] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:43:51,201] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:43:51,202] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:43:51,204] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to mo[2025-08-18 20:43:51,210] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:43:51,210] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:43:51,211] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:43:51,221] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:43:51,222] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:43:51,223] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:43:51,228] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:43:51,230] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:43:51,233] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:43:51,235] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:43:51,252] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:43:51,251] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:43:51,255] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:43:51,253] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:43:51,256] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:43:51,261] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:43:51,256] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:43:51,257] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:43:51,258] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:43:51,260] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:43:51,260] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:43:51,270] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:43:51,397] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:43:51,405] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:43:51,406] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:43:51,407] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:43:51,408] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:43:51,408] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:43:51,409] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:43:54,060] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:43:54,102] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:43:54,102] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:43:54,104] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:43:54,116] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:43:54,116] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:43:54,116] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:43:54,116] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:43:58,624] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:43:58,624] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:43:58,624] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:43:58,624] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:43:58,624] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:43:58,624] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:43:58,624] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:43:58,625] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:43:59,052] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:43:59,169] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:43:59,170] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:43:59,174] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:43:59,176] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:43:59,177] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:43:59,180] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:43:59,181] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:44:06,724] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:44:06,724] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:44:06,726] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:44:06,728] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:44:06,732] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:44:06,732] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:44:06,898] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:44:06,898] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 20:44:13,975] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:44:13,975] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:44:13,975] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:44:13,975] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:44:13,975] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:44:13,975] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:44:13,975] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:44:13,976] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 20:44:14,451] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:44:14,570] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:44:14,571] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:44:14,575] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:44:14,580] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:44:14,583] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 20:44:14,587] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 20:44:14,588] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. + Loading checkpoint shards: 0%| | 0/2 [00:00 before Client(conf_path) +Rank 0: --> after Client(conf_path) +Rank 0: Loading datasets: /mnt/petrelfs/liuzhaoyang/workspace/GUIAgent/internvl_chat/data/internvl_meta/meta/meta_250816_1.json +Rank 0: Loading guienv +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl with all sampling strategy +Rank 0: Loaded 327972 samples from VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl +Rank 0: Loading omniact +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl with all sampling strategy +Rank 0: Loaded 6720 samples from VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl +Rank 0: Loading ricoig16k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16133 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl +Rank 0: Loading ricosca +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl with all sampling strategy +Rank 0: Loaded 173212 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl +Rank 0: Loading seeclick +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl with all sampling strategy +Rank 0: Loaded 271121 samples from VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl +Rank 0: Loading ui_refexp +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl with all sampling strategy +Rank 0: Loaded 15624 samples from VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl +Rank 0: Loading webui350k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 57389 samples from VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl +Rank 0: Loading widget_captioning +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl with all sampling strategy +Rank 0: Loaded 101426 samples from VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl +Rank 0: Loading aitw-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl +Rank 0: Loading aitw-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl +Rank 0: Loading aitw-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl +Rank 0: Loading amex-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl +Rank 0: Loading amex-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl +Rank 0: Loading amex-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl +Rank 0: Loading android_control +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 149428 samples from VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl +Rank 0: Loading coat +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl with all sampling strategy +Rank 0: Loaded 11833 samples from VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl +Rank 0: Loading guiact-web-multi-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl +Rank 0: Loading guiact-web-multi-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl +Rank 0: Loading guiact-web-multi-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl +Rank 0: Loading guiact-web-single +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl with all sampling strategy +Rank 0: Loaded 67396 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl +Rank 0: Loading guide +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl with all sampling strategy +Rank 0: Loaded 13544 samples from VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl +Rank 0: Loading gui-odyssey-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl +Rank 0: Loading gui-odyssey-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl +Rank 0: Loading gui-odyssey-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl +Rank 0: Loading mind2web-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l1_202507011.jsonl +Rank 0: Loading mind2web-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l2_202507011.jsonl +Rank 0: Loading mind2web-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l3_202507011.jsonl +Rank 0: Loading miniwob-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l1_202507011.jsonl +Rank 0: Loading miniwob-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l2_202507011.jsonl +Rank 0: Loading miniwob-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l3_202507011.jsonl +Rank 0: Loading aguvis_android_control-v2 +Rank 0: Skipping aguvis_android_control-v2 due to repeat_time=0 +Rank 0: Loading aguvis_coat-v2 +Rank 0: Skipping aguvis_coat-v2 due to repeat_time=0 +Rank 0: Loading aguvis_docvqa_grounding +Rank 0: Skipping aguvis_docvqa_grounding due to repeat_time=0 +Rank 0: Loading aguvis_guiact-web-multi +Rank 0: Skipping aguvis_guiact-web-multi due to repeat_time=0 +Rank 0: Loading aguvis_guiact-web-single-v2 +Rank 0: Skipping aguvis_guiact-web-single-v2 due to repeat_time=0 +Rank 0: Loading aguvis_guide_si_10k-v2 +Rank 0: Skipping aguvis_guide_si_10k-v2 due to repeat_time=0 +Rank 0: Loading aguvis_guienv +Rank 0: Skipping aguvis_guienv due to repeat_time=0 +Rank 0: Loading aguvis_mind2web_train_v1.0.1 +Rank 0: Skipping aguvis_mind2web_train_v1.0.1 due to repeat_time=0 +Rank 0: Loading aguvis_omniact +Rank 0: Skipping aguvis_omniact due to repeat_time=0 +Rank 0: Loading aguvis_osatlas_ui_tars_cleaned +Rank 0: Skipping aguvis_osatlas_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading aguvis_ricoig16k +Rank 0: Skipping aguvis_ricoig16k due to repeat_time=0 +Rank 0: Loading aguvis_ricosca +Rank 0: Skipping aguvis_ricosca due to repeat_time=0 +Rank 0: Loading aguvis_seeclick_mi_ui_tars_cleaned +Rank 0: Skipping aguvis_seeclick_mi_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading aguvis_seeclick_ui_tars_cleaned_fixed +Rank 0: Skipping aguvis_seeclick_ui_tars_cleaned_fixed due to repeat_time=0 +Rank 0: Loading aguvis_ui_refexp +Rank 0: Skipping aguvis_ui_refexp due to repeat_time=0 +Rank 0: Loading aguvis_webui350k +Rank 0: Skipping aguvis_webui350k due to repeat_time=0 +Rank 0: Loading aguvis_widget_captioning +Rank 0: Skipping aguvis_widget_captioning due to repeat_time=0 +Rank 0: Loading icon_caption_icon_v0222_description +Rank 0: Skipping icon_caption_icon_v0222_description due to repeat_time=0 +Rank 0: Loading icon_grounding_icon_v0222_grounding +Rank 0: Skipping icon_grounding_icon_v0222_grounding due to repeat_time=0 +Rank 0: Loading refusal_component_final_1.5m +Rank 0: Skipping refusal_component_final_1.5m due to repeat_time=0 +Rank 0: Loading refusal_component_library_snap_icon_data_grounding +Rank 0: Skipping refusal_component_library_snap_icon_data_grounding due to repeat_time=0 +Rank 0: Loading refusal_component_v1_130k +Rank 0: Skipping refusal_component_v1_130k due to repeat_time=0 +Rank 0: Loading refusal_guienv +Rank 0: Skipping refusal_guienv due to repeat_time=0 +Rank 0: Loading refusal_icon_v0222_grounding +Rank 0: Skipping refusal_icon_v0222_grounding due to repeat_time=0 +Rank 0: Loading refusal_osatlas_ui_tars_cleaned +Rank 0: Skipping refusal_osatlas_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading refusal_ricosca +Rank 0: Skipping refusal_ricosca due to repeat_time=0 +Rank 0: Loading refusal_seeclick_mi_ui_tars_cleaned +Rank 0: Skipping refusal_seeclick_mi_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading refusal_seeclick_ui_tars_cleaned_fixed +Rank 0: Skipping refusal_seeclick_ui_tars_cleaned_fixed due to repeat_time=0 +Rank 0: Loading refusal_training_data_icon_grounded_merged +Rank 0: Skipping refusal_training_data_icon_grounded_merged due to repeat_time=0 +Rank 0: Loading component_generated_component_final_1.5m_cleaned_split +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_final_1.5m_cleaned_split_20250713.jsonl with random:10% sampling strategy +Rank 0: Loaded 3987 samples from VC:s3://gui-agent/jedi/annotations_250713/component_final_1.5m_cleaned_split_20250713.jsonl +Rank 0: Loading component_generated_component_library_snap_icon_data_description +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_description_conversations_20250713.jsonl with random:50% sampling strategy +Rank 0: Loaded 11061 samples from VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_description_conversations_20250713.jsonl +Rank 0: Loading component_generated_component_library_snap_icon_data_grounding +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_grounding_conversations_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 4424 samples from VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_grounding_conversations_20250713.jsonl +Rank 0: Loading component_generated_component_v1_130k +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_v1_130k_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 26376 samples from VC:s3://gui-agent/jedi/annotations_250713/component_v1_130k_20250713.jsonl +Rank 0: Loading component_rule-based_doc_data_new +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/doc_data_new_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 3153 samples from VC:s3://gui-agent/jedi/annotations_250713/doc_data_new_20250713.jsonl +Rank 0: Loading component_rule-based_doc_scroll_data_new +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/doc_scroll_data_new_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 603 samples from VC:s3://gui-agent/jedi/annotations_250713/doc_scroll_data_new_20250713.jsonl +Rank 0: Loading component_rule-based_ethercalc_v1 +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/ethercalc_v1_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 2012 samples from VC:s3://gui-agent/jedi/annotations_250713/ethercalc_v1_20250713.jsonl +Rank 0: Loading component_rule-based_slide_v1_17k +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/slide_v1_17k_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 2363 samples from VC:s3://gui-agent/jedi/annotations_250713/slide_v1_17k_20250713.jsonl +Rank 0: Loading icon_caption_ios_app_data +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/ios_app_data_conversations-images_pure_color_background_20250713.jsonl with all sampling strategy +Rank 0: Loaded 49498 samples from VC:s3://gui-agent/jedi/annotations_250713/ios_app_data_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_caption_mac_app_data +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/mac_app_data_conversations-images_pure_color_background_20250713.jsonl with all sampling strategy +Rank 0: Loaded 18083 samples from VC:s3://gui-agent/jedi/annotations_250713/mac_app_data_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_caption_training_data_icon +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_pure_color_background_20250713.jsonl with random:50% sampling strategy +Rank 0: Loaded 75874 samples from VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_grounding_training_data_icon_grounded_merged +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_grounded_merged_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 5466 samples from VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_grounded_merged_20250713.jsonl +Rank 0: Loading layout_layout200k_training_data_qwen25 +Rank 0: Skipping layout_layout200k_training_data_qwen25 due to repeat_time=0 +Rank 0: Loading layout_layout200k_grounding_training_data_qwen25 +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/layout200k_grounding_training_data_qwen25_20250713.jsonl with random:10% sampling strategy +W0818 21:04:17.565000 86871 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 21:04:17.565000 86871 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:04:17.565000 86871 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 21:04:17.565000 86871 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:04:17.589000 93173 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 21:04:17.589000 93173 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:04:17.589000 93173 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 21:04:17.589000 93173 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:04:17.593000 21973 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 21:04:17.593000 21973 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:04:17.593000 21973 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 21:04:17.593000 21973 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:04:22.085000 18973 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 21:04:22.085000 18973 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:04:22.085000 18973 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 21:04:22.085000 18973 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:04:22.092000 28843 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 21:04:22.092000 28843 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:04:22.092000 28843 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 21:04:22.092000 28843 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:04:22.137000 63263 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 21:04:22.137000 63263 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:04:22.137000 63263 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 21:04:22.137000 63263 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:04:25.812000 21839 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 21:04:25.812000 21839 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:04:25.812000 21839 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 21:04:25.812000 21839 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:04:27.209000 56404 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 21:04:27.209000 56404 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:04:27.209000 56404 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 21:04:27.209000 56404 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:05:38.299000 31763 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 21:05:38.299000 31763 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:05:38.299000 31763 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 21:05:38.299000 31763 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:05:38.299000 70355 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 21:05:38.299000 70355 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:05:38.299000 70355 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 21:05:38.299000 70355 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:05:38.379000 96213 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 21:05:38.379000 96213 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:05:38.379000 96213 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 21:05:38.379000 96213 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:05:38.664000 94098 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 21:05:38.664000 94098 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:05:38.664000 94098 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 21:05:38.664000 94098 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:05:38.722000 25620 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 21:05:38.722000 25620 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:05:38.722000 25620 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 21:05:38.722000 25620 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:05:38.784000 24751 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 21:05:38.784000 24751 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:05:38.784000 24751 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 21:05:38.784000 24751 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:05:38.827000 118403 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 21:05:38.827000 118403 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:05:38.827000 118403 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 21:05:38.827000 118403 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:05:39.495000 22523 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 21:05:39.495000 22523 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:05:39.495000 22523 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 21:05:39.495000 22523 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +[2025-08-18 21:05:54,071] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:05:54,072] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:05:54,081] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:05:54,081] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:05:54,084] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:05:54,084] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:05:54,084] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:05:54,084] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:05:54,087] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:05:54,088] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:05:54,088] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:05:54,096] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:05:54,098] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:05:54,098] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:05:54,098] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:05:54,098] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:05:54,123] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:05:54,124] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:05:54,124] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:05:54,125] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:05:54,125] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:05:54,126] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:05:54,126] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:05:54,126] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:05:54,130] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:05:54,130] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:05:54,131] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:05:54,131] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:05:54,131] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:05:54,131] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:05:54,132] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:05:54,132] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:05:58,020] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:05:58,018] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:05:58,018] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:05:58,018] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:05:58,018] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:05:58,018] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:05:58,016] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:05:58,016] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:05:58,020] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:05:58,020] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:05:58,020] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:05:58,020] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:05:58,020] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:05:58,020] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:05:58,020] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:05:58,022] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:05:58,022] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:05:58,475] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:05:58,497] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:05:58,498] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:05:58,505] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:05:58,599] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:05:58,606] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:05:58,607] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:05:58,609] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:05:58,609] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:05:58,610] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:05:58,610] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:05:58,629] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:05:58,630] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:05:58,633] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:05:58,635] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:05:58,636] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:05:58,636] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:05:58,636] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:05:58,645] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:05:58,655] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:05:58,654] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:05:58,657] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:05:58,657] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:05:58,657] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:05:58,657] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:05:58,658] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:05:58,660] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:06:03,489] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:06:03,489] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:06:03,490] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:06:03,490] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:06:03,491] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:06:03,492] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:06:03,492] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:06:03,492] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:06:03,500] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:06:03,500] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:06:03,500] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:06:03,501] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:06:03,503] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:06:03,503] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:06:03,503] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:06:03,503] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:06:03,504] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:06:03,504] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:06:03,506] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:06:03,508] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:06:03,508] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:06:03,508] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:06:03,509] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:06:11,660] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:06:11,658] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:06:11,660] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:06:11,660] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:06:11,660] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:06:11,660] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:06:11,660] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:06:11,660] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:06:11,655] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:06:11,655] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:06:11,656] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:06:11,656] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:06:11,657] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:06:11,658] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:06:11,658] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:06:11,659] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:06:11,658] [INFO] [comm.py:683:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl +[2025-08-18 21:06:11,658] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:06:11,940] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:06:11,941] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:06:11,941] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:06:11,941] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:06:11,943] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:06:11,945] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:06:11,945] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:06:11,945] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:06:12,139] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:06:12,149] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:06:12,156] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:06:12,234] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:06:12,251] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:06:12,252] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:06:12,257] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:06:12,259] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:06:12,262] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:06:12,263] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:06:12,278] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:06:12,285] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:06:12,279] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:06:12,279] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:06:12,290] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:06:12,291] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:06:12,292] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:06:12,294] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:06:12,294] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:06:12,294] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:06:12,296] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:06:12,296] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:06:21,878] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:06:21,878] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:06:21,878] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:06:21,878] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:06:21,878] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:06:21,879] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:06:21,879] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:06:21,881] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:06:22,388] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:06:22,513] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:06:22,513] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:06:22,514] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:06:22,517] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:06:22,518] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:06:22,519] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:06:22,520] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. + Loading checkpoint shards: 0%| | 0/2 [00:00 before Client(conf_path) +Rank 0: --> after Client(conf_path) +Rank 0: Loading datasets: /mnt/petrelfs/liuzhaoyang/workspace/GUIAgent/internvl_chat/data/internvl_meta/meta/meta_250816_1.json +Rank 0: Loading guienv +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl with all sampling strategy +Rank 0: Loaded 327972 samples from VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl +Rank 0: Loading omniact +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl with all sampling strategy +Rank 0: Loaded 6720 samples from VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl +Rank 0: Loading ricoig16k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16133 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl +Rank 0: Loading ricosca +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl with all sampling strategy +Rank 0: Loaded 173212 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl +Rank 0: Loading seeclick +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl with all sampling strategy +Rank 0: Loaded 271121 samples from VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl +Rank 0: Loading ui_refexp +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl with all sampling strategy +Rank 0: Loaded 15624 samples from VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl +Rank 0: Loading webui350k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 57389 samples from VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl +Rank 0: Loading widget_captioning +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl with all sampling strategy +Rank 0: Loaded 101426 samples from VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl +Rank 0: Loading aitw-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl +Rank 0: Loading aitw-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl +Rank 0: Loading aitw-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl +Rank 0: Loading amex-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl +Rank 0: Loading amex-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl +Rank 0: Loading amex-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl +Rank 0: Loading android_control +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 149428 samples from VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl +Rank 0: Loading coat +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl with all sampling strategy +Rank 0: Loaded 11833 samples from VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl +Rank 0: Loading guiact-web-multi-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl +Rank 0: Loading guiact-web-multi-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl +Rank 0: Loading guiact-web-multi-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl +Rank 0: Loading guiact-web-single +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl with all sampling strategy +Rank 0: Loaded 67396 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl +Rank 0: Loading guide +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl with all sampling strategy +Rank 0: Loaded 13544 samples from VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl +Rank 0: Loading gui-odyssey-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl +Rank 0: Loading gui-odyssey-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl +Rank 0: Loading gui-odyssey-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl +Rank 0: Loading mind2web-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l1_202507011.jsonl +Rank 0: Loading mind2web-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l2_202507011.jsonl +Rank 0: Loading mind2web-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l3_202507011.jsonl +Rank 0: Loading miniwob-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l1_202507011.jsonl +Rank 0: Loading miniwob-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l2_202507011.jsonl +Rank 0: Loading miniwob-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l3_202507011.jsonl +Rank 0: Loading aguvis_android_control-v2 +Rank 0: Skipping aguvis_android_control-v2 due to repeat_time=0 +Rank 0: Loading aguvis_coat-v2 +Rank 0: Skipping aguvis_coat-v2 due to repeat_time=0 +Rank 0: Loading aguvis_docvqa_grounding +Rank 0: Skipping aguvis_docvqa_grounding due to repeat_time=0 +Rank 0: Loading aguvis_guiact-web-multi +Rank 0: Skipping aguvis_guiact-web-multi due to repeat_time=0 +Rank 0: Loading aguvis_guiact-web-single-v2 +Rank 0: Skipping aguvis_guiact-web-single-v2 due to repeat_time=0 +Rank 0: Loading aguvis_guide_si_10k-v2 +Rank 0: Skipping aguvis_guide_si_10k-v2 due to repeat_time=0 +Rank 0: Loading aguvis_guienv +Rank 0: Skipping aguvis_guienv due to repeat_time=0 +Rank 0: Loading aguvis_mind2web_train_v1.0.1 +Rank 0: Skipping aguvis_mind2web_train_v1.0.1 due to repeat_time=0 +Rank 0: Loading aguvis_omniact +Rank 0: Skipping aguvis_omniact due to repeat_time=0 +Rank 0: Loading aguvis_osatlas_ui_tars_cleaned +Rank 0: Skipping aguvis_osatlas_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading aguvis_ricoig16k +Rank 0: Skipping aguvis_ricoig16k due to repeat_time=0 +Rank 0: Loading aguvis_ricosca +Rank 0: Skipping aguvis_ricosca due to repeat_time=0 +Rank 0: Loading aguvis_seeclick_mi_ui_tars_cleaned +Rank 0: Skipping aguvis_seeclick_mi_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading aguvis_seeclick_ui_tars_cleaned_fixed +Rank 0: Skipping aguvis_seeclick_ui_tars_cleaned_fixed due to repeat_time=0 +Rank 0: Loading aguvis_ui_refexp +Rank 0: Skipping aguvis_ui_refexp due to repeat_time=0 +Rank 0: Loading aguvis_webui350k +Rank 0: Skipping aguvis_webui350k due to repeat_time=0 +Rank 0: Loading aguvis_widget_captioning +Rank 0: Skipping aguvis_widget_captioning due to repeat_time=0 +Rank 0: Loading icon_caption_icon_v0222_description +Rank 0: Skipping icon_caption_icon_v0222_description due to repeat_time=0 +Rank 0: Loading icon_grounding_icon_v0222_grounding +Rank 0: Skipping icon_grounding_icon_v0222_grounding due to repeat_time=0 +Rank 0: Loading refusal_component_final_1.5m +Rank 0: Skipping refusal_component_final_1.5m due to repeat_time=0 +Rank 0: Loading refusal_component_library_snap_icon_data_grounding +Rank 0: Skipping refusal_component_library_snap_icon_data_grounding due to repeat_time=0 +Rank 0: Loading refusal_component_v1_130k +Rank 0: Skipping refusal_component_v1_130k due to repeat_time=0 +Rank 0: Loading refusal_guienv +Rank 0: Skipping refusal_guienv due to repeat_time=0 +Rank 0: Loading refusal_icon_v0222_grounding +Rank 0: Skipping refusal_icon_v0222_grounding due to repeat_time=0 +Rank 0: Loading refusal_osatlas_ui_tars_cleaned +Rank 0: Skipping refusal_osatlas_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading refusal_ricosca +Rank 0: Skipping refusal_ricosca due to repeat_time=0 +Rank 0: Loading refusal_seeclick_mi_ui_tars_cleaned +Rank 0: Skipping refusal_seeclick_mi_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading refusal_seeclick_ui_tars_cleaned_fixed +Rank 0: Skipping refusal_seeclick_ui_tars_cleaned_fixed due to repeat_time=0 +Rank 0: Loading refusal_training_data_icon_grounded_merged +Rank 0: Skipping refusal_training_data_icon_grounded_merged due to repeat_time=0 +Rank 0: Loading component_generated_component_final_1.5m_cleaned_split +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_final_1.5m_cleaned_split_20250713.jsonl with random:10% sampling strategy +Rank 0: Loaded 3987 samples from VC:s3://gui-agent/jedi/annotations_250713/component_final_1.5m_cleaned_split_20250713.jsonl +Rank 0: Loading component_generated_component_library_snap_icon_data_description +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_description_conversations_20250713.jsonl with random:50% sampling strategy +Rank 0: Loaded 11061 samples from VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_description_conversations_20250713.jsonl +Rank 0: Loading component_generated_component_library_snap_icon_data_grounding +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_grounding_conversations_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 4424 samples from VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_grounding_conversations_20250713.jsonl +Rank 0: Loading component_generated_component_v1_130k +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_v1_130k_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 26376 samples from VC:s3://gui-agent/jedi/annotations_250713/component_v1_130k_20250713.jsonl +Rank 0: Loading component_rule-based_doc_data_new +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/doc_data_new_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 3153 samples from VC:s3://gui-agent/jedi/annotations_250713/doc_data_new_20250713.jsonl +Rank 0: Loading component_rule-based_doc_scroll_data_new +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/doc_scroll_data_new_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 603 samples from VC:s3://gui-agent/jedi/annotations_250713/doc_scroll_data_new_20250713.jsonl +Rank 0: Loading component_rule-based_ethercalc_v1 +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/ethercalc_v1_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 2012 samples from VC:s3://gui-agent/jedi/annotations_250713/ethercalc_v1_20250713.jsonl +Rank 0: Loading component_rule-based_slide_v1_17k +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/slide_v1_17k_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 2363 samples from VC:s3://gui-agent/jedi/annotations_250713/slide_v1_17k_20250713.jsonl +Rank 0: Loading icon_caption_ios_app_data +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/ios_app_data_conversations-images_pure_color_background_20250713.jsonl with all sampling strategy +Rank 0: Loaded 49498 samples from VC:s3://gui-agent/jedi/annotations_250713/ios_app_data_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_caption_mac_app_data +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/mac_app_data_conversations-images_pure_color_background_20250713.jsonl with all sampling strategy +Rank 0: Loaded 18083 samples from VC:s3://gui-agent/jedi/annotations_250713/mac_app_data_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_caption_training_data_icon +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_pure_color_background_20250713.jsonl with random:50% sampling strategy +Rank 0: Loaded 75874 samples from VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_grounding_training_data_icon_grounded_merged +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_grounded_merged_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 5466 samples from VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_grounded_merged_20250713.jsonl +Rank 0: Loading layout_layout200k_training_data_qwen25 +Rank 0: Skipping layout_layout200k_training_data_qwen25 due to repeat_time=0 +Rank 0: Loading layout_layout200k_grounding_training_data_qwen25 +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/layout200k_grounding_training_data_qwen25_20250713.jsonl with random:10% sampling strategy +Rank 0: Loaded 158612 samples from VC:s3://gui-agent/jedi/annotations_250713/layout200k_grounding_training_data_qwen25_20250713.jsonl +Rank 0: Loading layout_layout400k_claude_training_data_qwen25_split +Rank 0: Skipping layout_layout400k_claude_training_data_qwen25_split due to repeat_time=0 +Rank 0: Loading layout_layout400k_claude_grounding_training_data_qwen25_split +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/layout400k_claude_grounding_training_data_qwen25_split_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 7540 samples from VC:s3://gui-agent/jedi/annotations_250713/layout400k_claude_grounding_training_data_qwen25_split_20250713.jsonl +Rank 0: Loading layout_os_layout_v1 +Rank 0: Skipping layout_os_layout_v1 due to repeat_time=0 +Rank 0: Loading layout_os_layout_v1_grounding +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/os_layout_v1_grounding_training_data_qwen25_split_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 7857 samples from VC:s3://gui-agent/jedi/annotations_250713/os_layout_v1_grounding_training_data_qwen25_split_20250713.jsonl +Rank 0: Loading mind2web_raw_image +Rank 0: Loading VC:s3://gui-agent/mind2web_train/navigation_20250705.jsonl with all sampling strategy +Rank 0: Loaded 5740 samples from VC:s3://gui-agent/mind2web_train/navigation_20250705.jsonl +Rank 0: Loading ws_android_navigation_20250328 +Rank 0: Skipping ws_android_navigation_20250328 due to repeat_time=0 +Rank 0: Loading ws_android_navigation_20250407 +Rank 0: Skipping ws_android_navigation_20250407 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_w_history_20250328 +Rank 0: Skipping ws_web_navigation_w_history_20250328 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_wo_history_20250328 +Rank 0: Skipping ws_web_navigation_wo_history_20250328 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_20250421 +Rank 0: Skipping ws_web_navigation_20250421 due to repeat_time=0 +Rank 0: Loading ws_ubuntu_navigation_20250328 +Rank 0: Skipping ws_ubuntu_navigation_20250328 due to repeat_time=0 +Rank 0: Loading ws_android_navigation_20250505 +Rank 0: Skipping ws_android_navigation_20250505 due to repeat_time=0 +Rank 0: Loading internal_android_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 48814 samples from VC:s3://gui-agent/data_20250612/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19042 samples from VC:s3://gui-agent/data_20250612/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8363 samples from VC:s3://gui-agent/data_20250612/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 26412 samples from VC:s3://gui-agent/data_20250612/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 57522 samples from VC:s3://gui-agent/data_20250612/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 1342 samples from VC:s3://gui-agent/data_20250624/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 15766 samples from VC:s3://gui-agent/data_20250624/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/navigation_20250720_boost_instruction.jsonl with all sampling strategy +W0818 21:12:11.059000 41583 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 21:12:11.059000 41583 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:12:11.059000 41583 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 21:12:11.059000 41583 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:12:11.312000 37401 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 21:12:11.312000 37401 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:12:11.312000 37401 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 21:12:11.312000 37401 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:12:12.805000 119444 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 21:12:12.805000 119444 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:12:12.805000 119444 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 21:12:12.805000 119444 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:12:13.388000 17957 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 21:12:13.388000 17957 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:12:13.388000 17957 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 21:12:13.388000 17957 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:12:16.101000 116411 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 21:12:16.101000 116411 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:12:16.101000 116411 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 21:12:16.101000 116411 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +[2025-08-18 21:12:27,234] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:27,234] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:27,234] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:27,240] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:27,247] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:27,251] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:27,260] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:27,251] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:27,261] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:27,261] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:27,261] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:27,262] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:27,262] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:27,262] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:27,262] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:27,277] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:27,277] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:27,277] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:27,278] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:27,281] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:27,281] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:27,281] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:27,281] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:30,437] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:30,438] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:30,439] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:30,439] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:30,439] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:30,443] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:30,443] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:30,445] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:30,444] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:30,444] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:30,444] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:30,444] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:30,444] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:30,444] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:30,445] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:30,457] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:30,457] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:30,457] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:30,457] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:30,457] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:30,457] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:30,459] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:30,460] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:30,918] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:12:30,926] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:12:31,064] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:12:31,068] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:12:31,075] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:12:31,073] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:12:31,077] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:12:31,073] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:12:31,078] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:12:31,078] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:12:31,079] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:12:31,082] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:12:31,460] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:31,461] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:31,462] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:31,467] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:31,469] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:31,470] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:31,470] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:31,470] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:31,482] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:31,482] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:31,483] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:31,483] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:31,481] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:31,484] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:31,484] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:31,482] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:31,482] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:31,489] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:31,489] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:31,489] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:31,489] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:31,489] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:34,749] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:34,749] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:34,749] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:34,749] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:34,749] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:34,749] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:34,749] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:34,750] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:34,759] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:34,759] [INFO] [comm.py:683:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl +[2025-08-18 21:12:34,759] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:34,759] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:34,760] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:34,760] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:34,760] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:34,760] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:34,760] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:35,233] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:12:35,261] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:12:35,378] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:12:35,390] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:12:35,391] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:12:35,391] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:12:35,392] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:12:35,392] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:12:35,392] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:12:35,405] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:12:35,406] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:12:35,409] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:12:35,411] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:12:35,416] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:12:35,416] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:12:35,416] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:12:35,460] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:35,460] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:35,461] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:35,461] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:35,461] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:35,461] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:35,461] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:35,463] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:35,953] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:12:36,060] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:12:36,062] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:12:36,075] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:12:36,076] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:12:36,078] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:12:36,079] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:12:36,079] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:12:39,611] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:39,611] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:39,611] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:39,611] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:39,611] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:39,612] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:39,612] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:39,612] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:39,648] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:39,654] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:39,669] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:39,669] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:39,670] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:39,673] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:39,673] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:39,673] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:12:45,170] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:45,170] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:45,170] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:45,170] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:45,170] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:45,170] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:45,170] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:45,171] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:45,612] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:12:45,746] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:12:45,747] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:12:45,748] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:12:45,749] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:12:45,749] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:12:45,749] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:12:45,751] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:12:46,165] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:46,165] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:46,165] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:46,165] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:46,165] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:46,165] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:46,165] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:46,165] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:12:46,651] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:12:46,789] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:12:46,792] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:12:46,793] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:12:46,794] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:12:46,794] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:12:46,795] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:12:46,797] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. + Loading checkpoint shards: 0%| | 0/2 [00:00 before Client(conf_path) +Rank 0: --> after Client(conf_path) +Rank 0: Loading datasets: /mnt/petrelfs/liuzhaoyang/workspace/GUIAgent/internvl_chat/data/internvl_meta/meta/meta_250816_1.json +Rank 0: Loading guienv +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl with all sampling strategy +Rank 0: Loaded 327972 samples from VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl +Rank 0: Loading omniact +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl with all sampling strategy +Rank 0: Loaded 6720 samples from VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl +Rank 0: Loading ricoig16k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16133 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl +Rank 0: Loading ricosca +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl with all sampling strategy +Rank 0: Loaded 173212 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl +Rank 0: Loading seeclick +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl with all sampling strategy +Rank 0: Loaded 271121 samples from VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl +Rank 0: Loading ui_refexp +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl with all sampling strategy +Rank 0: Loaded 15624 samples from VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl +Rank 0: Loading webui350k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 57389 samples from VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl +Rank 0: Loading widget_captioning +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl with all sampling strategy +Rank 0: Loaded 101426 samples from VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl +Rank 0: Loading aitw-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl +Rank 0: Loading aitw-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl +Rank 0: Loading aitw-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl +Rank 0: Loading amex-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl +Rank 0: Loading amex-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl +Rank 0: Loading amex-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl +Rank 0: Loading android_control +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 149428 samples from VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl +Rank 0: Loading coat +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl with all sampling strategy +Rank 0: Loaded 11833 samples from VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl +Rank 0: Loading guiact-web-multi-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl +Rank 0: Loading guiact-web-multi-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl +Rank 0: Loading guiact-web-multi-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl +Rank 0: Loading guiact-web-single +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl with all sampling strategy +Rank 0: Loaded 67396 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl +Rank 0: Loading guide +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl with all sampling strategy +Rank 0: Loaded 13544 samples from VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl +Rank 0: Loading gui-odyssey-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl +Rank 0: Loading gui-odyssey-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl +Rank 0: Loading gui-odyssey-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl +Rank 0: Loading mind2web-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l1_202507011.jsonl +Rank 0: Loading mind2web-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l2_202507011.jsonl +Rank 0: Loading mind2web-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l3_202507011.jsonl +Rank 0: Loading miniwob-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l1_202507011.jsonl +Rank 0: Loading miniwob-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l2_202507011.jsonl +Rank 0: Loading miniwob-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l3_202507011.jsonl +Rank 0: Loading aguvis_android_control-v2 +Rank 0: Skipping aguvis_android_control-v2 due to repeat_time=0 +Rank 0: Loading aguvis_coat-v2 +Rank 0: Skipping aguvis_coat-v2 due to repeat_time=0 +Rank 0: Loading aguvis_docvqa_grounding +Rank 0: Skipping aguvis_docvqa_grounding due to repeat_time=0 +Rank 0: Loading aguvis_guiact-web-multi +Rank 0: Skipping aguvis_guiact-web-multi due to repeat_time=0 +Rank 0: Loading aguvis_guiact-web-single-v2 +Rank 0: Skipping aguvis_guiact-web-single-v2 due to repeat_time=0 +Rank 0: Loading aguvis_guide_si_10k-v2 +Rank 0: Skipping aguvis_guide_si_10k-v2 due to repeat_time=0 +Rank 0: Loading aguvis_guienv +Rank 0: Skipping aguvis_guienv due to repeat_time=0 +Rank 0: Loading aguvis_mind2web_train_v1.0.1 +Rank 0: Skipping aguvis_mind2web_train_v1.0.1 due to repeat_time=0 +Rank 0: Loading aguvis_omniact +Rank 0: Skipping aguvis_omniact due to repeat_time=0 +Rank 0: Loading aguvis_osatlas_ui_tars_cleaned +Rank 0: Skipping aguvis_osatlas_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading aguvis_ricoig16k +Rank 0: Skipping aguvis_ricoig16k due to repeat_time=0 +Rank 0: Loading aguvis_ricosca +Rank 0: Skipping aguvis_ricosca due to repeat_time=0 +Rank 0: Loading aguvis_seeclick_mi_ui_tars_cleaned +Rank 0: Skipping aguvis_seeclick_mi_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading aguvis_seeclick_ui_tars_cleaned_fixed +Rank 0: Skipping aguvis_seeclick_ui_tars_cleaned_fixed due to repeat_time=0 +Rank 0: Loading aguvis_ui_refexp +Rank 0: Skipping aguvis_ui_refexp due to repeat_time=0 +Rank 0: Loading aguvis_webui350k +Rank 0: Skipping aguvis_webui350k due to repeat_time=0 +Rank 0: Loading aguvis_widget_captioning +Rank 0: Skipping aguvis_widget_captioning due to repeat_time=0 +Rank 0: Loading icon_caption_icon_v0222_description +Rank 0: Skipping icon_caption_icon_v0222_description due to repeat_time=0 +Rank 0: Loading icon_grounding_icon_v0222_grounding +Rank 0: Skipping icon_grounding_icon_v0222_grounding due to repeat_time=0 +Rank 0: Loading refusal_component_final_1.5m +Rank 0: Skipping refusal_component_final_1.5m due to repeat_time=0 +Rank 0: Loading refusal_component_library_snap_icon_data_grounding +Rank 0: Skipping refusal_component_library_snap_icon_data_grounding due to repeat_time=0 +Rank 0: Loading refusal_component_v1_130k +Rank 0: Skipping refusal_component_v1_130k due to repeat_time=0 +Rank 0: Loading refusal_guienv +Rank 0: Skipping refusal_guienv due to repeat_time=0 +Rank 0: Loading refusal_icon_v0222_grounding +Rank 0: Skipping refusal_icon_v0222_grounding due to repeat_time=0 +Rank 0: Loading refusal_osatlas_ui_tars_cleaned +Rank 0: Skipping refusal_osatlas_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading refusal_ricosca +Rank 0: Skipping refusal_ricosca due to repeat_time=0 +Rank 0: Loading refusal_seeclick_mi_ui_tars_cleaned +Rank 0: Skipping refusal_seeclick_mi_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading refusal_seeclick_ui_tars_cleaned_fixed +Rank 0: Skipping refusal_seeclick_ui_tars_cleaned_fixed due to repeat_time=0 +Rank 0: Loading refusal_training_data_icon_grounded_merged +Rank 0: Skipping refusal_training_data_icon_grounded_merged due to repeat_time=0 +Rank 0: Loading component_generated_component_final_1.5m_cleaned_split +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_final_1.5m_cleaned_split_20250713.jsonl with random:10% sampling strategy +Rank 0: Loaded 3987 samples from VC:s3://gui-agent/jedi/annotations_250713/component_final_1.5m_cleaned_split_20250713.jsonl +Rank 0: Loading component_generated_component_library_snap_icon_data_description +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_description_conversations_20250713.jsonl with random:50% sampling strategy +Rank 0: Loaded 11061 samples from VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_description_conversations_20250713.jsonl +Rank 0: Loading component_generated_component_library_snap_icon_data_grounding +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_grounding_conversations_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 4424 samples from VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_grounding_conversations_20250713.jsonl +Rank 0: Loading component_generated_component_v1_130k +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_v1_130k_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 26376 samples from VC:s3://gui-agent/jedi/annotations_250713/component_v1_130k_20250713.jsonl +Rank 0: Loading component_rule-based_doc_data_new +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/doc_data_new_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 3153 samples from VC:s3://gui-agent/jedi/annotations_250713/doc_data_new_20250713.jsonl +Rank 0: Loading component_rule-based_doc_scroll_data_new +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/doc_scroll_data_new_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 603 samples from VC:s3://gui-agent/jedi/annotations_250713/doc_scroll_data_new_20250713.jsonl +Rank 0: Loading component_rule-based_ethercalc_v1 +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/ethercalc_v1_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 2012 samples from VC:s3://gui-agent/jedi/annotations_250713/ethercalc_v1_20250713.jsonl +Rank 0: Loading component_rule-based_slide_v1_17k +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/slide_v1_17k_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 2363 samples from VC:s3://gui-agent/jedi/annotations_250713/slide_v1_17k_20250713.jsonl +Rank 0: Loading icon_caption_ios_app_data +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/ios_app_data_conversations-images_pure_color_background_20250713.jsonl with all sampling strategy +Rank 0: Loaded 49498 samples from VC:s3://gui-agent/jedi/annotations_250713/ios_app_data_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_caption_mac_app_data +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/mac_app_data_conversations-images_pure_color_background_20250713.jsonl with all sampling strategy +Rank 0: Loaded 18083 samples from VC:s3://gui-agent/jedi/annotations_250713/mac_app_data_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_caption_training_data_icon +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_pure_color_background_20250713.jsonl with random:50% sampling strategy +Rank 0: Loaded 75874 samples from VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_grounding_training_data_icon_grounded_merged +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_grounded_merged_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 5466 samples from VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_grounded_merged_20250713.jsonl +Rank 0: Loading layout_layout200k_training_data_qwen25 +Rank 0: Skipping layout_layout200k_training_data_qwen25 due to repeat_time=0 +Rank 0: Loading layout_layout200k_grounding_training_data_qwen25 +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/layout200k_grounding_training_data_qwen25_20250713.jsonl with random:10% sampling strategy +Rank 0: Loaded 158612 samples from VC:s3://gui-agent/jedi/annotations_250713/layout200k_grounding_training_data_qwen25_20250713.jsonl +Rank 0: Loading layout_layout400k_claude_training_data_qwen25_split +Rank 0: Skipping layout_layout400k_claude_training_data_qwen25_split due to repeat_time=0 +Rank 0: Loading layout_layout400k_claude_grounding_training_data_qwen25_split +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/layout400k_claude_grounding_training_data_qwen25_split_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 7540 samples from VC:s3://gui-agent/jedi/annotations_250713/layout400k_claude_grounding_training_data_qwen25_split_20250713.jsonl +Rank 0: Loading layout_os_layout_v1 +Rank 0: Skipping layout_os_layout_v1 due to repeat_time=0 +Rank 0: Loading layout_os_layout_v1_grounding +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/os_layout_v1_grounding_training_data_qwen25_split_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 7857 samples from VC:s3://gui-agent/jedi/annotations_250713/os_layout_v1_grounding_training_data_qwen25_split_20250713.jsonl +Rank 0: Loading mind2web_raw_image +Rank 0: Loading VC:s3://gui-agent/mind2web_train/navigation_20250705.jsonl with all sampling strategy +Rank 0: Loaded 5740 samples from VC:s3://gui-agent/mind2web_train/navigation_20250705.jsonl +Rank 0: Loading ws_android_navigation_20250328 +Rank 0: Skipping ws_android_navigation_20250328 due to repeat_time=0 +Rank 0: Loading ws_android_navigation_20250407 +Rank 0: Skipping ws_android_navigation_20250407 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_w_history_20250328 +Rank 0: Skipping ws_web_navigation_w_history_20250328 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_wo_history_20250328 +Rank 0: Skipping ws_web_navigation_wo_history_20250328 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_20250421 +Rank 0: Skipping ws_web_navigation_20250421 due to repeat_time=0 +Rank 0: Loading ws_ubuntu_navigation_20250328 +Rank 0: Skipping ws_ubuntu_navigation_20250328 due to repeat_time=0 +Rank 0: Loading ws_android_navigation_20250505 +Rank 0: Skipping ws_android_navigation_20250505 due to repeat_time=0 +Rank 0: Loading internal_android_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 48814 samples from VC:s3://gui-agent/data_20250612/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19042 samples from VC:s3://gui-agent/data_20250612/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8363 samples from VC:s3://gui-agent/data_20250612/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 26412 samples from VC:s3://gui-agent/data_20250612/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 57522 samples from VC:s3://gui-agent/data_20250612/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 1342 samples from VC:s3://gui-agent/data_20250624/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 15766 samples from VC:s3://gui-agent/data_20250624/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19280 samples from VC:s3://gui-agent/data_20250630/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 3560 samples from VC:s3://gui-agent/data_20250630/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 9258 samples from VC:s3://gui-agent/data_20250630/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 420 samples from VC:s3://gui-agent/data_20250707/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 8898 samples from VC:s3://gui-agent/data_20250707/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 21026 samples from VC:s3://gui-agent/data_20250707/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 4490 samples from VC:s3://gui-agent/data_20250707/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 22154 samples from VC:s3://gui-agent/data_20250714/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/web/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 11614 samples from VC:s3://gui-agent/data_20250714/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 16767 samples from VC:s3://gui-agent/data_20250714/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 48814 samples from VC:s3://gui-agent/data_20250612/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 11970 samples from VC:s3://gui-agent/data_20250612/android/planning_20250811.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19042 samples from VC:s3://gui-agent/data_20250612/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 4926 samples from VC:s3://gui-agent/data_20250612/mac/planning_20250811.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8363 samples from VC:s3://gui-agent/data_20250612/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3716 samples from VC:s3://gui-agent/data_20250612/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 26412 samples from VC:s3://gui-agent/data_20250612/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 4420 samples from VC:s3://gui-agent/data_20250612/windows/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 115044 samples from VC:s3://gui-agent/data_20250612/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2684 samples from VC:s3://gui-agent/data_20250624/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 15766 samples from VC:s3://gui-agent/data_20250624/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7402 samples from VC:s3://gui-agent/data_20250624/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19280 samples from VC:s3://gui-agent/data_20250630/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3746 samples from VC:s3://gui-agent/data_20250630/android/planning_20250811.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 3560 samples from VC:s3://gui-agent/data_20250630/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 2469 samples from VC:s3://gui-agent/data_20250630/mac/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 18516 samples from VC:s3://gui-agent/data_20250630/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 420 samples from VC:s3://gui-agent/data_20250707/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 243 samples from VC:s3://gui-agent/data_20250707/mac/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 8898 samples from VC:s3://gui-agent/data_20250707/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 1188 samples from VC:s3://gui-agent/data_20250707/windows/planning_20250811.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8979 samples from VC:s3://gui-agent/data_20250707/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 1599 samples from VC:s3://gui-agent/data_20250707/android/planning_20250811.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 21026 samples from VC:s3://gui-agent/data_20250707/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2657 samples from VC:s3://gui-agent/data_20250707/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 22154 samples from VC:s3://gui-agent/data_20250714/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2383 samples from VC:s3://gui-agent/data_20250714/windows/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/web/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 23229 samples from VC:s3://gui-agent/data_20250714/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 16767 samples from VC:s3://gui-agent/data_20250714/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2389 samples from VC:s3://gui-agent/data_20250714/ubuntu/planning_20250811.jsonl +Rank 0: Loading private_aig_share_0815_logo_oral_operation_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_oral_operation_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_oral_operation_d240924_v1.jsonl +Rank 0: Loading private_aig_share_0815_logo_region_caption_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_region_caption_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_region_caption_d240924_v1.jsonl +Rank 0: Loading private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2.jsonl with all sampling strategy +Rank 0: Loaded 20293 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2.jsonl +Rank 0: Loading private_ui_phone_comment_20240606_json_d20241023_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_comment_20240606_json_d20241023_v2.jsonl with all sampling strategy +Rank 0: Loaded 1055 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_comment_20240606_json_d20241023_v2.jsonl +Rank 0: Loading private_ui_internal_aig_json_d241126 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_json_d241126.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 6837 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_json_d241126.jsonl +Rank 0: Loading private_ui_internal_aig_xml_d241126 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_xml_d241126.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 6873 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_xml_d241126.jsonl +Rank 0: Loading OS_Altas_androidworld_grounding_d241120_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/OS_Altas_androidworld_grounding_d241120_v1.jsonl with all sampling strategy +Rank 0: Loaded 89860 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/OS_Altas_androidworld_grounding_d241120_v1.jsonl +Rank 0: Loading private_ui_aig_share_long_caption_20240604_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_long_caption_20240604_v1.jsonl with repeat:4 sampling strategy +Rank 0: Loaded 3156 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_long_caption_20240604_v1.jsonl +Rank 0: Loading aw_1218_grounding +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/grounding_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/grounding_new.jsonl +Rank 0: Loading aw_1218_regioncaption +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/regioncaption_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/regioncaption_new.jsonl +Rank 0: Loading aw_1218_oral_operation +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/oral_operation_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/oral_operation_new.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600.jsonl with all sampling strategy +Rank 0: Loaded 6600 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1.jsonl +Rank 0: Loading private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607.jsonl with all sampling strategy +Rank 0: Loaded 24620 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607.jsonl +Rank 0: Loading private_ui_phone_2403_long_caption_d20240604_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d20240604_v2.jsonl with all sampling strategy +Rank 0: Loaded 17196 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d20240604_v2.jsonl +Rank 0: Loading private_ui_phone_2403_long_caption_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 5998 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d240430_v1.jsonl +Rank 0: Loading private_ui_phone_2403_ocr_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ocr_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 31276 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ocr_d240430_v1.jsonl +Rank 0: Loading screen_qa_with_bbox_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_with_bbox_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 62401 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_with_bbox_d240430_v1.jsonl +Rank 0: Loading screenai_layout_20240604_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screenai_layout_20240604_v1.jsonl with all sampling strategy +Rank 0: Loaded 22076 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screenai_layout_20240604_v1.jsonl +Rank 0: Loading amex_grounding_d240813_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/amex_grounding_d240813_v1.jsonl with all sampling strategy +Rank 0: Loaded 102007 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/amex_grounding_d240813_v1.jsonl +Rank 0: Loading guicourse_guienv_text_grounding_1_d240815_v3 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_1_d240815_v3.jsonl with all sampling strategy +Rank 0: Loaded 63581 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_1_d240815_v3.jsonl +Rank 0: Loading guicourse_guienv_text_grounding_2_d240815_v3 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_2_d240815_v3.jsonl with all sampling strategy +Rank 0: Loaded 6852 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_2_d240815_v3.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1.jsonl +Rank 0: Loading screen_qa_short_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_short_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 27880 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_short_d240430_v1.jsonl +Rank 0: Loading private_aig_share_0815_logo_grounding_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_grounding_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_grounding_d240924_v1.jsonl +Rank 0: Loading private_schedual_extract_20240520_v2_r464_reprompt_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_schedual_extract_20240520_v2_r464_reprompt_d240607.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 928 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_schedual_extract_20240520_v2_r464_reprompt_d240607.jsonl +Rank 0: Loading private_ui2json_app_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_app_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2488 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_app_d20240822_v1.jsonl +Rank 0: Loading private_ui2json_os_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_os_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 1242 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_os_d20240822_v1.jsonl +Rank 0: Loading private_ui2json_web_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_web_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2360 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_web_d20240822_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607.jsonl with all sampling strategy +Rank 0: Loaded 3791 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607.jsonl +Rank 0: Loading private_ui_aig_share_2405_marker_recognition_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_marker_recognition_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5179 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_marker_recognition_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_ocr_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_ocr_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5090 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_ocr_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_operation_oral_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_operation_oral_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5070 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_operation_oral_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5248 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2408_region_caption_d240903_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2408_region_caption_d240903_v1.jsonl with all sampling strategy +Rank 0: Loaded 5854 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2408_region_caption_d240903_v1.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1.jsonl +Rank 0: Loading uground_web_direct_150k_description_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_direct_150k_description_filtered_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 74208 samples from VC:s3://gui/new_annotations/uground/web_direct_150k_description_filtered_202507011.jsonl +Rank 0: Loading uground_web_direct_258k_function_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_direct_258k_function_filtered_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 124132 samples from VC:s3://gui/new_annotations/uground/web_direct_258k_function_filtered_202507011.jsonl +Rank 0: Loading uground_web_hybrid_773k_max_25qa_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_hybrid_773k_max_25qa_filtered_new_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 621928 samples from VC:s3://gui/new_annotations/uground/web_hybrid_773k_max_25qa_filtered_new_202507011.jsonl +Rank 0: Loading altas_windows +Rank 0: Loading VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_windows_splited_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 537672 samples from VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_windows_splited_202507011.jsonl +Rank 0: Loading altas_linux +Rank 0: Loading VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_linux_splited_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 21578 samples from VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_linux_splited_202507011.jsonl +Rank 0: Loading atlas_macos +Rank 0: Loading VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_macos_splited_202507011.jsonl with random:20% sampling strategy +Rank 0: Loaded 3680 samples from VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_macos_splited_202507011.jsonl +Rank 0: Loading android_action_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/android/filter_action_grounding_20250405_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 5621 samples from VC:s3://gui/data_20250328/android/filter_action_grounding_20250405_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250328 +Rank 0: Loading VC:s3://gui-agent/data_20250328/windows/action_grounding_20250409_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 11980 samples from VC:s3://gui-agent/data_20250328/windows/action_grounding_20250409_202507011_20250722.jsonl +Rank 0: Loading web_action_grounding_20250328 +Rank 0: Loading VC:s3://gui-agent/data_20250328/web_25k/action_grounding_20250404_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 9459 samples from VC:s3://gui-agent/data_20250328/web_25k/action_grounding_20250404_202507011.jsonl +Rank 0: Loading ubuntu_action_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/ubuntu/action_grounding_20250407_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 328 samples from VC:s3://gui/data_20250310/ubuntu/action_grounding_20250407_202507011.jsonl +Rank 0: Loading ubuntu_action_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/ubuntu/action_grounding_20250407_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 54 samples from VC:s3://gui/data_20250317/ubuntu/action_grounding_20250407_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/action_grounding_20250421_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 480 samples from VC:s3://gui/data_20250317/windows/action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/crop_action_grounding_20250421_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 240 samples from VC:s3://gui/data_20250317/windows/crop_action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/action_grounding_20250421_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 944 samples from VC:s3://gui/data_20250310/windows/action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/crop_action_grounding_20250421_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 472 samples from VC:s3://gui/data_20250310/windows/crop_action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading mac_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/mac/action_grounding_20250410_202507011.jsonl with all sampling strategy +Rank 0: Loaded 1578 samples from VC:s3://gui-agent/data_20250407/mac/action_grounding_20250410_202507011.jsonl +Rank 0: Loading iphone_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/iphone/white/action_grounding_20250410_202507011.jsonl with all sampling strategy +Rank 0: Loaded 20394 samples from VC:s3://gui-agent/data_20250407/iphone/white/action_grounding_20250410_202507011.jsonl +Rank 0: Loading web_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/web/action_grounding_20250414_202507011.jsonl with random:20% sampling strategy +Rank 0: Loaded 14285 samples from VC:s3://gui-agent/data_20250407/web/action_grounding_20250414_202507011.jsonl +Rank 0: Loading android_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/android/action_grounding_20250410_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 3590 samples from VC:s3://gui-agent/data_20250407/android/action_grounding_20250410_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/action_grounding_20250416_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 21422 samples from VC:s3://gui-agent/data_20250407/windows/action_grounding_20250416_202507011_20250722.jsonl +Rank 0: Loading windows_human_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/human_action_grounding_20250416_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 100 samples from VC:s3://gui-agent/data_20250407/windows/human_action_grounding_20250416_202507011_20250722.jsonl +Rank 0: Loading windows_aug_cropping_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/sub_action_grounding_20250421_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 7675 samples from VC:s3://gui-agent/data_20250407/windows/sub_action_grounding_20250421_202507011.jsonl +Rank 0: Loading iphone_action_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/action_grounding_20250417_202507011.jsonl with all sampling strategy +Rank 0: Loaded 20116 samples from VC:s3://gui-agent/data_20250414/iphone/action_grounding_20250417_202507011.jsonl +Rank 0: Loading iphone_human_action_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/human_action_grounding_20250421_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2520 samples from VC:s3://gui-agent/data_20250414/iphone/human_action_grounding_20250421_202507011.jsonl +Rank 0: Loading mac_human_action_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/mac/human_action_grounding_20250418_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7814 samples from VC:s3://gui-agent/data_20250414/mac/human_action_grounding_20250418_202507011.jsonl +Rank 0: Loading android_action_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/Android/action_grounding_20250429_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 17838 samples from VC:s3://gui-agent/data_20250421/Android/action_grounding_20250429_202507011.jsonl +Rank 0: Loading android_action_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/Android/action_grounding_20250429_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 9008 samples from VC:s3://gui-agent/data_20250428/Android/action_grounding_20250429_202507011.jsonl +Rank 0: Loading web_canvas_action_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/web_canvas/action_grounding_20250429_202507011.jsonl with random:20% sampling strategy +Rank 0: Loaded 624 samples from VC:s3://gui-agent/data_20250428/web_canvas/action_grounding_20250429_202507011.jsonl +Rank 0: Loading web_action_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/web/action_grounding_20250505_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 100652 samples from VC:s3://gui-agent/data_20250421/web/action_grounding_20250505_202507011.jsonl +Rank 0: Loading ubuntu_action_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/ubuntu/action_grounding_20250505_202507011.jsonl with all sampling strategy +Rank 0: Loaded 28346 samples from VC:s3://gui-agent/data_20250428/ubuntu/action_grounding_20250505_202507011.jsonl +Rank 0: Loading android_action_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/android/action_grounding_20250506_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 4907 samples from VC:s3://gui-agent/data_20250505/android/action_grounding_20250506_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/action_grounding_20250508_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 2635 samples from VC:s3://gui-agent/data_20250505/windows/action_grounding_20250508_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/crop_action_grounding_20250508_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 5234 samples from VC:s3://gui-agent/data_20250505/windows/crop_action_grounding_20250508_202507011_20250722.jsonl +Rank 0: Loading ubuntu_action_grounding_20250508 +Rank 0: Loading VC:s3://gui-agent/data_20250508/ubuntu/action_grounding_20250509_202507011.jsonl with all sampling strategy +Rank 0: Loaded 3404 samples from VC:s3://gui-agent/data_20250508/ubuntu/action_grounding_20250509_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250510_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250510_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250510_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250510_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250526_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250526_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250526_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250526_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250526_3 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250527_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250527_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_3 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250527_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250527_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250529_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 17050 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250529_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250529_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 17050 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250529_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_1 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250510_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 2855 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250510_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_2 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250526_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 655 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250526_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_3 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250527_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 833 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250527_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_4 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250529_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 2643 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250529_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_1 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250510_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250510_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_2 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250526_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250526_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_3 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250527_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250527_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_4 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250529_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5286 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250529_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_5 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250510_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250510_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_6 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250526_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250526_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_7 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250527_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250527_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_8 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250529_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5286 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250529_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/action_grounding_20250619_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3130 samples from VC:s3://gui-agent/data_20250623/windows/action_grounding_20250619_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250619_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 3130 samples from VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250619_202507011_20250722.jsonl +Rank 0: Loading windows_hover_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/action_grounding_20250620_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 3333 samples from VC:s3://gui-agent/data_20250623/windows/action_grounding_20250620_202507011_20250722.jsonl +Rank 0: Loading windows_crop_hover_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250620_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 3333 samples from VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250620_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_1 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 833 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_2 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1666 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_3 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1666 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_pure_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_4 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 358 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_5 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 782 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_6 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 782 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_pure_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/action_grounding_20250627_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7946 samples from VC:s3://gui-agent/data_20250630/windows/action_grounding_20250627_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/crop_action_grounding_20250627_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 3973 samples from VC:s3://gui-agent/data_20250630/windows/crop_action_grounding_20250627_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_1 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 993 samples from VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1986 samples from VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1986 samples from VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_pure_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/action_grounding_20250630_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 1990 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/action_grounding_20250630_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_action_grounding_20250630_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1990 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_action_grounding_20250630_202507011_20250722.jsonl +Rank 0: Loading windows_human_action_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/action_grounding_20250703_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 5040 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/action_grounding_20250703_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_action_grounding_20250703_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 5040 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_action_grounding_20250703_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_4 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 630 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_5 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1260 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_6 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1260 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_pure_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_7 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 249 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_8 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 498 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_9 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 498 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_pure_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/action_grounding_20250708_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2538 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/action_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_action_grounding_20250708_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1269 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_action_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250707_1 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_action_grounding_20250708.jsonl with random:25% sampling strategy +Rank 0: Loaded 172 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_action_grounding_20250708.jsonl +Rank 0: Loading windows_aug_action_grounding_20250707_2 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_action_grounding_20250708.jsonl with random:25% sampling strategy +Rank 0: Loaded 634 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_action_grounding_20250708.jsonl +Rank 0: Loading windows_aug_action_grounding_20250707_3 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_action_grounding_20250708.jsonl with random:25% sampling strategy +Rank 0: Loaded 634 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_action_grounding_20250708.jsonl +Rank 0: Loading windows_human_action_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/action_grounding_20250717_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2832 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/action_grounding_20250717_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_action_grounding_20250717_20250722.jsonl with all sampling strategy +Rank 0: Loaded 2832 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_action_grounding_20250717_20250722.jsonl +Rank 0: Loading android_ocr_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/android/text_ocr_20250409.jsonl with all sampling strategy +Rank 0: Loaded 29878 samples from VC:s3://gui/data_20250328/android/text_ocr_20250409.jsonl +Rank 0: Loading mac_orc_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/mac/element_ocr_20250328.jsonl with all sampling strategy +Rank 0: Loaded 4393 samples from VC:s3://gui/data_20250328/mac/element_ocr_20250328.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/ubuntu/internvl_grounding_20250407.jsonl with random:80% sampling strategy +Rank 0: Loaded 254 samples from VC:s3://gui/data_20250310/ubuntu/internvl_grounding_20250407.jsonl +Rank 0: Loading windows_click_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/internvl_grounding_function_20250421.jsonl with random:80% sampling strategy +Rank 0: Loaded 1802 samples from VC:s3://gui/data_20250310/windows/internvl_grounding_function_20250421.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/ubuntu/internvl_grounding_20250407.jsonl with random:80% sampling strategy +Rank 0: Loaded 53 samples from VC:s3://gui/data_20250317/ubuntu/internvl_grounding_20250407.jsonl +Rank 0: Loading windows_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/internvl_grounding_20250421_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 6578 samples from VC:s3://gui/data_20250317/windows/internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/crop_internvl_grounding_20250421_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 3287 samples from VC:s3://gui/data_20250317/windows/crop_internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading windows_click_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/internvl_grounding_function_20250421_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 542 samples from VC:s3://gui/data_20250317/windows/internvl_grounding_function_20250421_20250722.jsonl +Rank 0: Loading windows_crop_click_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/crop_internvl_grounding_function_20250421_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 270 samples from VC:s3://gui/data_20250317/windows/crop_internvl_grounding_function_20250421_20250722.jsonl +Rank 0: Loading windows_internvl_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/internvl_grounding_20250421_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 10908 samples from VC:s3://gui/data_20250310/windows/internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/crop_internvl_grounding_20250421_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 5453 samples from VC:s3://gui/data_20250310/windows/crop_internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading android_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/android/internvl_grounding_20250409.jsonl with random:80% sampling strategy +Rank 0: Loaded 13950 samples from VC:s3://gui/data_20250328/android/internvl_grounding_20250409.jsonl +Rank 0: Loading windows_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui-agent/data_20250328/windows/internvl_grounding_20250425_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 12390 samples from VC:s3://gui-agent/data_20250328/windows/internvl_grounding_20250425_20250722.jsonl +Rank 0: Loading web_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/web_25k/internvl_grounding_20250409.jsonl with random:80% sampling strategy +Rank 0: Loaded 13402 samples from VC:s3://gui/data_20250328/web_25k/internvl_grounding_20250409.jsonl +Rank 0: Loading icon_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/icon_canva/icon_anno_20250328.jsonl with all sampling strategy +Rank 0: Loaded 81303 samples from VC:s3://gui/data_20250328/icon_canva/icon_anno_20250328.jsonl +Rank 0: Loading mac_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/mac/internvl_grounding_20250410.jsonl with all sampling strategy +Rank 0: Loaded 1251 samples from VC:s3://gui-agent/data_20250407/mac/internvl_grounding_20250410.jsonl +Rank 0: Loading iphone_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/iphone/white/internvl_grounding_20250410.jsonl with all sampling strategy +Rank 0: Loaded 27849 samples from VC:s3://gui-agent/data_20250407/iphone/white/internvl_grounding_20250410.jsonl +Rank 0: Loading web_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/web/internvl_grounding_20250414.jsonl with random:80% sampling strategy +Rank 0: Loaded 51607 samples from VC:s3://gui-agent/data_20250407/web/internvl_grounding_20250414.jsonl +Rank 0: Loading android_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/android/internvl_grounding_20250410.jsonl with random:80% sampling strategy +Rank 0: Loaded 6281 samples from VC:s3://gui-agent/data_20250407/android/internvl_grounding_20250410.jsonl +Rank 0: Loading windows_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/internvl_grounding_20250416_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 25961 samples from VC:s3://gui-agent/data_20250407/windows/internvl_grounding_20250416_20250722.jsonl +Rank 0: Loading windows_cropping_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/sub_internvl_grounding_20250421.jsonl with random:40% sampling strategy +Rank 0: Loaded 9763 samples from VC:s3://gui-agent/data_20250407/windows/sub_internvl_grounding_20250421.jsonl +Rank 0: Loading iphone_internvl_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/internvl_grounding_20250417.jsonl with all sampling strategy +Rank 0: Loaded 16896 samples from VC:s3://gui-agent/data_20250414/iphone/internvl_grounding_20250417.jsonl +Rank 0: Loading iphone_human_internvl_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/human_internvl_grounding_20250421.jsonl with all sampling strategy +Rank 0: Loaded 927 samples from VC:s3://gui-agent/data_20250414/iphone/human_internvl_grounding_20250421.jsonl +Rank 0: Loading mac_human_internvl_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/mac/human_internvl_grounding_20250418.jsonl with all sampling strategy +Rank 0: Loaded 3051 samples from VC:s3://gui-agent/data_20250414/mac/human_internvl_grounding_20250418.jsonl +Rank 0: Loading android_internvl_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/Android/internvl_grounding_20250429.jsonl with random:80% sampling strategy +Rank 0: Loaded 25217 samples from VC:s3://gui-agent/data_20250421/Android/internvl_grounding_20250429.jsonl +Rank 0: Loading android_internvl_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/Android/internvl_grounding_20250429.jsonl with random:80% sampling strategy +Rank 0: Loaded 12677 samples from VC:s3://gui-agent/data_20250428/Android/internvl_grounding_20250429.jsonl +Rank 0: Loading web_canvas_internvl_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/web_canvas/internvl_grounding_20250429.jsonl with random:40% sampling strategy +Rank 0: Loaded 1566 samples from VC:s3://gui-agent/data_20250428/web_canvas/internvl_grounding_20250429.jsonl +Rank 0: Loading web_internvl_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/web/internvl_grounding_20250505.jsonl with random:80% sampling strategy +Rank 0: Loaded 174170 samples from VC:s3://gui-agent/data_20250421/web/internvl_grounding_20250505.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/ubuntu/internvl_grounding_20250505.jsonl with random:80% sampling strategy +Rank 0: Loaded 24862 samples from VC:s3://gui-agent/data_20250428/ubuntu/internvl_grounding_20250505.jsonl +Rank 0: Loading android_internvl_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/android/internvl_grounding_20250506.jsonl with random:80% sampling strategy +Rank 0: Loaded 9142 samples from VC:s3://gui-agent/data_20250505/android/internvl_grounding_20250506.jsonl +Rank 0: Loading windows_internvl_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/internvl_grounding_20250508_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 4229 samples from VC:s3://gui-agent/data_20250505/windows/internvl_grounding_20250508_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/crop_internvl_grounding_20250508_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 4200 samples from VC:s3://gui-agent/data_20250505/windows/crop_internvl_grounding_20250508_20250722.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250508 +Rank 0: Loading VC:s3://gui-agent/data_20250508/ubuntu/internvl_grounding_20250509.jsonl with random:80% sampling strategy +Rank 0: Loaded 2868 samples from VC:s3://gui-agent/data_20250508/ubuntu/internvl_grounding_20250509.jsonl +Rank 0: Loading windows_internvl_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250510_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 9494 samples from VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250510_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250510_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 9494 samples from VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250510_20250722.jsonl +Rank 0: Loading windows_internvl_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250526_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 2270 samples from VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250526_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250526_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 2270 samples from VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250526_20250722.jsonl +Rank 0: Loading windows_internvl_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250529_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 28466 samples from VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250529_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250529_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 28466 samples from VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250529_20250722.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250619_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1620 samples from VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250619_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250619_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1620 samples from VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250619_20250722.jsonl +Rank 0: Loading windows_hover_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250620_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1714 samples from VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250620_20250722.jsonl +Rank 0: Loading windows_crop_hover_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250620_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 1714 samples from VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250620_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_1 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 1372 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_2 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2743 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_3 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2743 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_pure_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_4 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 590 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_5 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 1296 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_6 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 1296 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_pure_paste.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/internvl_grounding_20250627_20250722.jsonl with all sampling strategy +Rank 0: Loaded 4230 samples from VC:s3://gui-agent/data_20250630/windows/internvl_grounding_20250627_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/crop_internvl_grounding_20250627_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 4230 samples from VC:s3://gui-agent/data_20250630/windows/crop_internvl_grounding_20250627_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_1 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 1692 samples from VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 3384 samples from VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 3384 samples from VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_pure_paste.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/internvl_grounding_20250630_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1077 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/internvl_grounding_20250630_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_internvl_grounding_20250630_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 862 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_internvl_grounding_20250630_20250722.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/internvl_grounding_20250703_20250722.jsonl with all sampling strategy +Rank 0: Loaded 2676 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/internvl_grounding_20250703_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_internvl_grounding_20250703_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 2676 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_internvl_grounding_20250703_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_4 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 1070 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_5 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2141 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_6 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2141 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_pure_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_7 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 431 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_8 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 862 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_9 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 862 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_pure_paste.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/internvl_grounding_20250708_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1344 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/internvl_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_internvl_grounding_20250708_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1344 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_internvl_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250707_1 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_internvl_grounding_20250708.jsonl with random:40% sampling strategy +Rank 0: Loaded 292 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_internvl_grounding_20250708.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250707_2 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_internvl_grounding_20250708.jsonl with random:40% sampling strategy +Rank 0: Loaded 1075 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_internvl_grounding_20250708.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250707_3 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_internvl_grounding_20250708.jsonl with random:40% sampling strategy +Rank 0: Loaded 1075 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_internvl_grounding_20250708.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/internvl_grounding_20250717_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1509 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/internvl_grounding_20250717_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_internvl_grounding_20250717_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 1207 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_internvl_grounding_20250717_20250722.jsonl +Rank 0: Loading uibert_train_ground_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/uibert_train_ground_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 4646 samples from VC:s3://gui/new_annotations/gui_data_grounding/uibert_train_ground_d240430_v1.jsonl +Rank 0: Loading openapp_taperception_grounding_d240815_v2 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/openapp_taperception_grounding_d240815_v2.jsonl with all sampling strategy +Rank 0: Loaded 2500 samples from VC:s3://gui/new_annotations/gui_data_grounding/openapp_taperception_grounding_d240815_v2.jsonl +Rank 0: Loading openapp_widget_grounding_d240815_v2 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/openapp_widget_grounding_d240815_v2.jsonl with all sampling strategy +Rank 0: Loaded 14878 samples from VC:s3://gui/new_annotations/gui_data_grounding/openapp_widget_grounding_d240815_v2.jsonl +Rank 0: Loading openapp_mug_grounding_d240812 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/openapp_mug_grounding_d240812.jsonl with all sampling strategy +Rank 0: Loaded 26090 samples from VC:s3://gui/new_annotations/gui_data_grounding/openapp_mug_grounding_d240812.jsonl +Rank 0: Loading private_ui_phone_2403_ground_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/private_ui_phone_2403_ground_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 24798 samples from VC:s3://gui/new_annotations/gui_data_grounding/private_ui_phone_2403_ground_d240430_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_ground_d240521_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2405_ground_d240521_v1.jsonl with all sampling strategy +Rank 0: Loaded 5008 samples from VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2405_ground_d240521_v1.jsonl +Rank 0: Loading private_ui_aig_share_2406_ground_d240612_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2406_ground_d240612_v1.jsonl with all sampling strategy +Rank 0: Loaded 7903 samples from VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2406_ground_d240612_v1.jsonl +Rank 0: Loading windows_pc_agent_e_planning_cot +Rank 0: Loading VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 55564 samples from VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data.jsonl +Rank 0: Loading windows_pc_agent_e_navigation +Rank 0: Loading VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data_without_think.jsonl with all sampling strategy +Rank 0: Loaded 27782 samples from VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data_without_think.jsonl +Rank 0: Loading os_genesis_ac_training_data +Rank 0: Skipping os_genesis_ac_training_data due to repeat_time=0 +Rank 0: Loading os_genesis_aw_training_data +Rank 0: Skipping os_genesis_aw_training_data due to repeat_time=0 +Rank 0: Loading os_genesis_web_training +Rank 0: Skipping os_genesis_web_training due to repeat_time=0 +Rank 0: Loading gui_odyssey_plus_1 +Rank 0: Skipping gui_odyssey_plus_1 due to repeat_time=0 +Rank 0: Loading gui_odyssey_plus_2 +Rank 0: Skipping gui_odyssey_plus_2 due to repeat_time=0 +Rank 0: Loading gui_odyssey_plus_custom_3 +Rank 0: Skipping gui_odyssey_plus_custom_3 due to repeat_time=0 +Rank 0: Loading mm_gui_mid +Rank 0: Skipping mm_gui_mid due to repeat_time=0 +Rank 0: Loading text_gui_mid +Rank 0: Skipping text_gui_mid due to repeat_time=0 +Rank 0: Loading gui_mid_trajectory +Rank 0: Skipping gui_mid_trajectory due to repeat_time=0 +Rank 0: Loading ubuntu_rag +Rank 0: Loading VC:s3://gui-agent/cua_text_rag/ubuntu_rag.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7024 samples from VC:s3://gui-agent/cua_text_rag/ubuntu_rag.jsonl +Rank 0: Loading windows_rag +Rank 0: Loading VC:s3://gui-agent/cua_text_rag/windows_rag.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3144 samples from VC:s3://gui-agent/cua_text_rag/windows_rag.jsonl +Rank 0: Total training samples: 6240940 +Rank 0: Formatting inputs...Skip in lazy mode +Detected kernel version 3.10.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher. +Rank 0: Length of multimodal samples: 6230528, pure textual samples: 9984 +Parameter Offload: Total persistent parameters: 755712 in 408 params +W0818 21:20:41.628000 94266 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 21:20:41.628000 94266 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:20:41.628000 94266 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 21:20:41.628000 94266 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:20:41.629000 67964 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 21:20:41.629000 67964 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:20:41.629000 67964 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 21:20:41.629000 67964 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:20:41.756000 5751 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 21:20:41.756000 5751 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:20:41.756000 5751 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 21:20:41.756000 5751 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:20:41.793000 20992 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 21:20:41.793000 20992 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:20:41.793000 20992 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 21:20:41.793000 20992 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:20:42.013000 37847 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 21:20:42.013000 37847 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:20:42.013000 37847 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 21:20:42.013000 37847 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:20:42.666000 60556 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 21:20:42.666000 60556 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:20:42.666000 60556 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 21:20:42.666000 60556 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:20:47.310000 66849 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 21:20:47.310000 66849 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:20:47.310000 66849 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 21:20:47.310000 66849 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +[2025-08-18 21:21:04,206] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:04,224] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:04,226] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:04,244] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:04,251] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:04,255] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:04,255] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:04,256] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:04,262] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:04,264] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:04,270] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:04,265] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:04,269] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:04,271] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:04,288] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:04,284] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:04,289] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:04,289] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:04,289] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:04,285] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:04,285] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:04,292] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:04,295] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:04,295] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:04,297] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:04,297] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:04,294] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:04,294] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:04,294] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:04,305] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:04,305] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:04,306] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:04,306] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:04,306] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:04,307] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:04,307] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:04,307] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:04,312] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:04,313] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:04,313] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:04,313] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:04,313] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:04,313] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:04,314] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:04,314] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:07,418] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:21:07,418] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:21:07,419] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:21:07,419] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:21:07,419] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:21:07,420] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:21:07,420] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:21:07,420] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:21:07,420] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:21:07,419] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:21:07,419] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:21:07,419] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:21:07,419] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:21:07,419] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:21:07,420] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:21:07,416] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:21:07,420] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:21:07,420] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:21:07,421] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:21:07,421] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:21:07,421] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:21:07,425] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:21:07,424] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:21:07,425] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:21:07,415] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:21:07,416] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:21:07,417] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:21:07,419] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:21:07,420] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:21:07,432] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:07,509] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:07,510] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:07,510] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:07,515] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:07,516] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:07,516] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:07,516] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:07,858] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:21:07,891] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to mo[2025-08-18 21:21:07,905] [INFO] [config.py:733:__init__] Config mesh_deviYou are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:21:07,908] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:21:07,982] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:21:07,986] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:21:07,987] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:21:07,988] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:21:07,989] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:21:07,990] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:21:07,990] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:21:08,019] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:21:08,038] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:21:08,038] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:21:08,045] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:21:08,045] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:21:08,047] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +e Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:21:08,042] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:21:08,044] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:21:08,044] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:21:08,047] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:21:08,050] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:21:08,049] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:21:08,052] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +ce None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to moYou are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +ith `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:21:08,061] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:21:08,062] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:21:08,063] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:21:08,066] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:21:08,066] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:21:08,066] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:21:08,068] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:21:10,845] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:21:10,845] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:21:10,845] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:21:10,845] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:21:10,845] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:21:10,845] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:21:10,845] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:21:10,845] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:21:11,314] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:21:11,464] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:21:11,465] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:21:11,466] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:21:11,471] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:21:11,471] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:21:11,471] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:21:11,473] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:21:21,758] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:21,758] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:21,764] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:21,764] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:21,764] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:21,767] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:21,767] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:21,767] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:21:29,798] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:21:29,798] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:21:29,799] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:21:29,799] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:21:29,799] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:21:29,800] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:21:29,800] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:21:29,800] [INFO] [comm.py:683:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl +[2025-08-18 21:21:29,801] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:21:30,295] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:21:30,418] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:21:30,423] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:21:30,423] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:21:30,424] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:21:30,424] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:21:30,424] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:21:30,425] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:21:33,773] [INFO] [partition_parameters.py:348:__exit__] finished initializing model - num_params = 825, num_elems = 4.07B + Loading checkpoint shards: 0%| | 0/2 [00:00 before Client(conf_path) +Rank 0: --> after Client(conf_path) +Rank 0: Loading datasets: /mnt/petrelfs/liuzhaoyang/workspace/GUIAgent/internvl_chat/data/internvl_meta/meta/meta_250816_1.json +Rank 0: Loading guienv +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl with all sampling strategy +Rank 0: Loaded 327972 samples from VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl +Rank 0: Loading omniact +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl with all sampling strategy +Rank 0: Loaded 6720 samples from VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl +Rank 0: Loading ricoig16k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16133 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl +Rank 0: Loading ricosca +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl with all sampling strategy +Rank 0: Loaded 173212 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl +Rank 0: Loading seeclick +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl with all sampling strategy +Rank 0: Loaded 271121 samples from VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl +Rank 0: Loading ui_refexp +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl with all sampling strategy +Rank 0: Loaded 15624 samples from VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl +Rank 0: Loading webui350k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 57389 samples from VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl +Rank 0: Loading widget_captioning +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl with all sampling strategy +Rank 0: Loaded 101426 samples from VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl +Rank 0: Loading aitw-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl +Rank 0: Loading aitw-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl +Rank 0: Loading aitw-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl +Rank 0: Loading amex-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl +Rank 0: Loading amex-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl +Rank 0: Loading amex-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl +Rank 0: Loading android_control +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 149428 samples from VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl +Rank 0: Loading coat +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl with all sampling strategy +Rank 0: Loaded 11833 samples from VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl +Rank 0: Loading guiact-web-multi-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl +Rank 0: Loading guiact-web-multi-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl +Rank 0: Loading guiact-web-multi-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl +Rank 0: Loading guiact-web-single +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl with all sampling strategy +Rank 0: Loaded 67396 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl +Rank 0: Loading guide +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl with all sampling strategy +Rank 0: Loaded 13544 samples from VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl +Rank 0: Loading gui-odyssey-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl +Rank 0: Loading gui-odyssey-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl +Rank 0: Loading gui-odyssey-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl +Rank 0: Loading mind2web-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l1_202507011.jsonl +Rank 0: Loading mind2web-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l2_202507011.jsonl +Rank 0: Loading mind2web-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l3_202507011.jsonl +Rank 0: Loading miniwob-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l1_202507011.jsonl +Rank 0: Loading miniwob-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l2_202507011.jsonl +Rank 0: Loading miniwob-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l3_202507011.jsonl +Rank 0: Loading aguvis_android_control-v2 +Rank 0: Skipping aguvis_android_control-v2 due to repeat_time=0 +Rank 0: Loading aguvis_coat-v2 +Rank 0: Skipping aguvis_coat-v2 due to repeat_time=0 +Rank 0: Loading aguvis_docvqa_grounding +Rank 0: Skipping aguvis_docvqa_grounding due to repeat_time=0 +Rank 0: Loading aguvis_guiact-web-multi +Rank 0: Skipping aguvis_guiact-web-multi due to repeat_time=0 +Rank 0: Loading aguvis_guiact-web-single-v2 +Rank 0: Skipping aguvis_guiact-web-single-v2 due to repeat_time=0 +Rank 0: Loading aguvis_guide_si_10k-v2 +Rank 0: Skipping aguvis_guide_si_10k-v2 due to repeat_time=0 +Rank 0: Loading aguvis_guienv +Rank 0: Skipping aguvis_guienv due to repeat_time=0 +Rank 0: Loading aguvis_mind2web_train_v1.0.1 +Rank 0: Skipping aguvis_mind2web_train_v1.0.1 due to repeat_time=0 +Rank 0: Loading aguvis_omniact +Rank 0: Skipping aguvis_omniact due to repeat_time=0 +Rank 0: Loading aguvis_osatlas_ui_tars_cleaned +Rank 0: Skipping aguvis_osatlas_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading aguvis_ricoig16k +Rank 0: Skipping aguvis_ricoig16k due to repeat_time=0 +Rank 0: Loading aguvis_ricosca +Rank 0: Skipping aguvis_ricosca due to repeat_time=0 +Rank 0: Loading aguvis_seeclick_mi_ui_tars_cleaned +Rank 0: Skipping aguvis_seeclick_mi_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading aguvis_seeclick_ui_tars_cleaned_fixed +Rank 0: Skipping aguvis_seeclick_ui_tars_cleaned_fixed due to repeat_time=0 +Rank 0: Loading aguvis_ui_refexp +Rank 0: Skipping aguvis_ui_refexp due to repeat_time=0 +Rank 0: Loading aguvis_webui350k +Rank 0: Skipping aguvis_webui350k due to repeat_time=0 +Rank 0: Loading aguvis_widget_captioning +Rank 0: Skipping aguvis_widget_captioning due to repeat_time=0 +Rank 0: Loading icon_caption_icon_v0222_description +Rank 0: Skipping icon_caption_icon_v0222_description due to repeat_time=0 +Rank 0: Loading icon_grounding_icon_v0222_grounding +Rank 0: Skipping icon_grounding_icon_v0222_grounding due to repeat_time=0 +Rank 0: Loading refusal_component_final_1.5m +Rank 0: Skipping refusal_component_final_1.5m due to repeat_time=0 +Rank 0: Loading refusal_component_library_snap_icon_data_grounding +Rank 0: Skipping refusal_component_library_snap_icon_data_grounding due to repeat_time=0 +Rank 0: Loading refusal_component_v1_130k +Rank 0: Skipping refusal_component_v1_130k due to repeat_time=0 +Rank 0: Loading refusal_guienv +Rank 0: Skipping refusal_guienv due to repeat_time=0 +Rank 0: Loading refusal_icon_v0222_grounding +Rank 0: Skipping refusal_icon_v0222_grounding due to repeat_time=0 +Rank 0: Loading refusal_osatlas_ui_tars_cleaned +Rank 0: Skipping refusal_osatlas_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading refusal_ricosca +Rank 0: Skipping refusal_ricosca due to repeat_time=0 +Rank 0: Loading refusal_seeclick_mi_ui_tars_cleaned +Rank 0: Skipping refusal_seeclick_mi_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading refusal_seeclick_ui_tars_cleaned_fixed +Rank 0: Skipping refusal_seeclick_ui_tars_cleaned_fixed due to repeat_time=0 +Rank 0: Loading refusal_training_data_icon_grounded_merged +Rank 0: Skipping refusal_training_data_icon_grounded_merged due to repeat_time=0 +Rank 0: Loading component_generated_component_final_1.5m_cleaned_split +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_final_1.5m_cleaned_split_20250713.jsonl with random:10% sampling strategy +Rank 0: Loaded 3987 samples from VC:s3://gui-agent/jedi/annotations_250713/component_final_1.5m_cleaned_split_20250713.jsonl +Rank 0: Loading component_generated_component_library_snap_icon_data_description +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_description_conversations_20250713.jsonl with random:50% sampling strategy +Rank 0: Loaded 11061 samples from VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_description_conversations_20250713.jsonl +Rank 0: Loading component_generated_component_library_snap_icon_data_grounding +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_grounding_conversations_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 4424 samples from VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_grounding_conversations_20250713.jsonl +Rank 0: Loading component_generated_component_v1_130k +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_v1_130k_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 26376 samples from VC:s3://gui-agent/jedi/annotations_250713/component_v1_130k_20250713.jsonl +Rank 0: Loading component_rule-based_doc_data_new +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/doc_data_new_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 3153 samples from VC:s3://gui-agent/jedi/annotations_250713/doc_data_new_20250713.jsonl +Rank 0: Loading component_rule-based_doc_scroll_data_new +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/doc_scroll_data_new_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 603 samples from VC:s3://gui-agent/jedi/annotations_250713/doc_scroll_data_new_20250713.jsonl +Rank 0: Loading component_rule-based_ethercalc_v1 +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/ethercalc_v1_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 2012 samples from VC:s3://gui-agent/jedi/annotations_250713/ethercalc_v1_20250713.jsonl +Rank 0: Loading component_rule-based_slide_v1_17k +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/slide_v1_17k_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 2363 samples from VC:s3://gui-agent/jedi/annotations_250713/slide_v1_17k_20250713.jsonl +Rank 0: Loading icon_caption_ios_app_data +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/ios_app_data_conversations-images_pure_color_background_20250713.jsonl with all sampling strategy +Rank 0: Loaded 49498 samples from VC:s3://gui-agent/jedi/annotations_250713/ios_app_data_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_caption_mac_app_data +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/mac_app_data_conversations-images_pure_color_background_20250713.jsonl with all sampling strategy +Rank 0: Loaded 18083 samples from VC:s3://gui-agent/jedi/annotations_250713/mac_app_data_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_caption_training_data_icon +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_pure_color_background_20250713.jsonl with random:50% sampling strategy +Rank 0: Loaded 75874 samples from VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_grounding_training_data_icon_grounded_merged +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_grounded_merged_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 5466 samples from VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_grounded_merged_20250713.jsonl +Rank 0: Loading layout_layout200k_training_data_qwen25 +Rank 0: Skipping layout_layout200k_training_data_qwen25 due to repeat_time=0 +Rank 0: Loading layout_layout200k_grounding_training_data_qwen25 +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/layout200k_grounding_training_data_qwen25_20250713.jsonl with random:10% sampling strategy +Rank 0: Loaded 158612 samples from VC:s3://gui-agent/jedi/annotations_250713/layout200k_grounding_training_data_qwen25_20250713.jsonl +Rank 0: Loading layout_layout400k_claude_training_data_qwen25_split +Rank 0: Skipping layout_layout400k_claude_training_data_qwen25_split due to repeat_time=0 +Rank 0: Loading layout_layout400k_claude_grounding_training_data_qwen25_split +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/layout400k_claude_grounding_training_data_qwen25_split_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 7540 samples from VC:s3://gui-agent/jedi/annotations_250713/layout400k_claude_grounding_training_data_qwen25_split_20250713.jsonl +Rank 0: Loading layout_os_layout_v1 +Rank 0: Skipping layout_os_layout_v1 due to repeat_time=0 +Rank 0: Loading layout_os_layout_v1_grounding +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/os_layout_v1_grounding_training_data_qwen25_split_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 7857 samples from VC:s3://gui-agent/jedi/annotations_250713/os_layout_v1_grounding_training_data_qwen25_split_20250713.jsonl +Rank 0: Loading mind2web_raw_image +Rank 0: Loading VC:s3://gui-agent/mind2web_train/navigation_20250705.jsonl with all sampling strategy +Rank 0: Loaded 5740 samples from VC:s3://gui-agent/mind2web_train/navigation_20250705.jsonl +Rank 0: Loading ws_android_navigation_20250328 +Rank 0: Skipping ws_android_navigation_20250328 due to repeat_time=0 +Rank 0: Loading ws_android_navigation_20250407 +Rank 0: Skipping ws_android_navigation_20250407 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_w_history_20250328 +Rank 0: Skipping ws_web_navigation_w_history_20250328 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_wo_history_20250328 +Rank 0: Skipping ws_web_navigation_wo_history_20250328 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_20250421 +Rank 0: Skipping ws_web_navigation_20250421 due to repeat_time=0 +Rank 0: Loading ws_ubuntu_navigation_20250328 +Rank 0: Skipping ws_ubuntu_navigation_20250328 due to repeat_time=0 +Rank 0: Loading ws_android_navigation_20250505 +Rank 0: Skipping ws_android_navigation_20250505 due to repeat_time=0 +Rank 0: Loading internal_android_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 48814 samples from VC:s3://gui-agent/data_20250612/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19042 samples from VC:s3://gui-agent/data_20250612/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8363 samples from VC:s3://gui-agent/data_20250612/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 26412 samples from VC:s3://gui-agent/data_20250612/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 57522 samples from VC:s3://gui-agent/data_20250612/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 1342 samples from VC:s3://gui-agent/data_20250624/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 15766 samples from VC:s3://gui-agent/data_20250624/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19280 samples from VC:s3://gui-agent/data_20250630/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 3560 samples from VC:s3://gui-agent/data_20250630/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 9258 samples from VC:s3://gui-agent/data_20250630/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 420 samples from VC:s3://gui-agent/data_20250707/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 8898 samples from VC:s3://gui-agent/data_20250707/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 21026 samples from VC:s3://gui-agent/data_20250707/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 4490 samples from VC:s3://gui-agent/data_20250707/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 22154 samples from VC:s3://gui-agent/data_20250714/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/web/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 11614 samples from VC:s3://gui-agent/data_20250714/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 16767 samples from VC:s3://gui-agent/data_20250714/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 48814 samples from VC:s3://gui-agent/data_20250612/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 11970 samples from VC:s3://gui-agent/data_20250612/android/planning_20250811.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19042 samples from VC:s3://gui-agent/data_20250612/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 4926 samples from VC:s3://gui-agent/data_20250612/mac/planning_20250811.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8363 samples from VC:s3://gui-agent/data_20250612/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3716 samples from VC:s3://gui-agent/data_20250612/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 26412 samples from VC:s3://gui-agent/data_20250612/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 4420 samples from VC:s3://gui-agent/data_20250612/windows/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 115044 samples from VC:s3://gui-agent/data_20250612/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2684 samples from VC:s3://gui-agent/data_20250624/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 15766 samples from VC:s3://gui-agent/data_20250624/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7402 samples from VC:s3://gui-agent/data_20250624/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19280 samples from VC:s3://gui-agent/data_20250630/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3746 samples from VC:s3://gui-agent/data_20250630/android/planning_20250811.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 3560 samples from VC:s3://gui-agent/data_20250630/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 2469 samples from VC:s3://gui-agent/data_20250630/mac/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 18516 samples from VC:s3://gui-agent/data_20250630/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 420 samples from VC:s3://gui-agent/data_20250707/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 243 samples from VC:s3://gui-agent/data_20250707/mac/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 8898 samples from VC:s3://gui-agent/data_20250707/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 1188 samples from VC:s3://gui-agent/data_20250707/windows/planning_20250811.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8979 samples from VC:s3://gui-agent/data_20250707/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 1599 samples from VC:s3://gui-agent/data_20250707/android/planning_20250811.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 21026 samples from VC:s3://gui-agent/data_20250707/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2657 samples from VC:s3://gui-agent/data_20250707/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 22154 samples from VC:s3://gui-agent/data_20250714/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2383 samples from VC:s3://gui-agent/data_20250714/windows/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/web/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 23229 samples from VC:s3://gui-agent/data_20250714/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 16767 samples from VC:s3://gui-agent/data_20250714/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2389 samples from VC:s3://gui-agent/data_20250714/ubuntu/planning_20250811.jsonl +Rank 0: Loading private_aig_share_0815_logo_oral_operation_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_oral_operation_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_oral_operation_d240924_v1.jsonl +Rank 0: Loading private_aig_share_0815_logo_region_caption_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_region_caption_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_region_caption_d240924_v1.jsonl +Rank 0: Loading private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2.jsonl with all sampling strategy +Rank 0: Loaded 20293 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2.jsonl +Rank 0: Loading private_ui_phone_comment_20240606_json_d20241023_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_comment_20240606_json_d20241023_v2.jsonl with all sampling strategy +Rank 0: Loaded 1055 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_comment_20240606_json_d20241023_v2.jsonl +Rank 0: Loading private_ui_internal_aig_json_d241126 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_json_d241126.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 6837 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_json_d241126.jsonl +Rank 0: Loading private_ui_internal_aig_xml_d241126 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_xml_d241126.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 6873 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_xml_d241126.jsonl +Rank 0: Loading OS_Altas_androidworld_grounding_d241120_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/OS_Altas_androidworld_grounding_d241120_v1.jsonl with all sampling strategy +Rank 0: Loaded 89860 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/OS_Altas_androidworld_grounding_d241120_v1.jsonl +Rank 0: Loading private_ui_aig_share_long_caption_20240604_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_long_caption_20240604_v1.jsonl with repeat:4 sampling strategy +Rank 0: Loaded 3156 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_long_caption_20240604_v1.jsonl +Rank 0: Loading aw_1218_grounding +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/grounding_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/grounding_new.jsonl +Rank 0: Loading aw_1218_regioncaption +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/regioncaption_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/regioncaption_new.jsonl +Rank 0: Loading aw_1218_oral_operation +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/oral_operation_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/oral_operation_new.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600.jsonl with all sampling strategy +Rank 0: Loaded 6600 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1.jsonl +Rank 0: Loading private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607.jsonl with all sampling strategy +Rank 0: Loaded 24620 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607.jsonl +Rank 0: Loading private_ui_phone_2403_long_caption_d20240604_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d20240604_v2.jsonl with all sampling strategy +Rank 0: Loaded 17196 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d20240604_v2.jsonl +Rank 0: Loading private_ui_phone_2403_long_caption_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 5998 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d240430_v1.jsonl +Rank 0: Loading private_ui_phone_2403_ocr_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ocr_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 31276 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ocr_d240430_v1.jsonl +Rank 0: Loading screen_qa_with_bbox_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_with_bbox_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 62401 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_with_bbox_d240430_v1.jsonl +Rank 0: Loading screenai_layout_20240604_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screenai_layout_20240604_v1.jsonl with all sampling strategy +Rank 0: Loaded 22076 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screenai_layout_20240604_v1.jsonl +Rank 0: Loading amex_grounding_d240813_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/amex_grounding_d240813_v1.jsonl with all sampling strategy +Rank 0: Loaded 102007 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/amex_grounding_d240813_v1.jsonl +Rank 0: Loading guicourse_guienv_text_grounding_1_d240815_v3 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_1_d240815_v3.jsonl with all sampling strategy +Rank 0: Loaded 63581 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_1_d240815_v3.jsonl +Rank 0: Loading guicourse_guienv_text_grounding_2_d240815_v3 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_2_d240815_v3.jsonl with all sampling strategy +Rank 0: Loaded 6852 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_2_d240815_v3.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1.jsonl +Rank 0: Loading screen_qa_short_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_short_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 27880 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_short_d240430_v1.jsonl +Rank 0: Loading private_aig_share_0815_logo_grounding_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_grounding_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_grounding_d240924_v1.jsonl +Rank 0: Loading private_schedual_extract_20240520_v2_r464_reprompt_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_schedual_extract_20240520_v2_r464_reprompt_d240607.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 928 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_schedual_extract_20240520_v2_r464_reprompt_d240607.jsonl +Rank 0: Loading private_ui2json_app_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_app_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2488 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_app_d20240822_v1.jsonl +Rank 0: Loading private_ui2json_os_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_os_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 1242 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_os_d20240822_v1.jsonl +Rank 0: Loading private_ui2json_web_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_web_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2360 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_web_d20240822_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607.jsonl with all sampling strategy +Rank 0: Loaded 3791 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607.jsonl +Rank 0: Loading private_ui_aig_share_2405_marker_recognition_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_marker_recognition_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5179 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_marker_recognition_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_ocr_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_ocr_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5090 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_ocr_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_operation_oral_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_operation_oral_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5070 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_operation_oral_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5248 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2408_region_caption_d240903_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2408_region_caption_d240903_v1.jsonl with all sampling strategy +Rank 0: Loaded 5854 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2408_region_caption_d240903_v1.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1.jsonl +Rank 0: Loading uground_web_direct_150k_description_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_direct_150k_description_filtered_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 74208 samples from VC:s3://gui/new_annotations/uground/web_direct_150k_description_filtered_202507011.jsonl +Rank 0: Loading uground_web_direct_258k_function_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_direct_258k_function_filtered_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 124132 samples from VC:s3://gui/new_annotations/uground/web_direct_258k_function_filtered_202507011.jsonl +Rank 0: Loading uground_web_hybrid_773k_max_25qa_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_hybrid_773k_max_25qa_filtered_new_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 621928 samples from VC:s3://gui/new_annotations/uground/web_hybrid_773k_max_25qa_filtered_new_202507011.jsonl +Rank 0: Loading altas_windows +Rank 0: Loading VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_windows_splited_202507011.jsonl with random:50% sampling strategy +W0818 21:26:42.925000 34482 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 21:26:42.925000 34482 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:26:42.925000 34482 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 21:26:42.925000 34482 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +*** +W0818 21:26:44.148000 84327 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 21:26:44.148000 84327 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:26:44.148000 84327 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 21:26:44.148000 84327 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:26:44.184000 34182 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 21:26:44.184000 34182 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:26:44.184000 34182 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 21:26:44.184000 34182 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:26:44.206000 105157 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 21:26:44.206000 105157 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:26:44.206000 105157 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 21:26:44.206000 105157 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:28:43.289000 22976 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 21:28:43.289000 22976 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:28:43.289000 22976 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 21:28:43.289000 22976 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:28:43.285000 79663 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 21:28:43.285000 79663 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:28:43.285000 79663 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 21:28:43.285000 79663 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:28:43.299000 88629 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 21:28:43.299000 88629 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:28:43.299000 88629 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 21:28:43.299000 88629 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:28:43.845000 53611 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 21:28:43.845000 53611 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:28:43.845000 53611 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 21:28:43.845000 53611 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:28:43.961000 111905 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 21:28:43.961000 111905 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:28:43.961000 111905 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 21:28:43.961000 111905 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:28:44.027000 81212 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 21:28:44.027000 81212 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:28:44.027000 81212 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 21:28:44.027000 81212 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:28:44.359000 41290 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 21:28:44.359000 41290 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:28:44.359000 41290 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 21:28:44.359000 41290 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:28:44.382000 36935 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 21:28:44.382000 36935 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 21:28:44.382000 36935 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 21:28:44.382000 36935 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +[2025-08-18 21:28:57,050] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:57,078] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:57,076] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:57,077] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:57,077] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:57,077] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:57,088] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:57,085] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:57,096] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:57,086] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:57,097] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:57,097] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:57,097] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:57,104] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:57,104] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:57,105] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:57,105] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:57,107] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:57,107] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:57,107] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:57,107] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:57,109] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:57,114] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:57,114] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:57,117] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:57,117] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:57,117] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:57,117] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:57,116] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:57,117] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:57,118] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:57,119] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:57,119] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:57,119] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:57,119] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:57,132] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:57,132] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:57,133] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:57,133] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:57,137] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:57,137] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:57,137] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:57,137] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:57,153] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:57,156] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:57,156] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:57,156] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:57,157] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:57,157] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:57,157] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:57,157] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:58,171] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:58,171] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:58,172] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:58,172] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:58,173] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:58,173] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:58,173] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:28:58,173] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 21:29:00,360] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:29:00,372] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:29:00,360] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:29:00,362] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:29:00,371] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:29:00,373] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:29:00,373] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:29:00,373] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:29:00,363] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:29:00,364] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:29:00,370] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:29:00,370] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:29:00,371] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:29:00,370] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:29:00,371] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:29:00,371] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:29:00,371] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:29:00,373] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:29:00,373] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:29:00,373] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:29:00,373] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:29:00,373] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:29:00,373] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:29:00,373] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:29:00,373] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:29:00,813] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:29:00,837] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:29:00,846] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:29:00,852] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:29:00,857] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:29:00,941] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:29:00,951] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:29:00,953] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:29:00,954] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:29:00,955] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:29:00,957] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:29:00,957] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:29:00,974] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:29:00,984] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:29:00,985] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:29:00,985] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:29:00,987] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:29:00,987] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to moYou are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. + a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:29:00,991] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to moYou are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:29:00,996] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:29:01,000] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:29:01,001] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:29:01,004] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:29:01,004] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:29:00,998] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +ce None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +ce None world_size = 64 +[2025-08-18 21:29:01,001] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:29:01,012] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +ve the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:29:01,009] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:29:01,010] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:29:01,011] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:29:01,011] [INFO] [config.py:733:__You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initializYou are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:29:01,245] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:29:01,245] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:29:01,245] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:29:01,245] [INFO] [comm.py:683:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl +[2025-08-18 21:29:01,245] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:29:01,245] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:29:01,245] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:29:01,245] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:29:01,253] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 21:29:01,710] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:29:01,825] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:29:01,839] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:29:01,843] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:29:01,844] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:29:01,844] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:29:01,845] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 21:29:01,845] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 21:29:04,215] [INFO] [partition_parameters.py:348:__exit__] finished initializing model - num_params = 825, num_elems = 4.07B + Loading checkpoint shards: 0%| | 0/2 [00:00 before Client(conf_path) +Rank 0: --> after Client(conf_path) +Rank 0: Loading datasets: /mnt/petrelfs/liuzhaoyang/workspace/GUIAgent/internvl_chat/data/internvl_meta/meta/meta_250816_1.json +Rank 0: Loading guienv +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl with all sampling strategy +Rank 0: Loaded 327972 samples from VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl +Rank 0: Loading omniact +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl with all sampling strategy +Rank 0: Loaded 6720 samples from VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl +Rank 0: Loading ricoig16k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16133 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl +Rank 0: Loading ricosca +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl with all sampling strategy +Rank 0: Loaded 173212 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl +Rank 0: Loading seeclick +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl with all sampling strategy +Rank 0: Loaded 271121 samples from VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl +Rank 0: Loading ui_refexp +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl with all sampling strategy +Rank 0: Loaded 15624 samples from VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl +Rank 0: Loading webui350k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 57389 samples from VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl +Rank 0: Loading widget_captioning +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl with all sampling strategy +Rank 0: Loaded 101426 samples from VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl +Rank 0: Loading aitw-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl +Rank 0: Loading aitw-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl +Rank 0: Loading aitw-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl +Rank 0: Loading amex-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl +Rank 0: Loading amex-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl +Rank 0: Loading amex-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl +Rank 0: Loading android_control +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 149428 samples from VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl +Rank 0: Loading coat +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl with all sampling strategy +Rank 0: Loaded 11833 samples from VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl +Rank 0: Loading guiact-web-multi-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl +Rank 0: Loading guiact-web-multi-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl +Rank 0: Loading guiact-web-multi-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl +Rank 0: Loading guiact-web-single +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl with all sampling strategy +Rank 0: Loaded 67396 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl +Rank 0: Loading guide +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl with all sampling strategy +Rank 0: Loaded 13544 samples from VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl +Rank 0: Loading gui-odyssey-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl +Rank 0: Loading gui-odyssey-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl +Rank 0: Loading gui-odyssey-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl +Rank 0: Loading mind2web-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l1_202507011.jsonl +Rank 0: Loading mind2web-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l2_202507011.jsonl +Rank 0: Loading mind2web-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l3_202507011.jsonl +Rank 0: Loading miniwob-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l1_202507011.jsonl +Rank 0: Loading miniwob-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l2_202507011.jsonl +Rank 0: Loading miniwob-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l3_202507011.jsonl +Rank 0: Loading aguvis_android_control-v2 +Rank 0: Skipping aguvis_android_control-v2 due to repeat_time=0 +Rank 0: Loading aguvis_coat-v2 +Rank 0: Skipping aguvis_coat-v2 due to repeat_time=0 +Rank 0: Loading aguvis_docvqa_grounding +Rank 0: Skipping aguvis_docvqa_grounding due to repeat_time=0 +Rank 0: Loading aguvis_guiact-web-multi +Rank 0: Skipping aguvis_guiact-web-multi due to repeat_time=0 +Rank 0: Loading aguvis_guiact-web-single-v2 +Rank 0: Skipping aguvis_guiact-web-single-v2 due to repeat_time=0 +Rank 0: Loading aguvis_guide_si_10k-v2 +Rank 0: Skipping aguvis_guide_si_10k-v2 due to repeat_time=0 +Rank 0: Loading aguvis_guienv +Rank 0: Skipping aguvis_guienv due to repeat_time=0 +Rank 0: Loading aguvis_mind2web_train_v1.0.1 +Rank 0: Skipping aguvis_mind2web_train_v1.0.1 due to repeat_time=0 +Rank 0: Loading aguvis_omniact +Rank 0: Skipping aguvis_omniact due to repeat_time=0 +Rank 0: Loading aguvis_osatlas_ui_tars_cleaned +Rank 0: Skipping aguvis_osatlas_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading aguvis_ricoig16k +Rank 0: Skipping aguvis_ricoig16k due to repeat_time=0 +Rank 0: Loading aguvis_ricosca +Rank 0: Skipping aguvis_ricosca due to repeat_time=0 +Rank 0: Loading aguvis_seeclick_mi_ui_tars_cleaned +Rank 0: Skipping aguvis_seeclick_mi_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading aguvis_seeclick_ui_tars_cleaned_fixed +Rank 0: Skipping aguvis_seeclick_ui_tars_cleaned_fixed due to repeat_time=0 +Rank 0: Loading aguvis_ui_refexp +Rank 0: Skipping aguvis_ui_refexp due to repeat_time=0 +Rank 0: Loading aguvis_webui350k +Rank 0: Skipping aguvis_webui350k due to repeat_time=0 +Rank 0: Loading aguvis_widget_captioning +Rank 0: Skipping aguvis_widget_captioning due to repeat_time=0 +Rank 0: Loading icon_caption_icon_v0222_description +Rank 0: Skipping icon_caption_icon_v0222_description due to repeat_time=0 +Rank 0: Loading icon_grounding_icon_v0222_grounding +Rank 0: Skipping icon_grounding_icon_v0222_grounding due to repeat_time=0 +Rank 0: Loading refusal_component_final_1.5m +Rank 0: Skipping refusal_component_final_1.5m due to repeat_time=0 +Rank 0: Loading refusal_component_library_snap_icon_data_grounding +Rank 0: Skipping refusal_component_library_snap_icon_data_grounding due to repeat_time=0 +Rank 0: Loading refusal_component_v1_130k +Rank 0: Skipping refusal_component_v1_130k due to repeat_time=0 +Rank 0: Loading refusal_guienv +Rank 0: Skipping refusal_guienv due to repeat_time=0 +Rank 0: Loading refusal_icon_v0222_grounding +Rank 0: Skipping refusal_icon_v0222_grounding due to repeat_time=0 +Rank 0: Loading refusal_osatlas_ui_tars_cleaned +Rank 0: Skipping refusal_osatlas_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading refusal_ricosca +Rank 0: Skipping refusal_ricosca due to repeat_time=0 +Rank 0: Loading refusal_seeclick_mi_ui_tars_cleaned +Rank 0: Skipping refusal_seeclick_mi_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading refusal_seeclick_ui_tars_cleaned_fixed +Rank 0: Skipping refusal_seeclick_ui_tars_cleaned_fixed due to repeat_time=0 +Rank 0: Loading refusal_training_data_icon_grounded_merged +Rank 0: Skipping refusal_training_data_icon_grounded_merged due to repeat_time=0 +Rank 0: Loading component_generated_component_final_1.5m_cleaned_split +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_final_1.5m_cleaned_split_20250713.jsonl with random:10% sampling strategy +Rank 0: Loaded 3987 samples from VC:s3://gui-agent/jedi/annotations_250713/component_final_1.5m_cleaned_split_20250713.jsonl +Rank 0: Loading component_generated_component_library_snap_icon_data_description +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_description_conversations_20250713.jsonl with random:50% sampling strategy +Rank 0: Loaded 11061 samples from VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_description_conversations_20250713.jsonl +Rank 0: Loading component_generated_component_library_snap_icon_data_grounding +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_grounding_conversations_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 4424 samples from VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_grounding_conversations_20250713.jsonl +Rank 0: Loading component_generated_component_v1_130k +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_v1_130k_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 26376 samples from VC:s3://gui-agent/jedi/annotations_250713/component_v1_130k_20250713.jsonl +Rank 0: Loading component_rule-based_doc_data_new +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/doc_data_new_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 3153 samples from VC:s3://gui-agent/jedi/annotations_250713/doc_data_new_20250713.jsonl +Rank 0: Loading component_rule-based_doc_scroll_data_new +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/doc_scroll_data_new_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 603 samples from VC:s3://gui-agent/jedi/annotations_250713/doc_scroll_data_new_20250713.jsonl +Rank 0: Loading component_rule-based_ethercalc_v1 +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/ethercalc_v1_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 2012 samples from VC:s3://gui-agent/jedi/annotations_250713/ethercalc_v1_20250713.jsonl +Rank 0: Loading component_rule-based_slide_v1_17k +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/slide_v1_17k_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 2363 samples from VC:s3://gui-agent/jedi/annotations_250713/slide_v1_17k_20250713.jsonl +Rank 0: Loading icon_caption_ios_app_data +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/ios_app_data_conversations-images_pure_color_background_20250713.jsonl with all sampling strategy +Rank 0: Loaded 49498 samples from VC:s3://gui-agent/jedi/annotations_250713/ios_app_data_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_caption_mac_app_data +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/mac_app_data_conversations-images_pure_color_background_20250713.jsonl with all sampling strategy +Rank 0: Loaded 18083 samples from VC:s3://gui-agent/jedi/annotations_250713/mac_app_data_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_caption_training_data_icon +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_pure_color_background_20250713.jsonl with random:50% sampling strategy +Rank 0: Loaded 75874 samples from VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_grounding_training_data_icon_grounded_merged +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_grounded_merged_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 5466 samples from VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_grounded_merged_20250713.jsonl +Rank 0: Loading layout_layout200k_training_data_qwen25 +Rank 0: Skipping layout_layout200k_training_data_qwen25 due to repeat_time=0 +Rank 0: Loading layout_layout200k_grounding_training_data_qwen25 +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/layout200k_grounding_training_data_qwen25_20250713.jsonl with random:10% sampling strategy +Rank 0: Loaded 158612 samples from VC:s3://gui-agent/jedi/annotations_250713/layout200k_grounding_training_data_qwen25_20250713.jsonl +Rank 0: Loading layout_layout400k_claude_training_data_qwen25_split +Rank 0: Skipping layout_layout400k_claude_training_data_qwen25_split due to repeat_time=0 +Rank 0: Loading layout_layout400k_claude_grounding_training_data_qwen25_split +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/layout400k_claude_grounding_training_data_qwen25_split_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 7540 samples from VC:s3://gui-agent/jedi/annotations_250713/layout400k_claude_grounding_training_data_qwen25_split_20250713.jsonl +Rank 0: Loading layout_os_layout_v1 +Rank 0: Skipping layout_os_layout_v1 due to repeat_time=0 +Rank 0: Loading layout_os_layout_v1_grounding +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/os_layout_v1_grounding_training_data_qwen25_split_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 7857 samples from VC:s3://gui-agent/jedi/annotations_250713/os_layout_v1_grounding_training_data_qwen25_split_20250713.jsonl +Rank 0: Loading mind2web_raw_image +Rank 0: Loading VC:s3://gui-agent/mind2web_train/navigation_20250705.jsonl with all sampling strategy +Rank 0: Loaded 5740 samples from VC:s3://gui-agent/mind2web_train/navigation_20250705.jsonl +Rank 0: Loading ws_android_navigation_20250328 +Rank 0: Skipping ws_android_navigation_20250328 due to repeat_time=0 +Rank 0: Loading ws_android_navigation_20250407 +Rank 0: Skipping ws_android_navigation_20250407 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_w_history_20250328 +Rank 0: Skipping ws_web_navigation_w_history_20250328 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_wo_history_20250328 +Rank 0: Skipping ws_web_navigation_wo_history_20250328 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_20250421 +Rank 0: Skipping ws_web_navigation_20250421 due to repeat_time=0 +Rank 0: Loading ws_ubuntu_navigation_20250328 +Rank 0: Skipping ws_ubuntu_navigation_20250328 due to repeat_time=0 +Rank 0: Loading ws_android_navigation_20250505 +Rank 0: Skipping ws_android_navigation_20250505 due to repeat_time=0 +Rank 0: Loading internal_android_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 48814 samples from VC:s3://gui-agent/data_20250612/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19042 samples from VC:s3://gui-agent/data_20250612/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8363 samples from VC:s3://gui-agent/data_20250612/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 26412 samples from VC:s3://gui-agent/data_20250612/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 57522 samples from VC:s3://gui-agent/data_20250612/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 1342 samples from VC:s3://gui-agent/data_20250624/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 15766 samples from VC:s3://gui-agent/data_20250624/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19280 samples from VC:s3://gui-agent/data_20250630/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 3560 samples from VC:s3://gui-agent/data_20250630/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 9258 samples from VC:s3://gui-agent/data_20250630/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 420 samples from VC:s3://gui-agent/data_20250707/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 8898 samples from VC:s3://gui-agent/data_20250707/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 21026 samples from VC:s3://gui-agent/data_20250707/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 4490 samples from VC:s3://gui-agent/data_20250707/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 22154 samples from VC:s3://gui-agent/data_20250714/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/web/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 11614 samples from VC:s3://gui-agent/data_20250714/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 16767 samples from VC:s3://gui-agent/data_20250714/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 48814 samples from VC:s3://gui-agent/data_20250612/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 11970 samples from VC:s3://gui-agent/data_20250612/android/planning_20250811.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19042 samples from VC:s3://gui-agent/data_20250612/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 4926 samples from VC:s3://gui-agent/data_20250612/mac/planning_20250811.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8363 samples from VC:s3://gui-agent/data_20250612/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3716 samples from VC:s3://gui-agent/data_20250612/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 26412 samples from VC:s3://gui-agent/data_20250612/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 4420 samples from VC:s3://gui-agent/data_20250612/windows/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 115044 samples from VC:s3://gui-agent/data_20250612/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2684 samples from VC:s3://gui-agent/data_20250624/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 15766 samples from VC:s3://gui-agent/data_20250624/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7402 samples from VC:s3://gui-agent/data_20250624/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19280 samples from VC:s3://gui-agent/data_20250630/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3746 samples from VC:s3://gui-agent/data_20250630/android/planning_20250811.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 3560 samples from VC:s3://gui-agent/data_20250630/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 2469 samples from VC:s3://gui-agent/data_20250630/mac/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 18516 samples from VC:s3://gui-agent/data_20250630/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 420 samples from VC:s3://gui-agent/data_20250707/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 243 samples from VC:s3://gui-agent/data_20250707/mac/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 8898 samples from VC:s3://gui-agent/data_20250707/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 1188 samples from VC:s3://gui-agent/data_20250707/windows/planning_20250811.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8979 samples from VC:s3://gui-agent/data_20250707/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 1599 samples from VC:s3://gui-agent/data_20250707/android/planning_20250811.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 21026 samples from VC:s3://gui-agent/data_20250707/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2657 samples from VC:s3://gui-agent/data_20250707/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 22154 samples from VC:s3://gui-agent/data_20250714/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2383 samples from VC:s3://gui-agent/data_20250714/windows/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/web/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 23229 samples from VC:s3://gui-agent/data_20250714/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 16767 samples from VC:s3://gui-agent/data_20250714/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2389 samples from VC:s3://gui-agent/data_20250714/ubuntu/planning_20250811.jsonl +Rank 0: Loading private_aig_share_0815_logo_oral_operation_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_oral_operation_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_oral_operation_d240924_v1.jsonl +Rank 0: Loading private_aig_share_0815_logo_region_caption_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_region_caption_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_region_caption_d240924_v1.jsonl +Rank 0: Loading private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2.jsonl with all sampling strategy +Rank 0: Loaded 20293 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2.jsonl +Rank 0: Loading private_ui_phone_comment_20240606_json_d20241023_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_comment_20240606_json_d20241023_v2.jsonl with all sampling strategy +Rank 0: Loaded 1055 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_comment_20240606_json_d20241023_v2.jsonl +Rank 0: Loading private_ui_internal_aig_json_d241126 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_json_d241126.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 6837 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_json_d241126.jsonl +Rank 0: Loading private_ui_internal_aig_xml_d241126 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_xml_d241126.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 6873 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_xml_d241126.jsonl +Rank 0: Loading OS_Altas_androidworld_grounding_d241120_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/OS_Altas_androidworld_grounding_d241120_v1.jsonl with all sampling strategy +Rank 0: Loaded 89860 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/OS_Altas_androidworld_grounding_d241120_v1.jsonl +Rank 0: Loading private_ui_aig_share_long_caption_20240604_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_long_caption_20240604_v1.jsonl with repeat:4 sampling strategy +Rank 0: Loaded 3156 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_long_caption_20240604_v1.jsonl +Rank 0: Loading aw_1218_grounding +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/grounding_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/grounding_new.jsonl +Rank 0: Loading aw_1218_regioncaption +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/regioncaption_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/regioncaption_new.jsonl +Rank 0: Loading aw_1218_oral_operation +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/oral_operation_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/oral_operation_new.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600.jsonl with all sampling strategy +Rank 0: Loaded 6600 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1.jsonl +Rank 0: Loading private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607.jsonl with all sampling strategy +Rank 0: Loaded 24620 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607.jsonl +Rank 0: Loading private_ui_phone_2403_long_caption_d20240604_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d20240604_v2.jsonl with all sampling strategy +Rank 0: Loaded 17196 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d20240604_v2.jsonl +Rank 0: Loading private_ui_phone_2403_long_caption_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 5998 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d240430_v1.jsonl +Rank 0: Loading private_ui_phone_2403_ocr_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ocr_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 31276 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ocr_d240430_v1.jsonl +Rank 0: Loading screen_qa_with_bbox_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_with_bbox_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 62401 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_with_bbox_d240430_v1.jsonl +Rank 0: Loading screenai_layout_20240604_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screenai_layout_20240604_v1.jsonl with all sampling strategy +Rank 0: Loaded 22076 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screenai_layout_20240604_v1.jsonl +Rank 0: Loading amex_grounding_d240813_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/amex_grounding_d240813_v1.jsonl with all sampling strategy +Rank 0: Loaded 102007 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/amex_grounding_d240813_v1.jsonl +Rank 0: Loading guicourse_guienv_text_grounding_1_d240815_v3 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_1_d240815_v3.jsonl with all sampling strategy +Rank 0: Loaded 63581 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_1_d240815_v3.jsonl +Rank 0: Loading guicourse_guienv_text_grounding_2_d240815_v3 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_2_d240815_v3.jsonl with all sampling strategy +Rank 0: Loaded 6852 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_2_d240815_v3.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1.jsonl +Rank 0: Loading screen_qa_short_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_short_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 27880 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_short_d240430_v1.jsonl +Rank 0: Loading private_aig_share_0815_logo_grounding_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_grounding_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_grounding_d240924_v1.jsonl +Rank 0: Loading private_schedual_extract_20240520_v2_r464_reprompt_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_schedual_extract_20240520_v2_r464_reprompt_d240607.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 928 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_schedual_extract_20240520_v2_r464_reprompt_d240607.jsonl +Rank 0: Loading private_ui2json_app_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_app_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2488 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_app_d20240822_v1.jsonl +Rank 0: Loading private_ui2json_os_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_os_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 1242 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_os_d20240822_v1.jsonl +Rank 0: Loading private_ui2json_web_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_web_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2360 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_web_d20240822_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607.jsonl with all sampling strategy +Rank 0: Loaded 3791 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607.jsonl +Rank 0: Loading private_ui_aig_share_2405_marker_recognition_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_marker_recognition_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5179 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_marker_recognition_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_ocr_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_ocr_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5090 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_ocr_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_operation_oral_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_operation_oral_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5070 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_operation_oral_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5248 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2408_region_caption_d240903_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2408_region_caption_d240903_v1.jsonl with all sampling strategy +Rank 0: Loaded 5854 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2408_region_caption_d240903_v1.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1.jsonl +Rank 0: Loading uground_web_direct_150k_description_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_direct_150k_description_filtered_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 74208 samples from VC:s3://gui/new_annotations/uground/web_direct_150k_description_filtered_202507011.jsonl +Rank 0: Loading uground_web_direct_258k_function_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_direct_258k_function_filtered_202507011.jsonl with random:50% sampling strategy +W0818 22:51:24.287000 34585 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 22:51:24.287000 34585 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 22:51:24.287000 34585 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 22:51:24.287000 34585 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 22:51:24.857000 33042 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 22:51:24.857000 33042 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 22:51:24.857000 33042 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 22:51:24.857000 33042 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 22:51:26.192000 20290 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 22:51:26.192000 20290 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 22:51:26.192000 20290 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 22:51:26.192000 20290 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 22:51:26.439000 106906 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 22:51:26.439000 106906 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 22:51:26.439000 106906 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 22:51:26.439000 106906 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 22:51:28.655000 40287 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 22:51:28.655000 40287 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 22:51:28.655000 40287 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 22:51:28.655000 40287 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 22:51:28.986000 11130 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] +W0818 22:51:28.986000 11130 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +W0818 22:51:28.986000 11130 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0818 22:51:28.986000 11130 /mnt/hwfile/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/distributed/run.py:792] ***************************************** +[2025-08-18 22:51:46,016] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:51:46,016] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:51:45,999] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:51:46,000] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:51:46,009] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:51:46,048] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:51:46,048] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:51:46,049] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:51:46,049] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:51:46,050] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:51:46,050] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:51:46,050] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:51:46,046] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:51:46,046] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:51:46,046] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:51:46,046] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:51:46,046] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:51:46,074] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:51:46,076] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:51:46,076] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:51:46,077] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:51:46,077] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:51:46,077] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:51:46,077] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:51:48,427] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:51:48,451] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:51:48,451] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:51:48,469] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:51:48,474] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:51:48,475] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:51:48,475] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:51:48,602] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:51:49,468] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 22:51:49,470] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 22:51:49,470] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 22:51:49,471] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 22:51:49,471] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 22:51:49,472] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 22:51:49,472] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 22:51:49,472] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 22:51:49,463] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 22:51:49,463] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 22:51:49,463] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 22:51:49,463] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 22:51:49,463] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 22:51:49,466] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 22:51:49,452] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 22:51:49,452] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 22:51:49,468] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 22:51:49,469] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 22:51:49,904] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 22:51:49,935] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 22:51:49,935] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 22:51:50,034] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 22:51:50,041] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 22:51:50,042] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 22:51:50,046] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 22:51:50,049] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 22:51:50,049] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 22:51:50,049] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 22:51:50,070] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 22:51:50,086] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 22:51:50,086] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 22:51:50,086] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 22:51:50,091] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 22:51:50,091] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 22:51:50,092] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 22:51:50,087] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 22:51:50,092] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 22:51:50,092] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 22:51:50,093] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 22:51:50,094] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 22:51:53,722] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 22:51:53,723] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 22:51:53,723] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 22:51:53,725] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 22:51:53,728] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 22:51:53,733] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 22:51:53,736] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 22:51:53,740] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 22:51:54,282] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 22:51:54,492] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 22:51:54,497] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 22:51:54,499] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 22:51:54,502] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 22:51:54,504] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 22:51:54,506] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 22:51:54,509] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 22:52:00,015] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:52:00,015] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:52:00,016] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:52:00,016] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:52:00,020] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:52:00,020] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:52:00,020] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:52:00,020] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:52:00,034] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:52:00,034] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:52:00,034] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:52:00,038] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:52:00,043] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:52:00,043] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:52:00,043] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:52:00,058] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:52:00,059] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:52:00,059] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:52:00,060] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:52:00,060] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:52:00,061] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:52:00,061] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:52:00,061] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:52:00,062] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:52:00,062] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:52:00,062] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:52:00,063] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:52:00,063] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:52:00,063] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:52:00,063] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:52:00,066] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2025-08-18 22:52:07,395] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 22:52:07,380] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 22:52:07,395] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 22:52:07,395] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 22:52:07,395] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 22:52:07,395] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 22:52:07,381] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 22:52:07,381] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 22:52:07,398] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 22:52:07,399] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 22:52:07,399] [INFO] [comm.py:652:init_distributed] cdb=None + [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 22:52:07,392] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 22:52:07,393] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 22:52:07,393] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 22:52:07,393] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 22:52:07,394] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 22:52:07,395] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 22:52:07,396] [INFO] [comm.py:652:init_distributed] cdb=None +[2025-08-18 22:52:07,859] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 22:52:07,892] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 22:52:07,952] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 22:52:07,951] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 22:52:07,967] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 22:52:07,961] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 22:52:07,962] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 22:52:07,962] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 22:52:07,962] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 22:52:07,963] [INFO] [config.py:733:__init__] Config mesh_devi[2025-08-18 22:52:07,979] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 22:52:07,979You are attempting to use Flash Attention 2.0 with a model not initializedYou are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 22:52:07,992] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 22:52:07,992] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 22:52:07,993] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 22:52:07,994] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 22:52:07,995] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 22:52:07,995] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 22:52:07,999] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 22:52:08,013] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 22:52:08,024] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 22:52:08,028] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 22:52:08,029] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 22:52:08,032] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2025-08-18 22:52:08,033] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +[2025-08-18 22:52:08,034] [INFO] [config.py:733:__init__] Config mesh_device None world_size = 64 +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. + Loading checkpoint shards: 0%| | 0/2 [00:00 before Client(conf_path) +Rank 0: --> after Client(conf_path) +Rank 0: Loading datasets: /mnt/petrelfs/liuzhaoyang/workspace/GUIAgent/internvl_chat/data/internvl_meta/meta/meta_250816_1.json +Rank 0: Loading guienv +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl with all sampling strategy +Rank 0: Loaded 327972 samples from VC:s3://gui/new_annotations/aguvis/stage1/guienv_202507011.jsonl +Rank 0: Loading omniact +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl with all sampling strategy +Rank 0: Loaded 6720 samples from VC:s3://gui/new_annotations/aguvis/stage1/omniact_fix_202507011.jsonl +Rank 0: Loading ricoig16k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16133 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricoig16k_202507011.jsonl +Rank 0: Loading ricosca +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl with all sampling strategy +Rank 0: Loaded 173212 samples from VC:s3://gui/new_annotations/aguvis/stage1/ricosca_202507011.jsonl +Rank 0: Loading seeclick +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl with all sampling strategy +Rank 0: Loaded 271121 samples from VC:s3://gui/new_annotations/aguvis/stage1/seeclick_202507011.jsonl +Rank 0: Loading ui_refexp +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl with all sampling strategy +Rank 0: Loaded 15624 samples from VC:s3://gui/new_annotations/aguvis/stage1/ui_refexp_202507011.jsonl +Rank 0: Loading webui350k +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl with all sampling strategy +Rank 0: Loaded 57389 samples from VC:s3://gui/new_annotations/aguvis/stage1/webui350k_202507011.jsonl +Rank 0: Loading widget_captioning +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl with all sampling strategy +Rank 0: Loaded 101426 samples from VC:s3://gui/new_annotations/aguvis/stage1/widget_captioning_202507011.jsonl +Rank 0: Loading aitw-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l1_202507011.jsonl +Rank 0: Loading aitw-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l2_202507011.jsonl +Rank 0: Loading aitw-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 18992 samples from VC:s3://gui/new_annotations/aguvis/stage2/aitw-l3_202507011.jsonl +Rank 0: Loading amex-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l1_202507011.jsonl +Rank 0: Loading amex-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l2_202507011.jsonl +Rank 0: Loading amex-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 38469 samples from VC:s3://gui/new_annotations/aguvis/stage2/amex-l3_202507011.jsonl +Rank 0: Loading android_control +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 149428 samples from VC:s3://gui/new_annotations/aguvis/stage2/android_control_202507011.jsonl +Rank 0: Loading coat +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl with all sampling strategy +Rank 0: Loaded 11833 samples from VC:s3://gui/new_annotations/aguvis/stage2/coat_filtered_202507011.jsonl +Rank 0: Loading guiact-web-multi-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l1_202507011.jsonl +Rank 0: Loading guiact-web-multi-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l2_202507011.jsonl +Rank 0: Loading guiact-web-multi-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 16704 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-multi-l3_202507011.jsonl +Rank 0: Loading guiact-web-single +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl with all sampling strategy +Rank 0: Loaded 67396 samples from VC:s3://gui/new_annotations/aguvis/stage2/guiact-web-single_202507013.jsonl +Rank 0: Loading guide +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl with all sampling strategy +Rank 0: Loaded 13544 samples from VC:s3://gui/new_annotations/aguvis/stage2/guide_202507011.jsonl +Rank 0: Loading gui-odyssey-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l1_202507011.jsonl +Rank 0: Loading gui-odyssey-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l2_202507011.jsonl +Rank 0: Loading gui-odyssey-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 118282 samples from VC:s3://gui/new_annotations/aguvis/stage2/gui-odyssey-l3_202507011.jsonl +Rank 0: Loading mind2web-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l1_202507011.jsonl +Rank 0: Loading mind2web-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l2_202507011.jsonl +Rank 0: Loading mind2web-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 7591 samples from VC:s3://gui/new_annotations/aguvis/stage2/mind2web-l3_202507011.jsonl +Rank 0: Loading miniwob-l1 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l1_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l1_202507011.jsonl +Rank 0: Loading miniwob-l2 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l2_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l2_202507011.jsonl +Rank 0: Loading miniwob-l3 +Rank 0: Loading VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l3_202507011.jsonl with all sampling strategy +Rank 0: Loaded 9826 samples from VC:s3://gui/new_annotations/aguvis/stage2/miniwob-l3_202507011.jsonl +Rank 0: Loading aguvis_android_control-v2 +Rank 0: Skipping aguvis_android_control-v2 due to repeat_time=0 +Rank 0: Loading aguvis_coat-v2 +Rank 0: Skipping aguvis_coat-v2 due to repeat_time=0 +Rank 0: Loading aguvis_docvqa_grounding +Rank 0: Skipping aguvis_docvqa_grounding due to repeat_time=0 +Rank 0: Loading aguvis_guiact-web-multi +Rank 0: Skipping aguvis_guiact-web-multi due to repeat_time=0 +Rank 0: Loading aguvis_guiact-web-single-v2 +Rank 0: Skipping aguvis_guiact-web-single-v2 due to repeat_time=0 +Rank 0: Loading aguvis_guide_si_10k-v2 +Rank 0: Skipping aguvis_guide_si_10k-v2 due to repeat_time=0 +Rank 0: Loading aguvis_guienv +Rank 0: Skipping aguvis_guienv due to repeat_time=0 +Rank 0: Loading aguvis_mind2web_train_v1.0.1 +Rank 0: Skipping aguvis_mind2web_train_v1.0.1 due to repeat_time=0 +Rank 0: Loading aguvis_omniact +Rank 0: Skipping aguvis_omniact due to repeat_time=0 +Rank 0: Loading aguvis_osatlas_ui_tars_cleaned +Rank 0: Skipping aguvis_osatlas_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading aguvis_ricoig16k +Rank 0: Skipping aguvis_ricoig16k due to repeat_time=0 +Rank 0: Loading aguvis_ricosca +Rank 0: Skipping aguvis_ricosca due to repeat_time=0 +Rank 0: Loading aguvis_seeclick_mi_ui_tars_cleaned +Rank 0: Skipping aguvis_seeclick_mi_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading aguvis_seeclick_ui_tars_cleaned_fixed +Rank 0: Skipping aguvis_seeclick_ui_tars_cleaned_fixed due to repeat_time=0 +Rank 0: Loading aguvis_ui_refexp +Rank 0: Skipping aguvis_ui_refexp due to repeat_time=0 +Rank 0: Loading aguvis_webui350k +Rank 0: Skipping aguvis_webui350k due to repeat_time=0 +Rank 0: Loading aguvis_widget_captioning +Rank 0: Skipping aguvis_widget_captioning due to repeat_time=0 +Rank 0: Loading icon_caption_icon_v0222_description +Rank 0: Skipping icon_caption_icon_v0222_description due to repeat_time=0 +Rank 0: Loading icon_grounding_icon_v0222_grounding +Rank 0: Skipping icon_grounding_icon_v0222_grounding due to repeat_time=0 +Rank 0: Loading refusal_component_final_1.5m +Rank 0: Skipping refusal_component_final_1.5m due to repeat_time=0 +Rank 0: Loading refusal_component_library_snap_icon_data_grounding +Rank 0: Skipping refusal_component_library_snap_icon_data_grounding due to repeat_time=0 +Rank 0: Loading refusal_component_v1_130k +Rank 0: Skipping refusal_component_v1_130k due to repeat_time=0 +Rank 0: Loading refusal_guienv +Rank 0: Skipping refusal_guienv due to repeat_time=0 +Rank 0: Loading refusal_icon_v0222_grounding +Rank 0: Skipping refusal_icon_v0222_grounding due to repeat_time=0 +Rank 0: Loading refusal_osatlas_ui_tars_cleaned +Rank 0: Skipping refusal_osatlas_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading refusal_ricosca +Rank 0: Skipping refusal_ricosca due to repeat_time=0 +Rank 0: Loading refusal_seeclick_mi_ui_tars_cleaned +Rank 0: Skipping refusal_seeclick_mi_ui_tars_cleaned due to repeat_time=0 +Rank 0: Loading refusal_seeclick_ui_tars_cleaned_fixed +Rank 0: Skipping refusal_seeclick_ui_tars_cleaned_fixed due to repeat_time=0 +Rank 0: Loading refusal_training_data_icon_grounded_merged +Rank 0: Skipping refusal_training_data_icon_grounded_merged due to repeat_time=0 +Rank 0: Loading component_generated_component_final_1.5m_cleaned_split +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_final_1.5m_cleaned_split_20250713.jsonl with random:10% sampling strategy +Rank 0: Loaded 3987 samples from VC:s3://gui-agent/jedi/annotations_250713/component_final_1.5m_cleaned_split_20250713.jsonl +Rank 0: Loading component_generated_component_library_snap_icon_data_description +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_description_conversations_20250713.jsonl with random:50% sampling strategy +Rank 0: Loaded 11061 samples from VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_description_conversations_20250713.jsonl +Rank 0: Loading component_generated_component_library_snap_icon_data_grounding +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_grounding_conversations_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 4424 samples from VC:s3://gui-agent/jedi/annotations_250713/component_library_snap_icon_data_grounding_conversations_20250713.jsonl +Rank 0: Loading component_generated_component_v1_130k +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/component_v1_130k_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 26376 samples from VC:s3://gui-agent/jedi/annotations_250713/component_v1_130k_20250713.jsonl +Rank 0: Loading component_rule-based_doc_data_new +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/doc_data_new_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 3153 samples from VC:s3://gui-agent/jedi/annotations_250713/doc_data_new_20250713.jsonl +Rank 0: Loading component_rule-based_doc_scroll_data_new +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/doc_scroll_data_new_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 603 samples from VC:s3://gui-agent/jedi/annotations_250713/doc_scroll_data_new_20250713.jsonl +Rank 0: Loading component_rule-based_ethercalc_v1 +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/ethercalc_v1_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 2012 samples from VC:s3://gui-agent/jedi/annotations_250713/ethercalc_v1_20250713.jsonl +Rank 0: Loading component_rule-based_slide_v1_17k +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/slide_v1_17k_20250713.jsonl with random:20% sampling strategy +Rank 0: Loaded 2363 samples from VC:s3://gui-agent/jedi/annotations_250713/slide_v1_17k_20250713.jsonl +Rank 0: Loading icon_caption_ios_app_data +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/ios_app_data_conversations-images_pure_color_background_20250713.jsonl with all sampling strategy +Rank 0: Loaded 49498 samples from VC:s3://gui-agent/jedi/annotations_250713/ios_app_data_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_caption_mac_app_data +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/mac_app_data_conversations-images_pure_color_background_20250713.jsonl with all sampling strategy +Rank 0: Loaded 18083 samples from VC:s3://gui-agent/jedi/annotations_250713/mac_app_data_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_caption_training_data_icon +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_pure_color_background_20250713.jsonl with random:50% sampling strategy +Rank 0: Loaded 75874 samples from VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_pure_color_background_20250713.jsonl +Rank 0: Loading icon_grounding_training_data_icon_grounded_merged +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_grounded_merged_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 5466 samples from VC:s3://gui-agent/jedi/annotations_250713/training_data_icon_conversations-images_grounded_merged_20250713.jsonl +Rank 0: Loading layout_layout200k_training_data_qwen25 +Rank 0: Skipping layout_layout200k_training_data_qwen25 due to repeat_time=0 +Rank 0: Loading layout_layout200k_grounding_training_data_qwen25 +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/layout200k_grounding_training_data_qwen25_20250713.jsonl with random:10% sampling strategy +Rank 0: Loaded 158612 samples from VC:s3://gui-agent/jedi/annotations_250713/layout200k_grounding_training_data_qwen25_20250713.jsonl +Rank 0: Loading layout_layout400k_claude_training_data_qwen25_split +Rank 0: Skipping layout_layout400k_claude_training_data_qwen25_split due to repeat_time=0 +Rank 0: Loading layout_layout400k_claude_grounding_training_data_qwen25_split +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/layout400k_claude_grounding_training_data_qwen25_split_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 7540 samples from VC:s3://gui-agent/jedi/annotations_250713/layout400k_claude_grounding_training_data_qwen25_split_20250713.jsonl +Rank 0: Loading layout_os_layout_v1 +Rank 0: Skipping layout_os_layout_v1 due to repeat_time=0 +Rank 0: Loading layout_os_layout_v1_grounding +Rank 0: Loading VC:s3://gui-agent/jedi/annotations_250713/os_layout_v1_grounding_training_data_qwen25_split_20250713.jsonl with random:30% sampling strategy +Rank 0: Loaded 7857 samples from VC:s3://gui-agent/jedi/annotations_250713/os_layout_v1_grounding_training_data_qwen25_split_20250713.jsonl +Rank 0: Loading mind2web_raw_image +Rank 0: Loading VC:s3://gui-agent/mind2web_train/navigation_20250705.jsonl with all sampling strategy +Rank 0: Loaded 5740 samples from VC:s3://gui-agent/mind2web_train/navigation_20250705.jsonl +Rank 0: Loading ws_android_navigation_20250328 +Rank 0: Skipping ws_android_navigation_20250328 due to repeat_time=0 +Rank 0: Loading ws_android_navigation_20250407 +Rank 0: Skipping ws_android_navigation_20250407 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_w_history_20250328 +Rank 0: Skipping ws_web_navigation_w_history_20250328 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_wo_history_20250328 +Rank 0: Skipping ws_web_navigation_wo_history_20250328 due to repeat_time=0 +Rank 0: Loading ws_web_navigation_20250421 +Rank 0: Skipping ws_web_navigation_20250421 due to repeat_time=0 +Rank 0: Loading ws_ubuntu_navigation_20250328 +Rank 0: Skipping ws_ubuntu_navigation_20250328 due to repeat_time=0 +Rank 0: Loading ws_android_navigation_20250505 +Rank 0: Skipping ws_android_navigation_20250505 due to repeat_time=0 +Rank 0: Loading internal_android_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 48814 samples from VC:s3://gui-agent/data_20250612/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19042 samples from VC:s3://gui-agent/data_20250612/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8363 samples from VC:s3://gui-agent/data_20250612/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 26412 samples from VC:s3://gui-agent/data_20250612/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 57522 samples from VC:s3://gui-agent/data_20250612/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 1342 samples from VC:s3://gui-agent/data_20250624/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 15766 samples from VC:s3://gui-agent/data_20250624/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19280 samples from VC:s3://gui-agent/data_20250630/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 3560 samples from VC:s3://gui-agent/data_20250630/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/web/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 9258 samples from VC:s3://gui-agent/data_20250630/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/navigation_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 420 samples from VC:s3://gui-agent/data_20250707/mac/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 8898 samples from VC:s3://gui-agent/data_20250707/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 21026 samples from VC:s3://gui-agent/data_20250707/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_navigation_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 4490 samples from VC:s3://gui-agent/data_20250707/android/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 22154 samples from VC:s3://gui-agent/data_20250714/windows/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/web/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 11614 samples from VC:s3://gui-agent/data_20250714/web/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_navigation_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/navigation_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 16767 samples from VC:s3://gui-agent/data_20250714/ubuntu/navigation_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 48814 samples from VC:s3://gui-agent/data_20250612/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/android/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 11970 samples from VC:s3://gui-agent/data_20250612/android/planning_20250811.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19042 samples from VC:s3://gui-agent/data_20250612/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 4926 samples from VC:s3://gui-agent/data_20250612/mac/planning_20250811.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8363 samples from VC:s3://gui-agent/data_20250612/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/ubuntu/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3716 samples from VC:s3://gui-agent/data_20250612/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 26412 samples from VC:s3://gui-agent/data_20250612/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/windows/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 4420 samples from VC:s3://gui-agent/data_20250612/windows/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250612 +Rank 0: Loading VC:s3://gui-agent/data_20250612/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 115044 samples from VC:s3://gui-agent/data_20250612/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2684 samples from VC:s3://gui-agent/data_20250624/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 15766 samples from VC:s3://gui-agent/data_20250624/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250624 +Rank 0: Loading VC:s3://gui-agent/data_20250624/ubuntu/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7402 samples from VC:s3://gui-agent/data_20250624/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 19280 samples from VC:s3://gui-agent/data_20250630/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/android/planning_20250811.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3746 samples from VC:s3://gui-agent/data_20250630/android/planning_20250811.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 3560 samples from VC:s3://gui-agent/data_20250630/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 2469 samples from VC:s3://gui-agent/data_20250630/mac/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/web/planning_20250720_boost_instruction.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 18516 samples from VC:s3://gui-agent/data_20250630/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 420 samples from VC:s3://gui-agent/data_20250707/mac/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_mac_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/mac/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 243 samples from VC:s3://gui-agent/data_20250707/mac/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 8898 samples from VC:s3://gui-agent/data_20250707/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 1188 samples from VC:s3://gui-agent/data_20250707/windows/planning_20250811.jsonl +Rank 0: Loading internal_android_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 8979 samples from VC:s3://gui-agent/data_20250707/android/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_android_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/android/planning_20250811.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 1599 samples from VC:s3://gui-agent/data_20250707/android/planning_20250811.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 21026 samples from VC:s3://gui-agent/data_20250707/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/ubuntu/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2657 samples from VC:s3://gui-agent/data_20250707/ubuntu/planning_20250811.jsonl +Rank 0: Loading internal_windows_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 22154 samples from VC:s3://gui-agent/data_20250714/windows/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_windows_planning_cot_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2383 samples from VC:s3://gui-agent/data_20250714/windows/planning_20250811.jsonl +Rank 0: Loading internal_web_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/web/planning_20250720_boost_instruction.jsonl with all sampling strategy +Rank 0: Loaded 23229 samples from VC:s3://gui-agent/data_20250714/web/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_boost_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/planning_20250720_boost_instruction.jsonl with random:50% sampling strategy +Rank 0: Loaded 16767 samples from VC:s3://gui-agent/data_20250714/ubuntu/planning_20250720_boost_instruction.jsonl +Rank 0: Loading internal_ubuntu_planning_cot_instruction_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/ubuntu/planning_20250811.jsonl with all sampling strategy +Rank 0: Loaded 2389 samples from VC:s3://gui-agent/data_20250714/ubuntu/planning_20250811.jsonl +Rank 0: Loading private_aig_share_0815_logo_oral_operation_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_oral_operation_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_oral_operation_d240924_v1.jsonl +Rank 0: Loading private_aig_share_0815_logo_region_caption_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_region_caption_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_region_caption_d240924_v1.jsonl +Rank 0: Loading private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2.jsonl with all sampling strategy +Rank 0: Loaded 20293 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ui_operation_oral_wbox_d241023_v2.jsonl +Rank 0: Loading private_ui_phone_comment_20240606_json_d20241023_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_comment_20240606_json_d20241023_v2.jsonl with all sampling strategy +Rank 0: Loaded 1055 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_comment_20240606_json_d20241023_v2.jsonl +Rank 0: Loading private_ui_internal_aig_json_d241126 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_json_d241126.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 6837 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_json_d241126.jsonl +Rank 0: Loading private_ui_internal_aig_xml_d241126 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_xml_d241126.jsonl with repeat:3 sampling strategy +Rank 0: Loaded 6873 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_internal_aig_xml_d241126.jsonl +Rank 0: Loading OS_Altas_androidworld_grounding_d241120_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/OS_Altas_androidworld_grounding_d241120_v1.jsonl with all sampling strategy +Rank 0: Loaded 89860 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/OS_Altas_androidworld_grounding_d241120_v1.jsonl +Rank 0: Loading private_ui_aig_share_long_caption_20240604_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_long_caption_20240604_v1.jsonl with repeat:4 sampling strategy +Rank 0: Loaded 3156 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_long_caption_20240604_v1.jsonl +Rank 0: Loading aw_1218_grounding +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/grounding_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/grounding_new.jsonl +Rank 0: Loading aw_1218_regioncaption +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/regioncaption_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/regioncaption_new.jsonl +Rank 0: Loading aw_1218_oral_operation +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/oral_operation_new.jsonl with all sampling strategy +Rank 0: Loaded 863 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/oral_operation_new.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600.jsonl with all sampling strategy +Rank 0: Loaded 6600 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240812_grounding_dataset_20240812_v1_r6600.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_element_recognition_d20240416_v1.jsonl +Rank 0: Loading private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607.jsonl with all sampling strategy +Rank 0: Loaded 24620 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_element_recognition_d240605_v1_correct_d240607.jsonl +Rank 0: Loading private_ui_phone_2403_long_caption_d20240604_v2 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d20240604_v2.jsonl with all sampling strategy +Rank 0: Loaded 17196 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d20240604_v2.jsonl +Rank 0: Loading private_ui_phone_2403_long_caption_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 5998 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_long_caption_d240430_v1.jsonl +Rank 0: Loading private_ui_phone_2403_ocr_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ocr_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 31276 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_phone_2403_ocr_d240430_v1.jsonl +Rank 0: Loading screen_qa_with_bbox_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_with_bbox_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 62401 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_with_bbox_d240430_v1.jsonl +Rank 0: Loading screenai_layout_20240604_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screenai_layout_20240604_v1.jsonl with all sampling strategy +Rank 0: Loaded 22076 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screenai_layout_20240604_v1.jsonl +Rank 0: Loading amex_grounding_d240813_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/amex_grounding_d240813_v1.jsonl with all sampling strategy +Rank 0: Loaded 102007 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/amex_grounding_d240813_v1.jsonl +Rank 0: Loading guicourse_guienv_text_grounding_1_d240815_v3 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_1_d240815_v3.jsonl with all sampling strategy +Rank 0: Loaded 63581 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_1_d240815_v3.jsonl +Rank 0: Loading guicourse_guienv_text_grounding_2_d240815_v3 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_2_d240815_v3.jsonl with all sampling strategy +Rank 0: Loaded 6852 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/guicourse_guienv_text_grounding_2_d240815_v3.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_detection_d20240418_v1.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_ground_d20240416_v1.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_detection_d20240418_v1.jsonl +Rank 0: Loading private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_tablet_20240416_v7_ground_d20240416_v1.jsonl +Rank 0: Loading screen_qa_short_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_short_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 27880 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/screen_qa_short_d240430_v1.jsonl +Rank 0: Loading private_aig_share_0815_logo_grounding_d240924_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_grounding_d240924_v1.jsonl with all sampling strategy +Rank 0: Loaded 1405 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_aig_share_0815_logo_grounding_d240924_v1.jsonl +Rank 0: Loading private_schedual_extract_20240520_v2_r464_reprompt_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_schedual_extract_20240520_v2_r464_reprompt_d240607.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 928 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_schedual_extract_20240520_v2_r464_reprompt_d240607.jsonl +Rank 0: Loading private_ui2json_app_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_app_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2488 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_app_d20240822_v1.jsonl +Rank 0: Loading private_ui2json_os_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_os_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 1242 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_os_d20240822_v1.jsonl +Rank 0: Loading private_ui2json_web_d20240822_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_web_d20240822_v1.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2360 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui2json_web_d20240822_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607.jsonl with all sampling strategy +Rank 0: Loaded 3791 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_element_recognition_d240605_v1_correct_d240607.jsonl +Rank 0: Loading private_ui_aig_share_2405_marker_recognition_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_marker_recognition_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5179 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_marker_recognition_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_ocr_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_ocr_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5090 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_ocr_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_operation_oral_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_operation_oral_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5070 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_operation_oral_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1.jsonl with all sampling strategy +Rank 0: Loaded 5248 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2405_visual_prompt_with_bbox_d240605_v1.jsonl +Rank 0: Loading private_ui_aig_share_2408_region_caption_d240903_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2408_region_caption_d240903_v1.jsonl with all sampling strategy +Rank 0: Loaded 5854 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_aig_share_2408_region_caption_d240903_v1.jsonl +Rank 0: Loading private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1 +Rank 0: Loading VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1.jsonl with all sampling strategy +Rank 0: Loaded 2000 samples from VC:s3://gui/new_annotations/st_data/20250222/annotations/private_ui_homescreen_phone_20240416_v7_element_recognition_d20240416_v1.jsonl +Rank 0: Loading uground_web_direct_150k_description_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_direct_150k_description_filtered_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 74208 samples from VC:s3://gui/new_annotations/uground/web_direct_150k_description_filtered_202507011.jsonl +Rank 0: Loading uground_web_direct_258k_function_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_direct_258k_function_filtered_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 124132 samples from VC:s3://gui/new_annotations/uground/web_direct_258k_function_filtered_202507011.jsonl +Rank 0: Loading uground_web_hybrid_773k_max_25qa_filtered +Rank 0: Loading VC:s3://gui/new_annotations/uground/web_hybrid_773k_max_25qa_filtered_new_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 621928 samples from VC:s3://gui/new_annotations/uground/web_hybrid_773k_max_25qa_filtered_new_202507011.jsonl +Rank 0: Loading altas_windows +Rank 0: Loading VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_windows_splited_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 537672 samples from VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_windows_splited_202507011.jsonl +Rank 0: Loading altas_linux +Rank 0: Loading VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_linux_splited_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 21578 samples from VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_linux_splited_202507011.jsonl +Rank 0: Loading atlas_macos +Rank 0: Loading VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_macos_splited_202507011.jsonl with random:20% sampling strategy +Rank 0: Loaded 3680 samples from VC:s3://gui/new_annotations/OS-Atlas/windows_desktop/processed_macos_splited_202507011.jsonl +Rank 0: Loading android_action_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/android/filter_action_grounding_20250405_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 5621 samples from VC:s3://gui/data_20250328/android/filter_action_grounding_20250405_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250328 +Rank 0: Loading VC:s3://gui-agent/data_20250328/windows/action_grounding_20250409_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 11980 samples from VC:s3://gui-agent/data_20250328/windows/action_grounding_20250409_202507011_20250722.jsonl +Rank 0: Loading web_action_grounding_20250328 +Rank 0: Loading VC:s3://gui-agent/data_20250328/web_25k/action_grounding_20250404_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 9459 samples from VC:s3://gui-agent/data_20250328/web_25k/action_grounding_20250404_202507011.jsonl +Rank 0: Loading ubuntu_action_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/ubuntu/action_grounding_20250407_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 328 samples from VC:s3://gui/data_20250310/ubuntu/action_grounding_20250407_202507011.jsonl +Rank 0: Loading ubuntu_action_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/ubuntu/action_grounding_20250407_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 54 samples from VC:s3://gui/data_20250317/ubuntu/action_grounding_20250407_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/action_grounding_20250421_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 480 samples from VC:s3://gui/data_20250317/windows/action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/crop_action_grounding_20250421_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 240 samples from VC:s3://gui/data_20250317/windows/crop_action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/action_grounding_20250421_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 944 samples from VC:s3://gui/data_20250310/windows/action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/crop_action_grounding_20250421_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 472 samples from VC:s3://gui/data_20250310/windows/crop_action_grounding_20250421_202507011_20250722.jsonl +Rank 0: Loading mac_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/mac/action_grounding_20250410_202507011.jsonl with all sampling strategy +Rank 0: Loaded 1578 samples from VC:s3://gui-agent/data_20250407/mac/action_grounding_20250410_202507011.jsonl +Rank 0: Loading iphone_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/iphone/white/action_grounding_20250410_202507011.jsonl with all sampling strategy +Rank 0: Loaded 20394 samples from VC:s3://gui-agent/data_20250407/iphone/white/action_grounding_20250410_202507011.jsonl +Rank 0: Loading web_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/web/action_grounding_20250414_202507011.jsonl with random:20% sampling strategy +Rank 0: Loaded 14285 samples from VC:s3://gui-agent/data_20250407/web/action_grounding_20250414_202507011.jsonl +Rank 0: Loading android_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/android/action_grounding_20250410_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 3590 samples from VC:s3://gui-agent/data_20250407/android/action_grounding_20250410_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/action_grounding_20250416_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 21422 samples from VC:s3://gui-agent/data_20250407/windows/action_grounding_20250416_202507011_20250722.jsonl +Rank 0: Loading windows_human_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/human_action_grounding_20250416_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 100 samples from VC:s3://gui-agent/data_20250407/windows/human_action_grounding_20250416_202507011_20250722.jsonl +Rank 0: Loading windows_aug_cropping_action_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/sub_action_grounding_20250421_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 7675 samples from VC:s3://gui-agent/data_20250407/windows/sub_action_grounding_20250421_202507011.jsonl +Rank 0: Loading iphone_action_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/action_grounding_20250417_202507011.jsonl with all sampling strategy +Rank 0: Loaded 20116 samples from VC:s3://gui-agent/data_20250414/iphone/action_grounding_20250417_202507011.jsonl +Rank 0: Loading iphone_human_action_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/human_action_grounding_20250421_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2520 samples from VC:s3://gui-agent/data_20250414/iphone/human_action_grounding_20250421_202507011.jsonl +Rank 0: Loading mac_human_action_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/mac/human_action_grounding_20250418_202507011.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7814 samples from VC:s3://gui-agent/data_20250414/mac/human_action_grounding_20250418_202507011.jsonl +Rank 0: Loading android_action_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/Android/action_grounding_20250429_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 17838 samples from VC:s3://gui-agent/data_20250421/Android/action_grounding_20250429_202507011.jsonl +Rank 0: Loading android_action_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/Android/action_grounding_20250429_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 9008 samples from VC:s3://gui-agent/data_20250428/Android/action_grounding_20250429_202507011.jsonl +Rank 0: Loading web_canvas_action_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/web_canvas/action_grounding_20250429_202507011.jsonl with random:20% sampling strategy +Rank 0: Loaded 624 samples from VC:s3://gui-agent/data_20250428/web_canvas/action_grounding_20250429_202507011.jsonl +Rank 0: Loading web_action_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/web/action_grounding_20250505_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 100652 samples from VC:s3://gui-agent/data_20250421/web/action_grounding_20250505_202507011.jsonl +Rank 0: Loading ubuntu_action_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/ubuntu/action_grounding_20250505_202507011.jsonl with all sampling strategy +Rank 0: Loaded 28346 samples from VC:s3://gui-agent/data_20250428/ubuntu/action_grounding_20250505_202507011.jsonl +Rank 0: Loading android_action_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/android/action_grounding_20250506_202507011.jsonl with random:50% sampling strategy +Rank 0: Loaded 4907 samples from VC:s3://gui-agent/data_20250505/android/action_grounding_20250506_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/action_grounding_20250508_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 2635 samples from VC:s3://gui-agent/data_20250505/windows/action_grounding_20250508_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/crop_action_grounding_20250508_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 5234 samples from VC:s3://gui-agent/data_20250505/windows/crop_action_grounding_20250508_202507011_20250722.jsonl +Rank 0: Loading ubuntu_action_grounding_20250508 +Rank 0: Loading VC:s3://gui-agent/data_20250508/ubuntu/action_grounding_20250509_202507011.jsonl with all sampling strategy +Rank 0: Loaded 3404 samples from VC:s3://gui-agent/data_20250508/ubuntu/action_grounding_20250509_202507011.jsonl +Rank 0: Loading windows_action_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250510_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250510_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250510_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250510_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250526_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250526_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250526_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250526_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250526_3 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250527_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250527_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_3 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250527_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250527_202507011_20250722.jsonl +Rank 0: Loading windows_action_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/action_grounding_20250529_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 17050 samples from VC:s3://gui-agent/data_20250526/windows/action_grounding_20250529_202507011_20250722.jsonl +Rank 0: Loading windows_crop_action_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250529_202507011_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 17050 samples from VC:s3://gui-agent/data_20250526/windows/crop_action_grounding_20250529_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_1 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250510_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 2855 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250510_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_2 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250526_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 655 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250526_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_3 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250527_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 833 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250527_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250609_4 +Rank 0: Loading VC:s3://gui-agent/data_20250609/windows/action_grounding_20250529_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 2643 samples from VC:s3://gui-agent/data_20250609/windows/action_grounding_20250529_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_1 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250510_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250510_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_2 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250526_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250526_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_3 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250527_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250527_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_4 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250529_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5286 samples from VC:s3://gui-agent/data_20250616/windows_paste/action_grounding_20250529_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_5 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250510_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5710 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250510_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_6 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250526_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1310 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250526_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_7 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250527_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1632 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250527_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250616_8 +Rank 0: Loading VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250529_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 5286 samples from VC:s3://gui-agent/data_20250616/windows_pure_paste/action_grounding_20250529_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/action_grounding_20250619_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3130 samples from VC:s3://gui-agent/data_20250623/windows/action_grounding_20250619_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250619_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 3130 samples from VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250619_202507011_20250722.jsonl +Rank 0: Loading windows_hover_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/action_grounding_20250620_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 3333 samples from VC:s3://gui-agent/data_20250623/windows/action_grounding_20250620_202507011_20250722.jsonl +Rank 0: Loading windows_crop_hover_action_grounding_20250623 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250620_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 3333 samples from VC:s3://gui-agent/data_20250623/windows/crop_action_grounding_20250620_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_1 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 833 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_2 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1666 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_3 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1666 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250620_pure_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_4 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 358 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_5 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 782 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250623_6 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 782 samples from VC:s3://gui-agent/data_20250623/windows_augment/action_grounding_20250619_pure_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/action_grounding_20250627_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7946 samples from VC:s3://gui-agent/data_20250630/windows/action_grounding_20250627_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/crop_action_grounding_20250627_202507011_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 3973 samples from VC:s3://gui-agent/data_20250630/windows/crop_action_grounding_20250627_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_1 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 993 samples from VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1986 samples from VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1986 samples from VC:s3://gui-agent/data_20250630/windows_augment/action_grounding_20250627_pure_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/action_grounding_20250630_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 1990 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/action_grounding_20250630_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_action_grounding_20250630_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1990 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_action_grounding_20250630_202507011_20250722.jsonl +Rank 0: Loading windows_human_action_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/action_grounding_20250703_202507011_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 5040 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/action_grounding_20250703_202507011_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_action_grounding_20250703_202507011_20250722.jsonl with all sampling strategy +Rank 0: Loaded 5040 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_action_grounding_20250703_202507011_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_4 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 630 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_5 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1260 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_6 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 1260 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/action_grounding_20250703_pure_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_7 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_concat_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 249 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_concat_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_8 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 498 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_paste_202507011.jsonl +Rank 0: Loading windows_aug_action_grounding_20250630_9 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_pure_paste_202507011.jsonl with random:25% sampling strategy +Rank 0: Loaded 498 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/action_grounding_20250630_pure_paste_202507011.jsonl +Rank 0: Loading windows_human_action_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/action_grounding_20250708_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2538 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/action_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_action_grounding_20250708_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1269 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_action_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_aug_action_grounding_20250707_1 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_action_grounding_20250708.jsonl with random:25% sampling strategy +Rank 0: Loaded 172 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_action_grounding_20250708.jsonl +Rank 0: Loading windows_aug_action_grounding_20250707_2 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_action_grounding_20250708.jsonl with random:25% sampling strategy +Rank 0: Loaded 634 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_action_grounding_20250708.jsonl +Rank 0: Loading windows_aug_action_grounding_20250707_3 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_action_grounding_20250708.jsonl with random:25% sampling strategy +Rank 0: Loaded 634 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_action_grounding_20250708.jsonl +Rank 0: Loading windows_human_action_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/action_grounding_20250717_20250722.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 2832 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/action_grounding_20250717_20250722.jsonl +Rank 0: Loading windows_crop_human_action_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_action_grounding_20250717_20250722.jsonl with all sampling strategy +Rank 0: Loaded 2832 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_action_grounding_20250717_20250722.jsonl +Rank 0: Loading android_ocr_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/android/text_ocr_20250409.jsonl with all sampling strategy +Rank 0: Loaded 29878 samples from VC:s3://gui/data_20250328/android/text_ocr_20250409.jsonl +Rank 0: Loading mac_orc_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/mac/element_ocr_20250328.jsonl with all sampling strategy +Rank 0: Loaded 4393 samples from VC:s3://gui/data_20250328/mac/element_ocr_20250328.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/ubuntu/internvl_grounding_20250407.jsonl with random:80% sampling strategy +Rank 0: Loaded 254 samples from VC:s3://gui/data_20250310/ubuntu/internvl_grounding_20250407.jsonl +Rank 0: Loading windows_click_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/internvl_grounding_function_20250421.jsonl with random:80% sampling strategy +Rank 0: Loaded 1802 samples from VC:s3://gui/data_20250310/windows/internvl_grounding_function_20250421.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/ubuntu/internvl_grounding_20250407.jsonl with random:80% sampling strategy +Rank 0: Loaded 53 samples from VC:s3://gui/data_20250317/ubuntu/internvl_grounding_20250407.jsonl +Rank 0: Loading windows_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/internvl_grounding_20250421_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 6578 samples from VC:s3://gui/data_20250317/windows/internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/crop_internvl_grounding_20250421_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 3287 samples from VC:s3://gui/data_20250317/windows/crop_internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading windows_click_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/internvl_grounding_function_20250421_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 542 samples from VC:s3://gui/data_20250317/windows/internvl_grounding_function_20250421_20250722.jsonl +Rank 0: Loading windows_crop_click_internvl_grounding_20250317 +Rank 0: Loading VC:s3://gui/data_20250317/windows/crop_internvl_grounding_function_20250421_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 270 samples from VC:s3://gui/data_20250317/windows/crop_internvl_grounding_function_20250421_20250722.jsonl +Rank 0: Loading windows_internvl_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/internvl_grounding_20250421_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 10908 samples from VC:s3://gui/data_20250310/windows/internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250310 +Rank 0: Loading VC:s3://gui/data_20250310/windows/crop_internvl_grounding_20250421_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 5453 samples from VC:s3://gui/data_20250310/windows/crop_internvl_grounding_20250421_20250722.jsonl +Rank 0: Loading android_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/android/internvl_grounding_20250409.jsonl with random:80% sampling strategy +Rank 0: Loaded 13950 samples from VC:s3://gui/data_20250328/android/internvl_grounding_20250409.jsonl +Rank 0: Loading windows_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui-agent/data_20250328/windows/internvl_grounding_20250425_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 12390 samples from VC:s3://gui-agent/data_20250328/windows/internvl_grounding_20250425_20250722.jsonl +Rank 0: Loading web_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/web_25k/internvl_grounding_20250409.jsonl with random:80% sampling strategy +Rank 0: Loaded 13402 samples from VC:s3://gui/data_20250328/web_25k/internvl_grounding_20250409.jsonl +Rank 0: Loading icon_internvl_grounding_20250328 +Rank 0: Loading VC:s3://gui/data_20250328/icon_canva/icon_anno_20250328.jsonl with all sampling strategy +Rank 0: Loaded 81303 samples from VC:s3://gui/data_20250328/icon_canva/icon_anno_20250328.jsonl +Rank 0: Loading mac_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/mac/internvl_grounding_20250410.jsonl with all sampling strategy +Rank 0: Loaded 1251 samples from VC:s3://gui-agent/data_20250407/mac/internvl_grounding_20250410.jsonl +Rank 0: Loading iphone_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/iphone/white/internvl_grounding_20250410.jsonl with all sampling strategy +Rank 0: Loaded 27849 samples from VC:s3://gui-agent/data_20250407/iphone/white/internvl_grounding_20250410.jsonl +Rank 0: Loading web_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/web/internvl_grounding_20250414.jsonl with random:80% sampling strategy +Rank 0: Loaded 51607 samples from VC:s3://gui-agent/data_20250407/web/internvl_grounding_20250414.jsonl +Rank 0: Loading android_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/android/internvl_grounding_20250410.jsonl with random:80% sampling strategy +Rank 0: Loaded 6281 samples from VC:s3://gui-agent/data_20250407/android/internvl_grounding_20250410.jsonl +Rank 0: Loading windows_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/internvl_grounding_20250416_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 25961 samples from VC:s3://gui-agent/data_20250407/windows/internvl_grounding_20250416_20250722.jsonl +Rank 0: Loading windows_cropping_internvl_grounding_20250407 +Rank 0: Loading VC:s3://gui-agent/data_20250407/windows/sub_internvl_grounding_20250421.jsonl with random:40% sampling strategy +Rank 0: Loaded 9763 samples from VC:s3://gui-agent/data_20250407/windows/sub_internvl_grounding_20250421.jsonl +Rank 0: Loading iphone_internvl_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/internvl_grounding_20250417.jsonl with all sampling strategy +Rank 0: Loaded 16896 samples from VC:s3://gui-agent/data_20250414/iphone/internvl_grounding_20250417.jsonl +Rank 0: Loading iphone_human_internvl_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/iphone/human_internvl_grounding_20250421.jsonl with all sampling strategy +Rank 0: Loaded 927 samples from VC:s3://gui-agent/data_20250414/iphone/human_internvl_grounding_20250421.jsonl +Rank 0: Loading mac_human_internvl_grounding_20250414 +Rank 0: Loading VC:s3://gui-agent/data_20250414/mac/human_internvl_grounding_20250418.jsonl with all sampling strategy +Rank 0: Loaded 3051 samples from VC:s3://gui-agent/data_20250414/mac/human_internvl_grounding_20250418.jsonl +Rank 0: Loading android_internvl_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/Android/internvl_grounding_20250429.jsonl with random:80% sampling strategy +Rank 0: Loaded 25217 samples from VC:s3://gui-agent/data_20250421/Android/internvl_grounding_20250429.jsonl +Rank 0: Loading android_internvl_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/Android/internvl_grounding_20250429.jsonl with random:80% sampling strategy +Rank 0: Loaded 12677 samples from VC:s3://gui-agent/data_20250428/Android/internvl_grounding_20250429.jsonl +Rank 0: Loading web_canvas_internvl_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/web_canvas/internvl_grounding_20250429.jsonl with random:40% sampling strategy +Rank 0: Loaded 1566 samples from VC:s3://gui-agent/data_20250428/web_canvas/internvl_grounding_20250429.jsonl +Rank 0: Loading web_internvl_grounding_20250421 +Rank 0: Loading VC:s3://gui-agent/data_20250421/web/internvl_grounding_20250505.jsonl with random:80% sampling strategy +Rank 0: Loaded 174170 samples from VC:s3://gui-agent/data_20250421/web/internvl_grounding_20250505.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250428 +Rank 0: Loading VC:s3://gui-agent/data_20250428/ubuntu/internvl_grounding_20250505.jsonl with random:80% sampling strategy +Rank 0: Loaded 24862 samples from VC:s3://gui-agent/data_20250428/ubuntu/internvl_grounding_20250505.jsonl +Rank 0: Loading android_internvl_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/android/internvl_grounding_20250506.jsonl with random:80% sampling strategy +Rank 0: Loaded 9142 samples from VC:s3://gui-agent/data_20250505/android/internvl_grounding_20250506.jsonl +Rank 0: Loading windows_internvl_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/internvl_grounding_20250508_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 4229 samples from VC:s3://gui-agent/data_20250505/windows/internvl_grounding_20250508_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250505 +Rank 0: Loading VC:s3://gui-agent/data_20250505/windows/crop_internvl_grounding_20250508_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 4200 samples from VC:s3://gui-agent/data_20250505/windows/crop_internvl_grounding_20250508_20250722.jsonl +Rank 0: Loading ubuntu_internvl_grounding_20250508 +Rank 0: Loading VC:s3://gui-agent/data_20250508/ubuntu/internvl_grounding_20250509.jsonl with random:80% sampling strategy +Rank 0: Loaded 2868 samples from VC:s3://gui-agent/data_20250508/ubuntu/internvl_grounding_20250509.jsonl +Rank 0: Loading windows_internvl_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250510_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 9494 samples from VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250510_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250526_1 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250510_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 9494 samples from VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250510_20250722.jsonl +Rank 0: Loading windows_internvl_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250526_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 2270 samples from VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250526_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250526_2 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250526_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 2270 samples from VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250526_20250722.jsonl +Rank 0: Loading windows_internvl_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250529_20250722.jsonl with random:80% sampling strategy +Rank 0: Loaded 28466 samples from VC:s3://gui-agent/data_20250526/windows/internvl_grounding_20250529_20250722.jsonl +Rank 0: Loading windows_crop_internvl_grounding_20250526_4 +Rank 0: Loading VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250529_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 28466 samples from VC:s3://gui-agent/data_20250526/windows/crop_internvl_grounding_20250529_20250722.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250619_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1620 samples from VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250619_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250619_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1620 samples from VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250619_20250722.jsonl +Rank 0: Loading windows_hover_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250620_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1714 samples from VC:s3://gui-agent/data_20250623/windows/internvl_grounding_20250620_20250722.jsonl +Rank 0: Loading windows_crop_hover_internvl_grounding_20250523 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250620_20250722.jsonl with random:25% sampling strategy +Rank 0: Loaded 1714 samples from VC:s3://gui-agent/data_20250623/windows/crop_internvl_grounding_20250620_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_1 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 1372 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_2 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2743 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_3 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2743 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250620_pure_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_4 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 590 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_5 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 1296 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250623_6 +Rank 0: Loading VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 1296 samples from VC:s3://gui-agent/data_20250623/windows_augment/internvl_grounding_20250619_pure_paste.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/internvl_grounding_20250627_20250722.jsonl with all sampling strategy +Rank 0: Loaded 4230 samples from VC:s3://gui-agent/data_20250630/windows/internvl_grounding_20250627_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250630 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows/crop_internvl_grounding_20250627_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 4230 samples from VC:s3://gui-agent/data_20250630/windows/crop_internvl_grounding_20250627_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_1 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 1692 samples from VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 3384 samples from VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 3384 samples from VC:s3://gui-agent/data_20250630/windows_augment/internvl_grounding_20250627_pure_paste.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/internvl_grounding_20250630_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1077 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/internvl_grounding_20250630_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250630_2 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_internvl_grounding_20250630_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 862 samples from VC:s3://gui-agent/data_20250630/windows_data_20250630/crop_internvl_grounding_20250630_20250722.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/internvl_grounding_20250703_20250722.jsonl with all sampling strategy +Rank 0: Loaded 2676 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/internvl_grounding_20250703_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250630_3 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_internvl_grounding_20250703_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 2676 samples from VC:s3://gui-agent/data_20250630/windows_data_20250703/crop_internvl_grounding_20250703_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_4 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 1070 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_5 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2141 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_6 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 2141 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250703/internvl_grounding_20250703_pure_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_7 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_concat.jsonl with random:40% sampling strategy +Rank 0: Loaded 431 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_concat.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_8 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 862 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_paste.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250630_9 +Rank 0: Loading VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_pure_paste.jsonl with random:40% sampling strategy +Rank 0: Loaded 862 samples from VC:s3://gui-agent/data_20250630/windows_augment_data_20250630/internvl_grounding_20250630_pure_paste.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/internvl_grounding_20250708_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1344 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/internvl_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250707 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_internvl_grounding_20250708_20250722.jsonl with random:50% sampling strategy +Rank 0: Loaded 1344 samples from VC:s3://gui-agent/data_20250707/windows_data_20250707/crop_internvl_grounding_20250708_20250722.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250707_1 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_internvl_grounding_20250708.jsonl with random:40% sampling strategy +Rank 0: Loaded 292 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/concat_internvl_grounding_20250708.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250707_2 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_internvl_grounding_20250708.jsonl with random:40% sampling strategy +Rank 0: Loaded 1075 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/paste_internvl_grounding_20250708.jsonl +Rank 0: Loading windows_aug_internvl_grounding_20250707_3 +Rank 0: Loading VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_internvl_grounding_20250708.jsonl with random:40% sampling strategy +Rank 0: Loaded 1075 samples from VC:s3://gui-agent/data_20250707/windows_augment_data_20250707/pure_paste_internvl_grounding_20250708.jsonl +Rank 0: Loading windows_human_internvl_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/internvl_grounding_20250717_20250722.jsonl with all sampling strategy +Rank 0: Loaded 1509 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/internvl_grounding_20250717_20250722.jsonl +Rank 0: Loading windows_crop_human_internvl_grounding_20250714 +Rank 0: Loading VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_internvl_grounding_20250717_20250722.jsonl with random:40% sampling strategy +Rank 0: Loaded 1207 samples from VC:s3://gui-agent/data_20250714/windows_data_20250714/crop_internvl_grounding_20250717_20250722.jsonl +Rank 0: Loading uibert_train_ground_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/uibert_train_ground_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 4646 samples from VC:s3://gui/new_annotations/gui_data_grounding/uibert_train_ground_d240430_v1.jsonl +Rank 0: Loading openapp_taperception_grounding_d240815_v2 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/openapp_taperception_grounding_d240815_v2.jsonl with all sampling strategy +Rank 0: Loaded 2500 samples from VC:s3://gui/new_annotations/gui_data_grounding/openapp_taperception_grounding_d240815_v2.jsonl +Rank 0: Loading openapp_widget_grounding_d240815_v2 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/openapp_widget_grounding_d240815_v2.jsonl with all sampling strategy +Rank 0: Loaded 14878 samples from VC:s3://gui/new_annotations/gui_data_grounding/openapp_widget_grounding_d240815_v2.jsonl +Rank 0: Loading openapp_mug_grounding_d240812 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/openapp_mug_grounding_d240812.jsonl with all sampling strategy +Rank 0: Loaded 26090 samples from VC:s3://gui/new_annotations/gui_data_grounding/openapp_mug_grounding_d240812.jsonl +Rank 0: Loading private_ui_phone_2403_ground_d240430_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/private_ui_phone_2403_ground_d240430_v1.jsonl with all sampling strategy +Rank 0: Loaded 24798 samples from VC:s3://gui/new_annotations/gui_data_grounding/private_ui_phone_2403_ground_d240430_v1.jsonl +Rank 0: Loading private_ui_aig_share_2405_ground_d240521_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2405_ground_d240521_v1.jsonl with all sampling strategy +Rank 0: Loaded 5008 samples from VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2405_ground_d240521_v1.jsonl +Rank 0: Loading private_ui_aig_share_2406_ground_d240612_v1 +Rank 0: Loading VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2406_ground_d240612_v1.jsonl with all sampling strategy +Rank 0: Loaded 7903 samples from VC:s3://gui/new_annotations/gui_data_grounding/private_ui_aig_share_2406_ground_d240612_v1.jsonl +Rank 0: Loading windows_pc_agent_e_planning_cot +Rank 0: Loading VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 55564 samples from VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data.jsonl +Rank 0: Loading windows_pc_agent_e_navigation +Rank 0: Loading VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data_without_think.jsonl with all sampling strategy +Rank 0: Loaded 27782 samples from VC:s3://gui-agent/data_20250609/pc_agent_e/pc_agent_e_training_data_without_think.jsonl +Rank 0: Loading os_genesis_ac_training_data +Rank 0: Skipping os_genesis_ac_training_data due to repeat_time=0 +Rank 0: Loading os_genesis_aw_training_data +Rank 0: Skipping os_genesis_aw_training_data due to repeat_time=0 +Rank 0: Loading os_genesis_web_training +Rank 0: Skipping os_genesis_web_training due to repeat_time=0 +Rank 0: Loading gui_odyssey_plus_1 +Rank 0: Skipping gui_odyssey_plus_1 due to repeat_time=0 +Rank 0: Loading gui_odyssey_plus_2 +Rank 0: Skipping gui_odyssey_plus_2 due to repeat_time=0 +Rank 0: Loading gui_odyssey_plus_custom_3 +Rank 0: Skipping gui_odyssey_plus_custom_3 due to repeat_time=0 +Rank 0: Loading mm_gui_mid +Rank 0: Skipping mm_gui_mid due to repeat_time=0 +Rank 0: Loading text_gui_mid +Rank 0: Skipping text_gui_mid due to repeat_time=0 +Rank 0: Loading gui_mid_trajectory +Rank 0: Skipping gui_mid_trajectory due to repeat_time=0 +Rank 0: Loading ubuntu_rag +Rank 0: Loading VC:s3://gui-agent/cua_text_rag/ubuntu_rag.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 7024 samples from VC:s3://gui-agent/cua_text_rag/ubuntu_rag.jsonl +Rank 0: Loading windows_rag +Rank 0: Loading VC:s3://gui-agent/cua_text_rag/windows_rag.jsonl with repeat:2 sampling strategy +Rank 0: Loaded 3144 samples from VC:s3://gui-agent/cua_text_rag/windows_rag.jsonl +Rank 0: Total training samples: 6240940 +Rank 0: Formatting inputs...Skip in lazy mode +Detected kernel version 3.10.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher. +Rank 0: Length of multimodal samples: 6230528, pure textual samples: 9984 +Parameter Offload: Total persistent parameters: 755712 in 408 params + 0%| | 0/12188 [00:00 + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f3075b3d7b0> +[Try #0] Failed to fetch sample 4385311 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f3075b3d7b0> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'HyperKitty'"}, {'from': 'gpt', 'value': '\nclick(x=0.4145, y=0.2765)\n'}]} + 91%|█████████▏| 11124/12188 [19:02<2:28:31, 8.38s/it] {'loss': 0.2839, 'grad_norm': 0.6880101939996542, 'learning_rate': 1.9890985911810322e-07, 'epoch': 0.91} + 91%|█████████▏| 11124/12188 [19:02<2:28:31, 8.38s/it] 91%|█████████▏| 11125/12188 [19:09<2:24:21, 8.15s/it] {'loss': 0.2792, 'grad_norm': 0.6611435576517619, 'learning_rate': 1.9853898562667174e-07, 'epoch': 0.91} + 91%|█████████▏| 11125/12188 [19:09<2:24:21, 8.15s/it] 91%|█████████▏| 11126/12188 [19:17<2:19:58, 7.91s/it] {'loss': 0.2889, 'grad_norm': 0.7651843177811886, 'learning_rate': 1.981684512067028e-07, 'epoch': 0.91} + 91%|█████████▏| 11126/12188 [19:17<2:19:58, 7.91s/it] 91%|█████████▏| 11127/12188 [19:24<2:17:00, 7.75s/it] {'loss': 0.3165, 'grad_norm': 0.7063201955641065, 'learning_rate': 1.9779825588436276e-07, 'epoch': 0.91} + 91%|█████████▏| 11127/12188 [19:24<2:17:00, 7.75s/it] 91%|█████████▏| 11128/12188 [19:32<2:18:50, 7.86s/it] {'loss': 0.2831, 'grad_norm': 0.7166086200810546, 'learning_rate': 1.9742839968579562e-07, 'epoch': 0.91} + 91%|█████████▏| 11128/12188 [19:32<2:18:50, 7.86s/it] 91%|█████████▏| 11129/12188 [19:40<2:20:22, 7.95s/it] {'loss': 0.2795, 'grad_norm': 0.7272325939136827, 'learning_rate': 1.9705888263711837e-07, 'epoch': 0.91} + 91%|█████████▏| 11129/12188 [19:40<2:20:22, 7.95s/it] 91%|█████████▏| 11130/12188 [19:48<2:16:54, 7.76s/it] {'loss': 0.3198, 'grad_norm': 0.7363807413938573, 'learning_rate': 1.9668970476442617e-07, 'epoch': 0.91} + 91%|█████████▏| 11130/12188 [19:48<2:16:54, 7.76s/it] 91%|█████████▏| 11131/12188 [19:55<2:16:53, 7.77s/it] {'loss': 0.2825, 'grad_norm': 0.8685586385926733, 'learning_rate': 1.9632086609379041e-07, 'epoch': 0.91} + 91%|█████████▏| 11131/12188 [19:55<2:16:53, 7.77s/it] 91%|█████████▏| 11132/12188 [20:03<2:15:45, 7.71s/it] {'loss': 0.3364, 'grad_norm': 0.6729068293537376, 'learning_rate': 1.9595236665125694e-07, 'epoch': 0.91} + 91%|█████████▏| 11132/12188 [20:03<2:15:45, 7.71s/it] 91%|█████████▏| 11133/12188 [20:13<2:26:02, 8.31s/it] {'loss': 0.2847, 'grad_norm': 0.7141158952086742, 'learning_rate': 1.9558420646284937e-07, 'epoch': 0.91} + 91%|█████████▏| 11133/12188 [20:13<2:26:02, 8.31s/it] 91%|█████████▏| 11134/12188 [20:22<2:31:32, 8.63s/it] {'loss': 0.3148, 'grad_norm': 0.671388230888706, 'learning_rate': 1.952163855545658e-07, 'epoch': 0.91} + 91%|█████████▏| 11134/12188 [20:22<2:31:32, 8.63s/it] 91%|█████████▏| 11135/12188 [20:30<2:25:07, 8.27s/it] {'loss': 0.308, 'grad_norm': 0.7163166672678231, 'learning_rate': 1.9484890395238155e-07, 'epoch': 0.91} + 91%|█████████▏| 11135/12188 [20:30<2:25:07, 8.27s/it] 91%|█████████▏| 11136/12188 [20:37<2:20:49, 8.03s/it] {'loss': 0.2926, 'grad_norm': 0.6620913463068657, 'learning_rate': 1.9448176168224863e-07, 'epoch': 0.91} + 91%|█████████▏| 11136/12188 [20:37<2:20:49, 8.03s/it] 91%|█████████▏| 11137/12188 [20:44<2:17:33, 7.85s/it] {'loss': 0.3276, 'grad_norm': 0.7310237576023542, 'learning_rate': 1.941149587700919e-07, 'epoch': 0.91} + 91%|█████████▏| 11137/12188 [20:44<2:17:33, 7.85s/it] 91%|█████████▏| 11138/12188 [20:52<2:14:46, 7.70s/it] {'loss': 0.3061, 'grad_norm': 0.6719753184940123, 'learning_rate': 1.9374849524181617e-07, 'epoch': 0.91} + 91%|█████████▏| 11138/12188 [20:52<2:14:46, 7.70s/it] 91%|█████████▏| 11139/12188 [20:59<2:13:05, 7.61s/it] {'loss': 0.2876, 'grad_norm': 0.7110357693715192, 'learning_rate': 1.933823711232996e-07, 'epoch': 0.91} + 91%|█████████▏| 11139/12188 [20:59<2:13:05, 7.61s/it] 91%|█████████▏| 11140/12188 [21:07<2:11:55, 7.55s/it] {'loss': 0.2736, 'grad_norm': 0.7633738260756525, 'learning_rate': 1.9301658644039712e-07, 'epoch': 0.91} + 91%|█████████▏| 11140/12188 [21:07<2:11:55, 7.55s/it] 91%|█████████▏| 11141/12188 [21:14<2:11:34, 7.54s/it] {'loss': 0.2947, 'grad_norm': 0.6973669268543403, 'learning_rate': 1.9265114121894135e-07, 'epoch': 0.91} + 91%|█████████▏| 11141/12188 [21:14<2:11:34, 7.54s/it] 91%|█████████▏| 11142/12188 [21:22<2:11:16, 7.53s/it] {'loss': 0.312, 'grad_norm': 0.7265682684983477, 'learning_rate': 1.922860354847378e-07, 'epoch': 0.91} + 91%|█████████▏| 11142/12188 [21:22<2:11:16, 7.53s/it] 91%|█████████▏| 11143/12188 [21:32<2:26:27, 8.41s/it] {'loss': 0.3032, 'grad_norm': 1.3213079836829484, 'learning_rate': 1.919212692635708e-07, 'epoch': 0.91} + 91%|█████████▏| 11143/12188 [21:32<2:26:27, 8.41s/it] 91%|█████████▏| 11144/12188 [21:40<2:23:34, 8.25s/it] {'loss': 0.2869, 'grad_norm': 0.7311004340338896, 'learning_rate': 1.915568425811981e-07, 'epoch': 0.91} + 91%|█████████▏| 11144/12188 [21:40<2:23:34, 8.25s/it] 91%|█████████▏| 11145/12188 [21:49<2:27:53, 8.51s/it] {'loss': 0.2788, 'grad_norm': 0.7224139881290137, 'learning_rate': 1.9119275546335637e-07, 'epoch': 0.91} + 91%|█████████▏| 11145/12188 [21:49<2:27:53, 8.51s/it] 91%|█████████▏| 11146/12188 [21:56<2:21:45, 8.16s/it] {'loss': 0.27, 'grad_norm': 0.7465645922246069, 'learning_rate': 1.9082900793575665e-07, 'epoch': 0.91} + 91%|█████████▏| 11146/12188 [21:56<2:21:45, 8.16s/it] 91%|█████████▏| 11147/12188 [22:04<2:17:28, 7.92s/it] {'loss': 0.278, 'grad_norm': 0.7400633987061468, 'learning_rate': 1.904656000240851e-07, 'epoch': 0.91} + 91%|█████████▏| 11147/12188 [22:04<2:17:28, 7.92s/it] 91%|█████████▏| 11148/12188 [22:13<2:21:33, 8.17s/it] {'loss': 0.3184, 'grad_norm': 0.7230530685052629, 'learning_rate': 1.901025317540056e-07, 'epoch': 0.91} + 91%|█████��███▏| 11148/12188 [22:13<2:21:33, 8.17s/it] 91%|█████████▏| 11149/12188 [22:22<2:25:59, 8.43s/it] {'loss': 0.3331, 'grad_norm': 0.675960299376606, 'learning_rate': 1.897398031511588e-07, 'epoch': 0.91} + 91%|█████████▏| 11149/12188 [22:22<2:25:59, 8.43s/it] 91%|█████████▏| 11150/12188 [22:30<2:23:53, 8.32s/it] {'loss': 0.2967, 'grad_norm': 0.7848120208910211, 'learning_rate': 1.8937741424115751e-07, 'epoch': 0.91} + 91%|█████████▏| 11150/12188 [22:30<2:23:53, 8.32s/it] 91%|█████████▏| 11151/12188 [22:38<2:23:59, 8.33s/it] {'loss': 0.2932, 'grad_norm': 0.6717026894063174, 'learning_rate': 1.8901536504959516e-07, 'epoch': 0.91} + 91%|█████████▏| 11151/12188 [22:38<2:23:59, 8.33s/it] 91%|█████████▏| 11152/12188 [22:45<2:18:36, 8.03s/it] {'loss': 0.3189, 'grad_norm': 0.664932907836154, 'learning_rate': 1.8865365560203797e-07, 'epoch': 0.91} + 91%|█████████▏| 11152/12188 [22:45<2:18:36, 8.03s/it] 92%|█████████▏| 11153/12188 [22:55<2:24:49, 8.40s/it] {'loss': 0.2904, 'grad_norm': 0.690621836871945, 'learning_rate': 1.8829228592402938e-07, 'epoch': 0.92} + 92%|█████████▏| 11153/12188 [22:55<2:24:49, 8.40s/it] 92%|█████████▏| 11154/12188 [23:02<2:21:02, 8.18s/it] {'loss': 0.291, 'grad_norm': 1.0322625913187795, 'learning_rate': 1.879312560410901e-07, 'epoch': 0.92} + 92%|█████████▏| 11154/12188 [23:02<2:21:02, 8.18s/it] 92%|█████████▏| 11155/12188 [23:10<2:16:48, 7.95s/it] {'loss': 0.2977, 'grad_norm': 0.7454802152204937, 'learning_rate': 1.8757056597871305e-07, 'epoch': 0.92} + 92%|█████████▏| 11155/12188 [23:10<2:16:48, 7.95s/it] 92%|█████████▏| 11156/12188 [23:17<2:14:34, 7.82s/it] {'loss': 0.3649, 'grad_norm': 0.7651793497618505, 'learning_rate': 1.8721021576237175e-07, 'epoch': 0.92} + 92%|█████████▏| 11156/12188 [23:17<2:14:34, 7.82s/it] 92%|█████████▏| 11157/12188 [23:25<2:14:16, 7.81s/it] {'loss': 0.2707, 'grad_norm': 0.7272753752748902, 'learning_rate': 1.8685020541751197e-07, 'epoch': 0.92} + 92%|█████████▏| 11157/12188 [23:25<2:14:16, 7.81s/it] 92%|█████████▏| 11158/12188 [23:32<2:12:03, 7.69s/it] {'loss': 0.2764, 'grad_norm': 0.705189845612724, 'learning_rate': 1.8649053496955838e-07, 'epoch': 0.92} + 92%|█████████▏| 11158/12188 [23:32<2:12:03, 7.69s/it] 92%|█████████▏| 11159/12188 [23:40<2:13:22, 7.78s/it] {'loss': 0.3137, 'grad_norm': 0.6965057558910952, 'learning_rate': 1.8613120444390952e-07, 'epoch': 0.92} + 92%|█████████▏| 11159/12188 [23:40<2:13:22, 7.78s/it] 92%|█████████▏| 11160/12188 [23:52<2:34:22, 9.01s/it] {'loss': 0.3672, 'grad_norm': 0.7121474424237527, 'learning_rate': 1.857722138659407e-07, 'epoch': 0.92} + 92%|█████████▏| 11160/12188 [23:52<2:34:22, 9.01s/it] 92%|█████████▏| 11161/12188 [24:00<2:26:30, 8.56s/it] {'loss': 0.2866, 'grad_norm': 0.7304445228206803, 'learning_rate': 1.8541356326100436e-07, 'epoch': 0.92} + 92%|█████████▏| 11161/12188 [24:00<2:26:30, 8.56s/it] 92%|█████████▏| 11162/12188 [24:07<2:21:17, 8.26s/it] {'loss': 0.309, 'grad_norm': 0.7228311666864288, 'learning_rate': 1.850552526544258e-07, 'epoch': 0.92} + 92%|█████████▏| 11162/12188 [24:07<2:21:17, 8.26s/it] 92%|█████████▏| 11163/12188 [24:15<2:20:05, 8.20s/it] {'loss': 0.279, 'grad_norm': 0.6861195223137565, 'learning_rate': 1.8469728207150982e-07, 'epoch': 0.92} + 92%|█████████▏| 11163/12188 [24:15<2:20:05, 8.20s/it] 92%|█████████▏| 11164/12188 [24:23<2:15:19, 7.93s/it] {'loss': 0.2917, 'grad_norm': 0.7630807386893559, 'learning_rate': 1.8433965153753563e-07, 'epoch': 0.92} + 92%|█████████▏| 11164/12188 [24:23<2:15:19, 7.93s/it] 92%|█████���███▏| 11165/12188 [24:30<2:12:35, 7.78s/it] {'loss': 0.3243, 'grad_norm': 0.753962437814923, 'learning_rate': 1.8398236107775802e-07, 'epoch': 0.92} + 92%|█████████▏| 11165/12188 [24:30<2:12:35, 7.78s/it] 92%|█████████▏| 11166/12188 [24:37<2:09:53, 7.63s/it] {'loss': 0.3038, 'grad_norm': 0.7292182178479899, 'learning_rate': 1.8362541071740846e-07, 'epoch': 0.92} + 92%|█████████▏| 11166/12188 [24:37<2:09:53, 7.63s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 90, in pil_loader + return img.convert("RGB") + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 993, in convert + self.load() + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/ImageFile.py", line 319, in load + raise _get_oserror(err_code, encoder=False) +OSError: broken data stream when reading image file +[Try #0] Failed to fetch sample 6014818 in VC:s3://gui-agent/data_20250623/windows_augment/images. Exception: broken data stream when reading image file +Problematic sample: {'image': 'autocad/20250509_125727_1/images/before_screenshot_1_id_121_internvl_position_crop_1_grounding_instructions_point_o_paste.png', 'conversations': [{'from': 'human', 'value': "\nOnly respond with the coordinates: The 'CUSTOMIZE' text is located in the left side of the application window, within the Tool Palettes section. It's positioned at the top of what appears to be a customization panel, directly above the helper text that reads 'Press F1 for more help'."}, {'from': 'gpt', 'value': "The 'CUSTOMIZE' text is located in the left side of the application window, within the Tool Palettes section. It's positioned at the top of what appears to be a customization panel, directly above the helper text that reads 'Press F1 for more help'.[[495, 550, 515, 557]]"}], 'width': 3600, 'height': 2338} + 92%|█████████▏| 11167/12188 [24:48<2:24:37, 8.50s/it] {'loss': 0.3021, 'grad_norm': 0.6704179740482594, 'learning_rate': 1.832688004816946e-07, 'epoch': 0.92} + 92%|█████████▏| 11167/12188 [24:48<2:24:37, 8.50s/it] 92%|█████████▏| 11168/12188 [24:55<2:19:42, 8.22s/it] {'loss': 0.2643, 'grad_norm': 0.6983363922874645, 'learning_rate': 1.8291253039579905e-07, 'epoch': 0.92} + 92%|█████████▏| 11168/12188 [24:55<2:19:42, 8.22s/it] 92%|█████████▏| 11169/12188 [25:03<2:15:11, 7.96s/it] {'loss': 0.2979, 'grad_norm': 0.8452411253206148, 'learning_rate': 1.8255660048488223e-07, 'epoch': 0.92} + 92%|█████████▏| 11169/12188 [25:03<2:15:11, 7.96s/it] 92%|█████████▏| 11170/12188 [25:10<2:12:13, 7.79s/it] {'loss': 0.3348, 'grad_norm': 0.8109625656939726, 'learning_rate': 1.8220101077407738e-07, 'epoch': 0.92} + 92%|█████████▏| 11170/12188 [25:10<2:12:13, 7.79s/it] 92%|█████████▏| 11171/12188 [25:20<2:21:55, 8.37s/it] {'loss': 0.2865, 'grad_norm': 0.7071663711852987, 'learning_rate': 1.8184576128849773e-07, 'epoch': 0.92} + 92%|█████████▏| 11171/12188 [25:20<2:21:55, 8.37s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1348, in _get_item + raise ValueError( +ValueError: Number of image tokens ['63eeb12752a6426abeb129b8049d5bddstep20.png', '63eeb12752a6426abeb129b8049d5bddstep21.png'] does not match number of images None +[Try #0] Failed to fetch sample 1057917 in VC:s3://gui/aguvis/aguvis-stage2/amex/images. Exception: Number of image tokens ['63eeb12752a6426abeb129b8049d5bddstep20.png', '63eeb12752a6426abeb129b8049d5bddstep21.png'] does not match number of images None +Problematic sample: {'image': ['63eeb12752a6426abeb129b8049d5bddstep20.png', '63eeb12752a6426abeb129b8049d5bddstep21.png'], 'conversations': [{'from': 'human', 'value': "\nPlease generate the next move according to the UI screenshot, the task and previous operations.\n\nTask:\nI want to book a hotel in london, prize should be less than $600, guest rating is 8+, 4 star rating, breakfast included\n\nPrevious operations:\nStep 1: Tap on the Chrome app to start searching for hotels in London.\nStep 2: Type 'Chrome' in the search bar to open the Chrome browser.\nStep 3: Tap on the Hotels.com app icon to begin searching for a hotel.\nStep 4: Wait for the Hotels.com app to finish loading to proceed with the hotel search.\nStep 5: Tap on the 'Going to' field to enter London as the destination.\nStep 6: Tap on 'London' from Recent Searches to select it as the destination for the hotel search.\nStep 7: Tap on 'London, England, United Kingdom' to select it as the destination for the hotel search.\nStep 8: Tap on the 'Search' button to view available hotels in London.\nStep 9: Tap on the filter icon or option to apply filters to the hotel search results.\nStep 10: Swipe up on the screen to reveal more filter options, such as guest rating and hotel stars.\nStep 11: Swipe up to reveal more filter options, specifically guest ratings and hotel stars.\nStep 12: Tap on the 4-star rating box to select it.\nStep 13: Tap 'Done' to confirm the current filter selections and proceed.\nStep 14: Swipe up on the screen to locate and select the 'breakfast included' filter option.\nStep 15: Tap on 'Breakfast included' to select it.\nStep 16: Tap on the 'Done' button to apply the filters and view available hotel options.\nStep 17: Tap on the topmost hotel card to view its details.\nStep 18: Tap on the 'Select a room' button to check available room options and their prices.\nStep 19: Tap on 'Select and customize' to proceed with the room selection and customization options.\nStep 20: Tap on 'Breakfast buffet' to select it, then tap on 'Reserve' to book the room."}, {'from': 'gpt', 'value': "\nWait for the loading to complete to confirm the booking status.\n\n\nterminate(status='success')\n"}]} + 92%|█████████▏| 11172/12188 [25:27<2:16:33, 8.06s/it] {'loss': 0.2674, 'grad_norm': 0.6619586319323217, 'learning_rate': 1.814908520532288e-07, 'epoch': 0.92} + 92%|█████████▏| 11172/12188 [25:27<2:16:33, 8.06s/it] 92%|█████████▏| 11173/12188 [25:36<2:17:41, 8.14s/it] {'loss': 0.2723, 'grad_norm': 0.8184110438987262, 'learning_rate': 1.8113628309333497e-07, 'epoch': 0.92} + 92%|█████████▏| 11173/12188 [25:36<2:17:41, 8.14s/it] 92%|█████████▏| 11174/12188 [25:45<2:24:19, 8.54s/it] {'loss': 0.2919, 'grad_norm': 0.6975408489152973, 'learning_rate': 1.8078205443385456e-07, 'epoch': 0.92} + 92%|█████████▏| 11174/12188 [25:45<2:24:19, 8.54s/it] 92%|█████████▏| 11175/12188 [25:52<2:17:49, 8.16s/it] {'loss': 0.2725, 'grad_norm': 0.667828990298193, 'learning_rate': 1.804281660998025e-07, 'epoch': 0.92} + 92%|█████████▏| 11175/12188 [25:52<2:17:49, 8.16s/it] 92%|█████████▏| 11176/12188 [26:00<2:16:23, 8.09s/it] {'loss': 0.3599, 'grad_norm': 0.6990916261594601, 'learning_rate': 1.8007461811617e-07, 'epoch': 0.92} + 92%|█████████▏| 11176/12188 [26:00<2:16:23, 8.09s/it] 92%|█████████▏| 11177/12188 [26:08<2:13:56, 7.95s/it] {'loss': 0.2979, 'grad_norm': 0.6924810515610663, 'learning_rate': 1.797214105079248e-07, 'epoch': 0.92} + 92%|█████████▏| 11177/12188 [26:08<2:13:56, 7.95s/it] 92%|█████████▏| 11178/12188 [26:19<2:29:22, 8.87s/it] {'loss': 0.3303, 'grad_norm': 0.7043245378203861, 'learning_rate': 1.793685433000081e-07, 'epoch': 0.92} + 92%|█████████▏| 11178/12188 [26:19<2:29:22, 8.87s/it] 92%|█████████▏| 11179/12188 [26:28<2:28:47, 8.85s/it] {'loss': 0.3328, 'grad_norm': 0.7385129743358945, 'learning_rate': 1.7901601651734101e-07, 'epoch': 0.92} + 92%|█████████▏| 11179/12188 [26:28<2:28:47, 8.85s/it] 92%|█████████▏| 11180/12188 [26:35<2:21:49, 8.44s/it] {'loss': 0.2887, 'grad_norm': 0.6921634011960124, 'learning_rate': 1.7866383018481704e-07, 'epoch': 0.92} + 92%|█████████▏| 11180/12188 [26:35<2:21:49, 8.44s/it] 92%|█████████▏| 11181/12188 [26:46<2:33:31, 9.15s/it] {'loss': 0.2873, 'grad_norm': 0.6844293706559036, 'learning_rate': 1.7831198432730678e-07, 'epoch': 0.92} + 92%|█████████▏| 11181/12188 [26:46<2:33:31, 9.15s/it] 92%|█████████▏| 11182/12188 [26:54<2:27:03, 8.77s/it] {'loss': 0.2746, 'grad_norm': 0.6796220099640102, 'learning_rate': 1.7796047896965817e-07, 'epoch': 0.92} + 92%|█████████▏| 11182/12188 [26:54<2:27:03, 8.77s/it] 92%|█████████▏| 11183/12188 [27:02<2:21:29, 8.45s/it] {'loss': 0.2972, 'grad_norm': 0.6921495037304204, 'learning_rate': 1.7760931413669303e-07, 'epoch': 0.92} + 92%|█████████▏| 11183/12188 [27:02<2:21:29, 8.45s/it] 92%|█████████▏| 11184/12188 [27:11<2:28:25, 8.87s/it] {'loss': 0.3128, 'grad_norm': 0.7212800982725015, 'learning_rate': 1.7725848985321037e-07, 'epoch': 0.92} + 92%|█████████▏| 11184/12188 [27:11<2:28:25, 8.87s/it] 92%|█████████▏| 11185/12188 [27:19<2:22:23, 8.52s/it] {'loss': 0.2881, 'grad_norm': 0.6882332817040915, 'learning_rate': 1.7690800614398483e-07, 'epoch': 0.92} + 92%|█████████▏| 11185/12188 [27:19<2:22:23, 8.52s/it] 92%|█████████▏| 11186/12188 [27:26<2:15:44, 8.13s/it] {'loss': 0.3187, 'grad_norm': 0.6861585968057536, 'learning_rate': 1.7655786303376775e-07, 'epoch': 0.92} + 92%|█████████▏| 11186/12188 [27:26<2:15:44, 8.13s/it] 92%|█████████▏| 11187/12188 [27:34<2:12:10, 7.92s/it] {'loss': 0.2767, 'grad_norm': 0.6483065416102405, 'learning_rate': 1.7620806054728434e-07, 'epoch': 0.92} + 92%|█████████▏| 11187/12188 [27:34<2:12:10, 7.92s/it] 92%|█████████▏| 11188/12188 [27:41<2:08:47, 7.73s/it] {'loss': 0.2947, 'grad_norm': 0.6764015887033898, 'learning_rate': 1.758585987092376e-07, 'epoch': 0.92} + 92%|█████████▏| 11188/12188 [27:41<2:08:47, 7.73s/it] 92%|█████████▏| 11189/12188 [27:49<2:09:37, 7.79s/it] {'loss': 0.2881, 'grad_norm': 0.6933261171485027, 'learning_rate': 1.7550947754430615e-07, 'epoch': 0.92} + 92%|█████████▏| 11189/12188 [27:49<2:09:37, 7.79s/it] 92%|█████████▏| 11190/12188 [27:56<2:07:23, 7.66s/it] {'loss': 0.3162, 'grad_norm': 0.7525985161569397, 'learning_rate': 1.751606970771441e-07, 'epoch': 0.92} + 92%|█████████▏| 11190/12188 [27:56<2:07:23, 7.66s/it] 92%|█████████▏| 11191/12188 [28:04<2:06:13, 7.60s/it] {'loss': 0.272, 'grad_norm': 0.716739042668065, 'learning_rate': 1.7481225733238184e-07, 'epoch': 0.92} + 92%|█████████▏| 11191/12188 [28:04<2:06:13, 7.60s/it] 92%|█████████▏| 11192/12188 [28:12<2:11:12, 7.90s/it] {'loss': 0.2625, 'grad_norm': 0.7365274492026737, 'learning_rate': 1.7446415833462625e-07, 'epoch': 0.92} + 92%|█████████▏| 11192/12188 [28:12<2:11:12, 7.90s/it] 92%|█████████▏| 11193/12188 [28:20<2:09:02, 7.78s/it] {'loss': 0.3011, 'grad_norm': 0.8168158587579464, 'learning_rate': 1.7411640010845832e-07, 'epoch': 0.92} + 92%|█████████▏| 11193/12188 [28:20<2:09:02, 7.78s/it] 92%|█████████▏| 11194/12188 [28:29<2:13:19, 8.05s/it] {'loss': 0.2795, 'grad_norm': 0.6940720419308746, 'learning_rate': 1.7376898267843722e-07, 'epoch': 0.92} + 92%|█████████▏| 11194/12188 [28:29<2:13:19, 8.05s/it] 92%|█████████▏| 11195/12188 [28:36<2:10:08, 7.86s/it] {'loss': 0.2928, 'grad_norm': 0.8798386951359392, 'learning_rate': 1.734219060690967e-07, 'epoch': 0.92} + 92%|█████████▏| 11195/12188 [28:36<2:10:08, 7.86s/it] 92%|█████████▏| 11196/12188 [28:43<2:07:21, 7.70s/it] {'loss': 0.3227, 'grad_norm': 0.678240931096757, 'learning_rate': 1.730751703049466e-07, 'epoch': 0.92} + 92%|█████████▏| 11196/12188 [28:43<2:07:21, 7.70s/it] 92%|█████████▏| 11197/12188 [28:51<2:06:04, 7.63s/it] {'loss': 0.3271, 'grad_norm': 0.7234938157357826, 'learning_rate': 1.7272877541047284e-07, 'epoch': 0.92} + 92%|█████████▏| 11197/12188 [28:51<2:06:04, 7.63s/it] 92%|█████████▏| 11198/12188 [28:58<2:06:00, 7.64s/it] {'loss': 0.2736, 'grad_norm': 0.7028889109201336, 'learning_rate': 1.7238272141013702e-07, 'epoch': 0.92} + 92%|█████████▏| 11198/12188 [28:59<2:06:00, 7.64s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 899, in process_image_unified + visual_processed = processor.preprocess(image, return_tensors="pt") + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 432, in preprocess + patches, image_grid_thw = self._preprocess( + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 252, in _preprocess + resized_height, resized_width = smart_resize( + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 69, in smart_resize + raise ValueError(f"height:{height} and width:{width} must be larger than factor:{factor}") +ValueError: height:21 and width:135 must be larger than factor:28 +[Try #0] Failed to fetch sample 2206265 in VC:s3://gui-agent/jedi/images/figma400k/figma400k_extracted/. Exception: height:21 and width:135 must be larger than factor:28 +Problematic sample: {'image': 'a4b739659e7c325d58fab0a3e55e135c875dc9a485e2b2af942dbafdf4662908.png', 'conversations': [{'from': 'human', 'value': "\nThe Date picker trigger button icon's intended function:\nActs as a date picker trigger that opens a calendar dropdown when clicked, allowing users to select dates visually rather than typing them manually. It's part of the browser's default date input functionality."}, {'from': 'gpt', 'value': '[[674, 238, 756, 762]]'}, {'from': 'human', 'value': "The spatial layout of this Date picker trigger button icon:\nLocated at the absolute right edge of a date input field, aligned vertically in the center. The triangle appears immediately adjacent to another UI control in the same date field.\n\nRegarding the Date picker trigger button icon's functionality:\nActs as a date picker trigger that opens a calendar dropdown when clicked, allowing users to select dates visually rather than typing them manually. It's part of the browser's default date input functionality."}, {'from': 'gpt', 'value': '[[674, 238, 756, 762]]'}, {'from': 'human', 'value': 'Location and arrangement of this Date picker trigger button icon:\nLocated at the absolute right edge of a date input field, aligned vertically in the center. The triangle appears immediately adjacent to another UI control in the same date field.'}, {'from': 'gpt', 'value': '[[674, 238, 756, 762]]'}, {'from': 'human', 'value': "The Button (Spinner Button)'s visual characteristics are as follows:\nA small downward-pointing arrow button that's part of a number input control. The arrow has a dark color and appears in a square container with border styling.\n\nPositional details of the Button (Spinner Button):\nThis button is positioned on the right side of a date input field, located in a row with two identical date input controls. Each date field has a dd/mm/yyyy format placeholder text."}, {'from': 'gpt', 'value': 'x1, y1, x2, y2: [[785, 190, 852, 857]]'}, {'from': 'human', 'value': "The visual appearance of this Button (Spinner Button) is as follows:\n\nVisual Description: A small downward-pointing arrow button that's part of a number input control. The arrow has a dark color and appears in a square container with border styling."}, {'from': 'gpt', 'value': 'x1, y1, x2, y2: [[785, 190, 852, 857]]'}, {'from': 'human', 'value': "This Button (Spinner Button) serves the following purpose:\nWhen clicked, this button decreases the value in the associated date input field. It's part of the browser's default date picker UI controls that allow users to adjust date values without typing."}, {'from': 'gpt', 'value': 'x1, y1, x2, y2: [[785, 190, 852, 857]]'}, {'from': 'human', 'value': "The User Profile Component's appearance can be described as:\nA compact user profile display showing the username 'NotTerry' accompanied by a small circular avatar with a cyan/blue profile picture\n\nThe User Profile Component's placement in the interface:\nLocated in the live chat section on the right side of the interface, integrated into the message stream, appearing as a message sender's identification"}, {'from': 'gpt', 'value': '[[785, 190, 852, 857]]'}, {'from': 'human', 'value': "The User Profile Component's appearance can be described as:\nA compact user profile display showing the username 'NotTerry' accompanied by a small circular avatar with a cyan/blue profile picture\n\nFunctional capabilities of the User Profile Component:\nRepresents a user's identity and serves as a header for their chat messages in the live chat interface"}, {'from': 'gpt', 'value': '[[785, 190, 852, 857]]'}, {'from': 'human', 'value': "The visual attributes of this User Profile Component are:\nA compact user profile display showing the username 'NotTerry' accompanied by a small circular avatar with a cyan/blue profile picture\n\nThis User Profile Component is positioned as follows:\nLocated in the live chat section on the right side of the interface, integrated into the message stream, appearing as a message sender's identification\n\nUsage and purpose of this User Profile Component:\nRepresents a user's identity and serves as a header for their chat messages in the live chat interface"}, {'from': 'gpt', 'value': '[[785, 190, 852, 857]]'}, {'from': 'human', 'value': "Here's a detailed visual description of the Navigation Button:\nA navigation icon featuring three small squares arranged in a grid pattern, located within a dark navy interface. The icon appears to use a faintly glowing blue color against the dark background.\n\nThis Navigation Button allows users to:\nA navigation button that likely serves as a 'MAIN' menu control, allowing users to access primary navigation options or return to the main view of the application."}, {'from': 'gpt', 'value': '[[0, 0, 1000, 1000]]'}, {'from': 'human', 'value': "The position of this Navigation Button can be described as:\nThe icon is positioned in the upper-left side of the interface, just below the RustyLoot logo. It's the first item in what appears to be a vertical navigation menu with other gaming-related options.\n\nThis Navigation Button serves the following purpose:\nA navigation button that likely serves as a 'MAIN' menu control, allowing users to access primary navigation options or return to the main view of the application."}, {'from': 'gpt', 'value': '[[0, 0, 1000, 1000]]'}, {'from': 'human', 'value': "The Navigation Button's appearance can be described as:\nA navigation icon featuring three small squares arranged in a grid pattern, located within a dark navy interface. The icon appears to use a faintly glowing blue color against the dark background.\n\nPositional details of the Navigation Button:\nThe icon is positioned in the upper-left side of the interface, just below the RustyLoot logo. It's the first item in what appears to be a vertical navigation menu with other gaming-related options."}, {'from': 'gpt', 'value': '[[0, 0, 1000, 1000]]'}]} + 92%|█████████▏| 11199/12188 [29:06<2:04:54, 7.58s/it] {'loss': 0.3238, 'grad_norm': 0.6874845792514387, 'learning_rate': 1.7203700832837844e-07, 'epoch': 0.92} + 92%|█████████▏| 11199/12188 [29:06<2:04:54, 7.58s/it] 92%|█████████▏| 11200/12188 [29:14<2:05:39, 7.63s/it] {'loss': 0.2907, 'grad_norm': 0.6628352257407423, 'learning_rate': 1.7169163618960817e-07, 'epoch': 0.92} + 92%|█████████▏| 11200/12188 [29:14<2:05:39, 7.63s/it] 92%|█████████▏| 11201/12188 [29:21<2:04:03, 7.54s/it] {'loss': 0.3021, 'grad_norm': 0.7002645128753517, 'learning_rate': 1.713466050182183e-07, 'epoch': 0.92} + 92%|█████████▏| 11201/12188 [29:21<2:04:03, 7.54s/it] 92%|█████████▏| 11202/12188 [29:29<2:04:23, 7.57s/it] {'loss': 0.299, 'grad_norm': 0.8354052123605867, 'learning_rate': 1.7100191483857275e-07, 'epoch': 0.92} + 92%|█████████▏| 11202/12188 [29:29<2:04:23, 7.57s/it] 92%|█████████▏| 11203/12188 [29:36<2:04:11, 7.56s/it] {'loss': 0.3119, 'grad_norm': 0.722330505346524, 'learning_rate': 1.7065756567501424e-07, 'epoch': 0.92} + 92%|█████████▏| 11203/12188 [29:36<2:04:11, 7.56s/it] 92%|█████████▏| 11204/12188 [29:44<2:05:26, 7.65s/it] {'loss': 0.2916, 'grad_norm': 0.6628553907140592, 'learning_rate': 1.7031355755185886e-07, 'epoch': 0.92} + 92%|█████████▏| 11204/12188 [29:44<2:05:26, 7.65s/it]Invalidate trace cache @ step 2: expected module 1, but got module 364 + 92%|█████████▏| 11205/12188 [29:51<2:03:11, 7.52s/it] {'loss': 0.7059, 'grad_norm': 0.5992450247589688, 'learning_rate': 1.6996989049340106e-07, 'epoch': 0.92} + 92%|█████████▏| 11205/12188 [29:51<2:03:11, 7.52s/it] 92%|█████████▏| 11206/12188 [30:03<2:24:18, 8.82s/it] {'loss': 0.3046, 'grad_norm': 0.7592193550353118, 'learning_rate': 1.6962656452390925e-07, 'epoch': 0.92} + 92%|█████████▏| 11206/12188 [30:03<2:24:18, 8.82s/it] 92%|█████████▏| 11207/12188 [30:10<2:16:45, 8.36s/it] {'loss': 0.3171, 'grad_norm': 0.7455404640776357, 'learning_rate': 1.6928357966762843e-07, 'epoch': 0.92} + 92%|█████████▏| 11207/12188 [30:10<2:16:45, 8.36s/it] 92%|█████████▏| 11208/12188 [30:18<2:12:58, 8.14s/it] {'loss': 0.2875, 'grad_norm': 0.6989159411021488, 'learning_rate': 1.689409359487809e-07, 'epoch': 0.92} + 92%|█████████▏| 11208/12188 [30:18<2:12:58, 8.14s/it] 92%|█████████▏| 11209/12188 [30:26<2:10:31, 8.00s/it] {'loss': 0.3047, 'grad_norm': 0.6905813105663163, 'learning_rate': 1.6859863339156235e-07, 'epoch': 0.92} + 92%|█████████▏| 11209/12188 [30:26<2:10:31, 8.00s/it] 92%|█████████▏| 11210/12188 [30:33<2:07:55, 7.85s/it] {'loss': 0.2719, 'grad_norm': 0.7935923729944225, 'learning_rate': 1.6825667202014617e-07, 'epoch': 0.92} + 92%|█████████▏| 11210/12188 [30:33<2:07:55, 7.85s/it] 92%|█████████▏| 11211/12188 [30:41<2:05:36, 7.71s/it] {'loss': 0.2826, 'grad_norm': 0.7806284687107553, 'learning_rate': 1.6791505185868085e-07, 'epoch': 0.92} + 92%|█████████▏| 11211/12188 [30:41<2:05:36, 7.71s/it] 92%|█████████▏| 11212/12188 [30:48<2:04:31, 7.66s/it] {'loss': 0.2872, 'grad_norm': 0.6759202247344804, 'learning_rate': 1.675737729312915e-07, 'epoch': 0.92} + 92%|█████████▏| 11212/12188 [30:48<2:04:31, 7.66s/it] 92%|█████████▏| 11213/12188 [30:56<2:03:36, 7.61s/it] {'loss': 0.3049, 'grad_norm': 1.1288871029894665, 'learning_rate': 1.6723283526207833e-07, 'epoch': 0.92} + 92%|█████████▏| 11213/12188 [30:56<2:03:36, 7.61s/it] 92%|█████████▏| 11214/12188 [31:03<2:02:31, 7.55s/it] {'loss': 0.2898, 'grad_norm': 0.6856095783914257, 'learning_rate': 1.6689223887511818e-07, 'epoch': 0.92} + 92%|█████████▏| 11214/12188 [31:03<2:02:31, 7.55s/it] 92%|█████████▏| 11215/12188 [31:10<2:01:42, 7.51s/it] {'loss': 0.3139, 'grad_norm': 0.7153111065605168, 'learning_rate': 1.6655198379446347e-07, 'epoch': 0.92} + 92%|█████████▏| 11215/12188 [31:10<2:01:42, 7.51s/it] 92%|█████████▏| 11216/12188 [31:18<2:02:20, 7.55s/it] {'loss': 0.3033, 'grad_norm': 0.7191451634516014, 'learning_rate': 1.6621207004414108e-07, 'epoch': 0.92} + 92%|█████████▏| 11216/12188 [31:18<2:02:20, 7.55s/it] 92%|█████████▏| 11217/12188 [31:25<2:01:03, 7.48s/it] {'loss': 0.2821, 'grad_norm': 0.719124035669612, 'learning_rate': 1.6587249764815628e-07, 'epoch': 0.92} + 92%|█████████▏| 11217/12188 [31:25<2:01:03, 7.48s/it] 92%|█████████▏| 11218/12188 [31:34<2:05:07, 7.74s/it] {'loss': 0.3018, 'grad_norm': 1.452160518598789, 'learning_rate': 1.6553326663048986e-07, 'epoch': 0.92} + 92%|█████████▏| 11218/12188 [31:34<2:05:07, 7.74s/it] 92%|█████████▏| 11219/12188 [31:42<2:08:14, 7.94s/it] {'loss': 0.3129, 'grad_norm': 0.9117506253789769, 'learning_rate': 1.6519437701509654e-07, 'epoch': 0.92} + 92%|█████████▏| 11219/12188 [31:42<2:08:14, 7.94s/it] 92%|█████████▏| 11220/12188 [31:51<2:13:15, 8.26s/it] {'loss': 0.275, 'grad_norm': 0.680658085115046, 'learning_rate': 1.6485582882590833e-07, 'epoch': 0.92} + 92%|█████████▏| 11220/12188 [31:51<2:13:15, 8.26s/it] 92%|█████████▏| 11221/12188 [31:59<2:10:00, 8.07s/it] {'loss': 0.3184, 'grad_norm': 0.7052165392069647, 'learning_rate': 1.6451762208683387e-07, 'epoch': 0.92} + 92%|█████████▏| 11221/12188 [31:59<2:10:00, 8.07s/it] 92%|█████████▏| 11222/12188 [32:07<2:09:38, 8.05s/it] {'loss': 0.2786, 'grad_norm': 0.7901783793045781, 'learning_rate': 1.6417975682175568e-07, 'epoch': 0.92} + 92%|█████████▏| 11222/12188 [32:07<2:09:38, 8.05s/it] 92%|█████████▏| 11223/12188 [32:16<2:16:43, 8.50s/it] {'loss': 0.3001, 'grad_norm': 0.7054444668489732, 'learning_rate': 1.6384223305453417e-07, 'epoch': 0.92} + 92%|█████████▏| 11223/12188 [32:16<2:16:43, 8.50s/it] 92%|█████████▏| 11224/12188 [32:24<2:13:14, 8.29s/it] {'loss': 0.2887, 'grad_norm': 0.7214858180495974, 'learning_rate': 1.6350505080900358e-07, 'epoch': 0.92} + 92%|█████████▏| 11224/12188 [32:24<2:13:14, 8.29s/it] 92%|█████████▏| 11225/12188 [32:32<2:09:15, 8.05s/it] {'loss': 0.3368, 'grad_norm': 0.826739836995761, 'learning_rate': 1.63168210108976e-07, 'epoch': 0.92} + 92%|█████████▏| 11225/12188 [32:32<2:09:15, 8.05s/it] 92%|█████████▏| 11226/12188 [32:39<2:06:06, 7.86s/it] {'loss': 0.3034, 'grad_norm': 0.7318023373426352, 'learning_rate': 1.6283171097823903e-07, 'epoch': 0.92} + 92%|█████████▏| 11226/12188 [32:39<2:06:06, 7.86s/it][2025-08-18 23:31:40,013] [WARNING] [stage3.py:2118:step] 1 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time + 92%|█████████▏| 11227/12188 [32:52<2:31:16, 9.44s/it] {'loss': 0.3065, 'grad_norm': 0.729646299806955, 'learning_rate': 1.6249555344055533e-07, 'epoch': 0.92} + 92%|█████████▏| 11227/12188 [32:52<2:31:16, 9.44s/it] 92%|█████████▏| 11228/12188 [33:01<2:29:38, 9.35s/it] {'loss': 0.3196, 'grad_norm': 0.6913356817962049, 'learning_rate': 1.6215973751966262e-07, 'epoch': 0.92} + 92%|█████████▏| 11228/12188 [33:01<2:29:38, 9.35s/it] 92%|█████████▏| 11229/12188 [33:09<2:20:20, 8.78s/it] {'loss': 0.279, 'grad_norm': 0.6960751253149262, 'learning_rate': 1.6182426323927747e-07, 'epoch': 0.92} + 92%|█████████▏| 11229/12188 [33:09<2:20:20, 8.78s/it] 92%|█████████▏| 11230/12188 [33:18<2:22:00, 8.89s/it] {'loss': 0.2934, 'grad_norm': 0.8618209069150877, 'learning_rate': 1.6148913062308923e-07, 'epoch': 0.92} + 92%|█████████▏| 11230/12188 [33:18<2:22:00, 8.89s/it] 92%|█████████▏| 11231/12188 [33:28<2:27:19, 9.24s/it] {'loss': 0.252, 'grad_norm': 0.6406518716860364, 'learning_rate': 1.6115433969476567e-07, 'epoch': 0.92} + 92%|█████████▏| 11231/12188 [33:28<2:27:19, 9.24s/it] 92%|█████████▏| 11232/12188 [33:35<2:18:32, 8.70s/it] {'loss': 0.2432, 'grad_norm': 0.6392102293422011, 'learning_rate': 1.6081989047794732e-07, 'epoch': 0.92} + 92%|█████████▏| 11232/12188 [33:35<2:18:32, 8.70s/it] 92%|█████████▏| 11233/12188 [33:43<2:14:41, 8.46s/it] {'loss': 0.2976, 'grad_norm': 0.836608443596888, 'learning_rate': 1.6048578299625472e-07, 'epoch': 0.92} + 92%|█████████▏| 11233/12188 [33:43<2:14:41, 8.46s/it] 92%|█████████▏| 11234/12188 [33:53<2:20:11, 8.82s/it] {'loss': 0.2776, 'grad_norm': 0.6492510640608065, 'learning_rate': 1.6015201727328012e-07, 'epoch': 0.92} + 92%|█████████▏| 11234/12188 [33:53<2:20:11, 8.82s/it] 92%|█████████▏| 11235/12188 [34:00<2:12:45, 8.36s/it] {'loss': 0.2824, 'grad_norm': 0.9783766342457163, 'learning_rate': 1.598185933325941e-07, 'epoch': 0.92} + 92%|█████████▏| 11235/12188 [34:00<2:12:45, 8.36s/it] 92%|█████████▏| 11236/12188 [34:08<2:09:57, 8.19s/it] {'loss': 0.3565, 'grad_norm': 0.7894503230086999, 'learning_rate': 1.5948551119774336e-07, 'epoch': 0.92} + 92%|█████████▏| 11236/12188 [34:08<2:09:57, 8.19s/it] 92%|█████████▏| 11237/12188 [34:16<2:06:13, 7.96s/it] {'loss': 0.279, 'grad_norm': 0.7539995594651171, 'learning_rate': 1.59152770892248e-07, 'epoch': 0.92} + 92%|█████████▏| 11237/12188 [34:16<2:06:13, 7.96s/it] 92%|█████████▏| 11238/12188 [34:24<2:10:35, 8.25s/it] {'loss': 0.326, 'grad_norm': 0.6715661158807812, 'learning_rate': 1.5882037243960702e-07, 'epoch': 0.92} + 92%|█████████▏| 11238/12188 [34:24<2:10:35, 8.25s/it] 92%|█████████▏| 11239/12188 [34:32<2:09:08, 8.16s/it] {'loss': 0.3096, 'grad_norm': 0.9482750293672452, 'learning_rate': 1.5848831586329382e-07, 'epoch': 0.92} + 92%|█████████▏| 11239/12188 [34:32<2:09:08, 8.16s/it] 92%|█████████▏| 11240/12188 [34:40<2:05:12, 7.93s/it] {'loss': 0.315, 'grad_norm': 0.7705029835906967, 'learning_rate': 1.581566011867569e-07, 'epoch': 0.92} + 92%|█████████▏| 11240/12188 [34:40<2:05:12, 7.93s/it] 92%|█████████▏| 11241/12188 [34:47<2:03:06, 7.80s/it] {'loss': 0.2815, 'grad_norm': 0.8560409064645318, 'learning_rate': 1.5782522843342197e-07, 'epoch': 0.92} + 92%|█████████▏| 11241/12188 [34:47<2:03:06, 7.80s/it] 92%|█████████▏| 11242/12188 [34:57<2:10:53, 8.30s/it] {'loss': 0.2798, 'grad_norm': 0.7716120465924932, 'learning_rate': 1.574941976266897e-07, 'epoch': 0.92} + 92%|█████████▏| 11242/12188 [34:57<2:10:53, 8.30s/it] 92%|█████████▏| 11243/12188 [35:04<2:06:33, 8.04s/it] {'loss': 0.3104, 'grad_norm': 0.7046781578762867, 'learning_rate': 1.5716350878993703e-07, 'epoch': 0.92} + 92%|█████████▏| 11243/12188 [35:04<2:06:33, 8.04s/it] 92%|█████████▏| 11244/12188 [35:12<2:05:20, 7.97s/it] {'loss': 0.2715, 'grad_norm': 0.705683153091046, 'learning_rate': 1.5683316194651743e-07, 'epoch': 0.92} + 92%|█████████▏| 11244/12188 [35:12<2:05:20, 7.97s/it] 92%|█████████▏| 11245/12188 [35:19<2:01:58, 7.76s/it] {'loss': 0.3314, 'grad_norm': 0.7384033892575611, 'learning_rate': 1.5650315711975783e-07, 'epoch': 0.92} + 92%|█████████▏| 11245/12188 [35:19<2:01:58, 7.76s/it] 92%|█████████▏| 11246/12188 [35:27<1:59:58, 7.64s/it] {'loss': 0.3598, 'grad_norm': 0.7079310372752333, 'learning_rate': 1.5617349433296402e-07, 'epoch': 0.92} + 92%|█████████▏| 11246/12188 [35:27<1:59:58, 7.64s/it] 92%|█████████▏| 11247/12188 [35:36<2:07:00, 8.10s/it] {'loss': 0.3518, 'grad_norm': 0.7258826758902082, 'learning_rate': 1.5584417360941572e-07, 'epoch': 0.92} + 92%|█████████▏| 11247/12188 [35:36<2:07:00, 8.10s/it] 92%|█████████▏| 11248/12188 [35:44<2:06:31, 8.08s/it] {'loss': 0.3026, 'grad_norm': 0.7216110467071996, 'learning_rate': 1.5551519497236877e-07, 'epoch': 0.92} + 92%|█████████▏| 11248/12188 [35:44<2:06:31, 8.08s/it] 92%|█████████▏| 11249/12188 [35:51<2:02:34, 7.83s/it] {'loss': 0.2899, 'grad_norm': 0.6977563838547306, 'learning_rate': 1.5518655844505627e-07, 'epoch': 0.92} + 92%|█████████▏| 11249/12188 [35:51<2:02:34, 7.83s/it] 92%|█████████▏| 11250/12188 [36:02<2:15:45, 8.68s/it] {'loss': 0.2956, 'grad_norm': 0.703545816390553, 'learning_rate': 1.548582640506846e-07, 'epoch': 0.92} + 92%|█████████▏| 11250/12188 [36:02<2:15:45, 8.68s/it] 92%|█████████▏| 11251/12188 [36:10<2:12:10, 8.46s/it] {'loss': 0.3022, 'grad_norm': 0.6288482227595057, 'learning_rate': 1.5453031181243804e-07, 'epoch': 0.92} + 92%|█████████▏| 11251/12188 [36:10<2:12:10, 8.46s/it] 92%|█████████▏| 11252/12188 [36:17<2:06:20, 8.10s/it] {'loss': 0.2765, 'grad_norm': 0.6555838979655301, 'learning_rate': 1.5420270175347696e-07, 'epoch': 0.92} + 92%|█████████▏| 11252/12188 [36:17<2:06:20, 8.10s/it] 92%|█████████▏| 11253/12188 [36:25<2:04:46, 8.01s/it] {'loss': 0.2784, 'grad_norm': 0.7514918857230738, 'learning_rate': 1.538754338969345e-07, 'epoch': 0.92} + 92%|█████████▏| 11253/12188 [36:25<2:04:46, 8.01s/it] 92%|█████████▏| 11254/12188 [36:32<2:02:09, 7.85s/it] {'loss': 0.2793, 'grad_norm': 0.7172409743248405, 'learning_rate': 1.5354850826592383e-07, 'epoch': 0.92} + 92%|█████████▏| 11254/12188 [36:32<2:02:09, 7.85s/it] 92%|█████████▏| 11255/12188 [36:39<1:59:21, 7.68s/it] {'loss': 0.2637, 'grad_norm': 0.7536980807505492, 'learning_rate': 1.532219248835315e-07, 'epoch': 0.92} + 92%|█████████▏| 11255/12188 [36:39<1:59:21, 7.68s/it] 92%|█████████▏| 11256/12188 [36:47<1:57:39, 7.57s/it] {'loss': 0.2922, 'grad_norm': 1.4270163908946427, 'learning_rate': 1.5289568377281905e-07, 'epoch': 0.92} + 92%|█████████▏| 11256/12188 [36:47<1:57:39, 7.57s/it] 92%|█████████▏| 11257/12188 [36:54<1:57:31, 7.57s/it] {'loss': 0.3226, 'grad_norm': 0.6829870913262469, 'learning_rate': 1.525697849568264e-07, 'epoch': 0.92} + 92%|█████████▏| 11257/12188 [36:54<1:57:31, 7.57s/it] 92%|█████████▏| 11258/12188 [37:04<2:05:45, 8.11s/it] {'loss': 0.3126, 'grad_norm': 0.7590322842287303, 'learning_rate': 1.5224422845856678e-07, 'epoch': 0.92} + 92%|█████████▏| 11258/12188 [37:04<2:05:45, 8.11s/it] 92%|█████████▏| 11259/12188 [37:13<2:12:43, 8.57s/it] {'loss': 0.2657, 'grad_norm': 0.68215068527405, 'learning_rate': 1.5191901430103185e-07, 'epoch': 0.92} + 92%|█████████▏| 11259/12188 [37:13<2:12:43, 8.57s/it] 92%|█████████▏| 11260/12188 [37:21<2:08:22, 8.30s/it] {'loss': 0.3424, 'grad_norm': 0.8290426973810083, 'learning_rate': 1.5159414250718706e-07, 'epoch': 0.92} + 92%|█████████▏| 11260/12188 [37:21<2:08:22, 8.30s/it] 92%|█████████▏| 11261/12188 [37:29<2:04:19, 8.05s/it] {'loss': 0.2938, 'grad_norm': 0.7024612883770815, 'learning_rate': 1.5126961309997355e-07, 'epoch': 0.92} + 92%|█████████▏| 11261/12188 [37:29<2:04:19, 8.05s/it] 92%|█████████▏| 11262/12188 [37:36<2:00:46, 7.83s/it] {'loss': 0.2898, 'grad_norm': 0.7086143270946227, 'learning_rate': 1.5094542610231133e-07, 'epoch': 0.92} + 92%|█████████▏| 11262/12188 [37:36<2:00:46, 7.83s/it] 92%|█████████▏| 11263/12188 [37:44<2:00:07, 7.79s/it] {'loss': 0.3182, 'grad_norm': 0.6406344220471059, 'learning_rate': 1.506215815370915e-07, 'epoch': 0.92} + 92%|█████████▏| 11263/12188 [37:44<2:00:07, 7.79s/it] 92%|█████████▏| 11264/12188 [37:51<1:58:54, 7.72s/it] {'loss': 0.2859, 'grad_norm': 0.7933529067986125, 'learning_rate': 1.5029807942718466e-07, 'epoch': 0.92} + 92%|█████████▏| 11264/12188 [37:51<1:58:54, 7.72s/it] 92%|█████████▏| 11265/12188 [37:59<1:57:58, 7.67s/it] {'loss': 0.3222, 'grad_norm': 0.794534790606814, 'learning_rate': 1.4997491979543587e-07, 'epoch': 0.92} + 92%|█████████▏| 11265/12188 [37:59<1:57:58, 7.67s/it] 92%|█████████▏| 11266/12188 [38:06<1:56:17, 7.57s/it] {'loss': 0.3196, 'grad_norm': 0.6750508994025037, 'learning_rate': 1.496521026646658e-07, 'epoch': 0.92} + 92%|█████████▏| 11266/12188 [38:06<1:56:17, 7.57s/it] 92%|█████████▏| 11267/12188 [38:13<1:55:17, 7.51s/it] {'loss': 0.2977, 'grad_norm': 0.7593303539514339, 'learning_rate': 1.4932962805767225e-07, 'epoch': 0.92} + 92%|█████████▏| 11267/12188 [38:13<1:55:17, 7.51s/it] 92%|█████████▏| 11268/12188 [38:21<1:55:02, 7.50s/it] {'loss': 0.2873, 'grad_norm': 0.7179146474938451, 'learning_rate': 1.4900749599722653e-07, 'epoch': 0.92} + 92%|█████████▏| 11268/12188 [38:21<1:55:02, 7.50s/it] 92%|█████████▏| 11269/12188 [38:29<1:55:53, 7.57s/it] {'loss': 0.2741, 'grad_norm': 0.7850343304713295, 'learning_rate': 1.4868570650607816e-07, 'epoch': 0.92} + 92%|█████████▏| 11269/12188 [38:29<1:55:53, 7.57s/it] 92%|█████████▏| 11270/12188 [38:36<1:54:45, 7.50s/it] {'loss': 0.2909, 'grad_norm': 0.6978921782379626, 'learning_rate': 1.4836425960695123e-07, 'epoch': 0.92} + 92%|█████████▏| 11270/12188 [38:36<1:54:45, 7.50s/it] 92%|█████████▏| 11271/12188 [38:44<1:56:44, 7.64s/it] {'loss': 0.3262, 'grad_norm': 0.6979728814980153, 'learning_rate': 1.4804315532254475e-07, 'epoch': 0.92} + 92%|█████████▏| 11271/12188 [38:44<1:56:44, 7.64s/it] 92%|█████████▏| 11272/12188 [38:51<1:56:30, 7.63s/it] {'loss': 0.2944, 'grad_norm': 0.6985883908306517, 'learning_rate': 1.4772239367553564e-07, 'epoch': 0.92} + 92%|█████████▏| 11272/12188 [38:51<1:56:30, 7.63s/it] 92%|█████████▏| 11273/12188 [38:59<1:55:19, 7.56s/it] {'loss': 0.2947, 'grad_norm': 0.6659904539890675, 'learning_rate': 1.474019746885752e-07, 'epoch': 0.92} + 92%|█████████▏| 11273/12188 [38:59<1:55:19, 7.56s/it] 93%|█████████▎| 11274/12188 [39:07<1:55:36, 7.59s/it] {'loss': 0.3079, 'grad_norm': 0.6817429909123156, 'learning_rate': 1.4708189838429087e-07, 'epoch': 0.92} + 93%|█████████▎| 11274/12188 [39:07<1:55:36, 7.59s/it] 93%|█████████▎| 11275/12188 [39:14<1:53:43, 7.47s/it] {'loss': 0.3075, 'grad_norm': 0.7549974121390611, 'learning_rate': 1.4676216478528683e-07, 'epoch': 0.93} + 93%|█████████▎| 11275/12188 [39:14<1:53:43, 7.47s/it] 93%|█████████▎| 11276/12188 [39:21<1:54:18, 7.52s/it] {'loss': 0.2986, 'grad_norm': 0.9295201700457826, 'learning_rate': 1.4644277391414107e-07, 'epoch': 0.93} + 93%|█████████▎| 11276/12188 [39:21<1:54:18, 7.52s/it] 93%|█████████▎| 11277/12188 [39:29<1:53:38, 7.49s/it] {'loss': 0.2789, 'grad_norm': 0.718340537534761, 'learning_rate': 1.4612372579340893e-07, 'epoch': 0.93} + 93%|█████████▎| 11277/12188 [39:29<1:53:38, 7.49s/it] 93%|█████████▎| 11278/12188 [39:37<1:55:42, 7.63s/it] {'loss': 0.2897, 'grad_norm': 0.659254255382194, 'learning_rate': 1.4580502044562016e-07, 'epoch': 0.93} + 93%|█████████▎| 11278/12188 [39:37<1:55:42, 7.63s/it] 93%|█████████▎| 11279/12188 [39:44<1:55:45, 7.64s/it] {'loss': 0.3121, 'grad_norm': 0.797120424569661, 'learning_rate': 1.454866578932823e-07, 'epoch': 0.93} + 93%|█████████▎| 11279/12188 [39:44<1:55:45, 7.64s/it] 93%|█████████▎| 11280/12188 [39:52<1:56:31, 7.70s/it] {'loss': 0.2828, 'grad_norm': 0.6847949039391912, 'learning_rate': 1.4516863815887795e-07, 'epoch': 0.93} + 93%|█████████▎| 11280/12188 [39:52<1:56:31, 7.70s/it] 93%|█████████▎| 11281/12188 [40:00<1:55:16, 7.63s/it] {'loss': 0.3091, 'grad_norm': 0.67594605535415, 'learning_rate': 1.4485096126486355e-07, 'epoch': 0.93} + 93%|█████████▎| 11281/12188 [40:00<1:55:16, 7.63s/it] 93%|█████████▎| 11282/12188 [40:07<1:55:57, 7.68s/it] {'loss': 0.271, 'grad_norm': 0.6732869434447714, 'learning_rate': 1.4453362723367449e-07, 'epoch': 0.93} + 93%|█████████▎| 11282/12188 [40:08<1:55:57, 7.68s/it] 93%|█████████▎| 11283/12188 [40:15<1:54:08, 7.57s/it] {'loss': 0.3131, 'grad_norm': 0.7183071249117103, 'learning_rate': 1.4421663608772009e-07, 'epoch': 0.93} + 93%|█████████▎| 11283/12188 [40:15<1:54:08, 7.57s/it] 93%|█████████▎| 11284/12188 [40:25<2:07:12, 8.44s/it] {'loss': 0.2573, 'grad_norm': 0.6752971073416759, 'learning_rate': 1.4389998784938465e-07, 'epoch': 0.93} + 93%|█████████▎| 11284/12188 [40:25<2:07:12, 8.44s/it] 93%|█████████▎| 11285/12188 [40:33<2:05:44, 8.36s/it] {'loss': 0.2857, 'grad_norm': 0.6944810955715095, 'learning_rate': 1.4358368254103084e-07, 'epoch': 0.93} + 93%|█████████▎| 11285/12188 [40:33<2:05:44, 8.36s/it] 93%|█████████▎| 11286/12188 [40:41<2:00:34, 8.02s/it] {'loss': 0.3511, 'grad_norm': 0.724692445678581, 'learning_rate': 1.4326772018499414e-07, 'epoch': 0.93} + 93%|█████████▎| 11286/12188 [40:41<2:00:34, 8.02s/it] 93%|█████████▎| 11287/12188 [40:52<2:13:49, 8.91s/it] {'loss': 0.2837, 'grad_norm': 0.7117560893041518, 'learning_rate': 1.4295210080358835e-07, 'epoch': 0.93} + 93%|█████████▎| 11287/12188 [40:52<2:13:49, 8.91s/it] 93%|█████████▎| 11288/12188 [40:59<2:06:55, 8.46s/it] {'loss': 0.2705, 'grad_norm': 0.7005462828353944, 'learning_rate': 1.4263682441910233e-07, 'epoch': 0.93} + 93%|█████████▎| 11288/12188 [40:59<2:06:55, 8.46s/it] 93%|█████████▎| 11289/12188 [41:07<2:03:12, 8.22s/it] {'loss': 0.3493, 'grad_norm': 0.8498327730214074, 'learning_rate': 1.4232189105379935e-07, 'epoch': 0.93} + 93%|█████████▎| 11289/12188 [41:07<2:03:12, 8.22s/it] 93%|█████████▎| 11290/12188 [41:14<1:59:22, 7.98s/it] {'loss': 0.2779, 'grad_norm': 0.684546161141997, 'learning_rate': 1.4200730072991996e-07, 'epoch': 0.93} + 93%|█████████▎| 11290/12188 [41:14<1:59:22, 7.98s/it] 93%|█████████▎| 11291/12188 [41:24<2:06:49, 8.48s/it] {'loss': 0.3232, 'grad_norm': 0.7368485610691786, 'learning_rate': 1.4169305346968033e-07, 'epoch': 0.93} + 93%|█████████▎| 11291/12188 [41:24<2:06:49, 8.48s/it] 93%|█████████▎| 11292/12188 [41:32<2:04:14, 8.32s/it] {'loss': 0.2545, 'grad_norm': 0.6715791684176645, 'learning_rate': 1.4137914929527097e-07, 'epoch': 0.93} + 93%|█████████▎| 11292/12188 [41:32<2:04:14, 8.32s/it] 93%|█████████▎| 11293/12188 [41:39<2:00:08, 8.05s/it] {'loss': 0.2502, 'grad_norm': 0.6863598838757133, 'learning_rate': 1.410655882288614e-07, 'epoch': 0.93} + 93%|█████████▎| 11293/12188 [41:39<2:00:08, 8.05s/it] 93%|█████████▎| 11294/12188 [41:47<1:58:31, 7.95s/it] {'loss': 0.3092, 'grad_norm': 0.8010982568166047, 'learning_rate': 1.4075237029259227e-07, 'epoch': 0.93} + 93%|█████████▎| 11294/12188 [41:47<1:58:31, 7.95s/it] 93%|█████████▎| 11295/12188 [41:55<1:57:14, 7.88s/it] {'loss': 0.2874, 'grad_norm': 0.6705657571860355, 'learning_rate': 1.404394955085847e-07, 'epoch': 0.93} + 93%|█████████▎| 11295/12188 [41:55<1:57:14, 7.88s/it] 93%|█████████▎| 11296/12188 [42:03<2:00:39, 8.12s/it] {'loss': 0.2775, 'grad_norm': 0.7316853808713365, 'learning_rate': 1.401269638989322e-07, 'epoch': 0.93} + 93%|█████████▎| 11296/12188 [42:03<2:00:39, 8.12s/it] 93%|█████████▎| 11297/12188 [42:11<1:57:11, 7.89s/it] {'loss': 0.2723, 'grad_norm': 0.7139781543064865, 'learning_rate': 1.3981477548570544e-07, 'epoch': 0.93} + 93%|█████████▎| 11297/12188 [42:11<1:57:11, 7.89s/it] 93%|█████████▎| 11298/12188 [42:20<2:05:32, 8.46s/it] {'loss': 0.3046, 'grad_norm': 0.7314803747348139, 'learning_rate': 1.395029302909512e-07, 'epoch': 0.93} + 93%|███��█████▎| 11298/12188 [42:20<2:05:32, 8.46s/it] 93%|█████████▎| 11299/12188 [42:28<1:59:56, 8.10s/it] {'loss': 0.2616, 'grad_norm': 0.7283965355999754, 'learning_rate': 1.391914283366902e-07, 'epoch': 0.93} + 93%|█████████▎| 11299/12188 [42:28<1:59:56, 8.10s/it] 93%|█████████▎| 11300/12188 [42:35<1:56:46, 7.89s/it] {'loss': 0.2656, 'grad_norm': 0.6958124189152547, 'learning_rate': 1.3888026964492208e-07, 'epoch': 0.93} + 93%|█████████▎| 11300/12188 [42:35<1:56:46, 7.89s/it] 93%|█████████▎| 11301/12188 [42:43<1:55:38, 7.82s/it] {'loss': 0.3163, 'grad_norm': 0.7295573936436791, 'learning_rate': 1.3856945423761813e-07, 'epoch': 0.93} + 93%|█████████▎| 11301/12188 [42:43<1:55:38, 7.82s/it] 93%|█████████▎| 11302/12188 [42:51<1:56:04, 7.86s/it] {'loss': 0.3275, 'grad_norm': 0.763953193318596, 'learning_rate': 1.3825898213672918e-07, 'epoch': 0.93} + 93%|█████████▎| 11302/12188 [42:51<1:56:04, 7.86s/it] 93%|█████████▎| 11303/12188 [42:58<1:55:30, 7.83s/it] {'loss': 0.3109, 'grad_norm': 0.82011987365467, 'learning_rate': 1.379488533641804e-07, 'epoch': 0.93} + 93%|█████████▎| 11303/12188 [42:58<1:55:30, 7.83s/it] 93%|█████████▎| 11304/12188 [43:06<1:53:33, 7.71s/it] {'loss': 0.2938, 'grad_norm': 0.7064057225881357, 'learning_rate': 1.376390679418721e-07, 'epoch': 0.93} + 93%|█████████▎| 11304/12188 [43:06<1:53:33, 7.71s/it]Invalidate trace cache @ step 2: expected module 1, but got module 364 + 93%|█████████▎| 11305/12188 [43:13<1:49:59, 7.47s/it] {'loss': 0.6324, 'grad_norm': 0.593066555050762, 'learning_rate': 1.373296258916801e-07, 'epoch': 0.93} + 93%|█████████▎| 11305/12188 [43:13<1:49:59, 7.47s/it] 93%|█████████▎| 11306/12188 [43:24<2:04:56, 8.50s/it] {'loss': 0.2559, 'grad_norm': 0.7032499203370332, 'learning_rate': 1.3702052723545856e-07, 'epoch': 0.93} + 93%|█████████▎| 11306/12188 [43:24<2:04:56, 8.50s/it] 93%|█████████▎| 11307/12188 [43:32<2:04:23, 8.47s/it] {'loss': 0.2982, 'grad_norm': 0.6756299047239213, 'learning_rate': 1.3671177199503338e-07, 'epoch': 0.93} + 93%|█████████▎| 11307/12188 [43:32<2:04:23, 8.47s/it]Invalidate trace cache @ step 2: expected module 1, but got module 364 + 93%|█████████▎| 11308/12188 [43:40<2:01:02, 8.25s/it] {'loss': 0.6589, 'grad_norm': 0.6210576385567462, 'learning_rate': 1.364033601922099e-07, 'epoch': 0.93} + 93%|█████████▎| 11308/12188 [43:40<2:01:02, 8.25s/it] 93%|█████████▎| 11309/12188 [43:47<1:57:58, 8.05s/it] {'loss': 0.2986, 'grad_norm': 0.6989787731096335, 'learning_rate': 1.360952918487668e-07, 'epoch': 0.93} + 93%|█████████▎| 11309/12188 [43:47<1:57:58, 8.05s/it]Invalidate trace cache @ step 2: expected module 1, but got module 364 + 93%|█████████▎| 11310/12188 [43:55<1:57:31, 8.03s/it] {'loss': 0.6251, 'grad_norm': 0.5604770703059284, 'learning_rate': 1.3578756698646056e-07, 'epoch': 0.93} + 93%|█████████▎| 11310/12188 [43:55<1:57:31, 8.03s/it] 93%|█████████▎| 11311/12188 [44:03<1:55:49, 7.92s/it] {'loss': 0.33, 'grad_norm': 0.7911659236298193, 'learning_rate': 1.3548018562702104e-07, 'epoch': 0.93} + 93%|█████████▎| 11311/12188 [44:03<1:55:49, 7.92s/it] 93%|█████████▎| 11312/12188 [44:11<1:54:04, 7.81s/it] {'loss': 0.3322, 'grad_norm': 0.6867741797946453, 'learning_rate': 1.3517314779215473e-07, 'epoch': 0.93} + 93%|█████████▎| 11312/12188 [44:11<1:54:04, 7.81s/it] 93%|█████████▎| 11313/12188 [44:19<1:54:20, 7.84s/it] {'loss': 0.284, 'grad_norm': 0.7257845619484413, 'learning_rate': 1.3486645350354599e-07, 'epoch': 0.93} + 93%|█████████▎| 11313/12188 [44:19<1:54:20, 7.84s/it] 93%|█████████▎| 11314/12188 [44:26<1:53:20, 7.78s/it] {'loss': 0.2507, 'grad_norm': 0.6726026381122251, 'learning_rate': 1.345601027828508e-07, 'epoch': 0.93} + 93%|█████████▎| 11314/12188 [44:26<1:53:20, 7.78s/it] 93%|█████████▎| 11315/12188 [44:34<1:51:12, 7.64s/it] {'loss': 0.2973, 'grad_norm': 0.6951390267442393, 'learning_rate': 1.342540956517041e-07, 'epoch': 0.93} + 93%|█████████▎| 11315/12188 [44:34<1:51:12, 7.64s/it] 93%|█████████▎| 11316/12188 [44:41<1:50:22, 7.59s/it] {'loss': 0.2904, 'grad_norm': 1.1983671059225978, 'learning_rate': 1.3394843213171638e-07, 'epoch': 0.93} + 93%|█████████▎| 11316/12188 [44:41<1:50:22, 7.59s/it] 93%|█████████▎| 11317/12188 [44:49<1:51:43, 7.70s/it] {'loss': 0.3144, 'grad_norm': 0.7013899192290893, 'learning_rate': 1.33643112244472e-07, 'epoch': 0.93} + 93%|█████████▎| 11317/12188 [44:49<1:51:43, 7.70s/it] 93%|█████████▎| 11318/12188 [44:58<1:58:43, 8.19s/it] {'loss': 0.2923, 'grad_norm': 0.7720146195306696, 'learning_rate': 1.3333813601153322e-07, 'epoch': 0.93} + 93%|█████████▎| 11318/12188 [44:58<1:58:43, 8.19s/it] 93%|█████████▎| 11319/12188 [45:06<1:55:48, 8.00s/it] {'loss': 0.2968, 'grad_norm': 0.6717292895047386, 'learning_rate': 1.330335034544361e-07, 'epoch': 0.93} + 93%|█████████▎| 11319/12188 [45:06<1:55:48, 8.00s/it] 93%|█████████▎| 11320/12188 [45:14<1:54:35, 7.92s/it] {'loss': 0.2755, 'grad_norm': 0.6662895003714538, 'learning_rate': 1.3272921459469345e-07, 'epoch': 0.93} + 93%|█████████▎| 11320/12188 [45:14<1:54:35, 7.92s/it] 93%|█████████▎| 11321/12188 [45:21<1:52:21, 7.78s/it] {'loss': 0.2975, 'grad_norm': 0.7679706841656804, 'learning_rate': 1.3242526945379363e-07, 'epoch': 0.93} + 93%|█████████▎| 11321/12188 [45:21<1:52:21, 7.78s/it] 93%|█████████▎| 11322/12188 [45:32<2:05:19, 8.68s/it] {'loss': 0.2891, 'grad_norm': 0.7331630502770061, 'learning_rate': 1.3212166805320114e-07, 'epoch': 0.93} + 93%|█████████▎| 11322/12188 [45:32<2:05:19, 8.68s/it] 93%|█████████▎| 11323/12188 [45:43<2:14:14, 9.31s/it] {'loss': 0.315, 'grad_norm': 0.6796957127717965, 'learning_rate': 1.3181841041435605e-07, 'epoch': 0.93} + 93%|█████████▎| 11323/12188 [45:43<2:14:14, 9.31s/it] 93%|█████████▎| 11324/12188 [45:50<2:06:45, 8.80s/it] {'loss': 0.3208, 'grad_norm': 0.7261566472812229, 'learning_rate': 1.315154965586729e-07, 'epoch': 0.93} + 93%|█████████▎| 11324/12188 [45:50<2:06:45, 8.80s/it] 93%|█████████▎| 11325/12188 [45:58<2:01:27, 8.44s/it] {'loss': 0.3047, 'grad_norm': 0.7273980639672608, 'learning_rate': 1.3121292650754402e-07, 'epoch': 0.93} + 93%|█████████▎| 11325/12188 [45:58<2:01:27, 8.44s/it] 93%|█████████▎| 11326/12188 [46:05<1:56:31, 8.11s/it] {'loss': 0.315, 'grad_norm': 0.6963380002685616, 'learning_rate': 1.3091070028233622e-07, 'epoch': 0.93} + 93%|█████████▎| 11326/12188 [46:05<1:56:31, 8.11s/it] 93%|█████████▎| 11327/12188 [46:13<1:53:53, 7.94s/it] {'loss': 0.3001, 'grad_norm': 0.7288534285840331, 'learning_rate': 1.306088179043913e-07, 'epoch': 0.93} + 93%|█████████▎| 11327/12188 [46:13<1:53:53, 7.94s/it] 93%|█████████▎| 11328/12188 [46:20<1:50:57, 7.74s/it] {'loss': 0.3039, 'grad_norm': 0.7935116915332042, 'learning_rate': 1.303072793950283e-07, 'epoch': 0.93} + 93%|█████████▎| 11328/12188 [46:20<1:50:57, 7.74s/it] 93%|█████████▎| 11329/12188 [46:30<2:01:09, 8.46s/it] {'loss': 0.3166, 'grad_norm': 0.7171768424513504, 'learning_rate': 1.3000608477554188e-07, 'epoch': 0.93} + 93%|█████████▎| 11329/12188 [46:30<2:01:09, 8.46s/it] 93%|█████████▎| 11330/12188 [46:40<2:05:35, 8.78s/it] {'loss': 0.2849, 'grad_norm': 0.7153761866589071, 'learning_rate': 1.2970523406720114e-07, 'epoch': 0.93} + 93%|█████████▎| 11330/12188 [46:40<2:05:35, 8.78s/it] 93%|█████████▎| 11331/12188 [46:47<1:58:44, 8.31s/it] {'loss': 0.2641, 'grad_norm': 0.6922826824402796, 'learning_rate': 1.2940472729125241e-07, 'epoch': 0.93} + 93%|█████████▎| 11331/12188 [46:47<1:58:44, 8.31s/it] 93%|█████████▎| 11332/12188 [46:57<2:07:16, 8.92s/it] {'loss': 0.3188, 'grad_norm': 0.6696058243899249, 'learning_rate': 1.291045644689165e-07, 'epoch': 0.93} + 93%|█████████▎| 11332/12188 [46:57<2:07:16, 8.92s/it] 93%|█████████▎| 11333/12188 [47:04<2:00:13, 8.44s/it] {'loss': 0.3465, 'grad_norm': 0.7074256875536896, 'learning_rate': 1.2880474562139033e-07, 'epoch': 0.93} + 93%|█████████▎| 11333/12188 [47:04<2:00:13, 8.44s/it] 93%|█████████▎| 11334/12188 [47:12<1:57:44, 8.27s/it] {'loss': 0.2549, 'grad_norm': 0.6877398087876817, 'learning_rate': 1.28505270769847e-07, 'epoch': 0.93} + 93%|█████████▎| 11334/12188 [47:12<1:57:44, 8.27s/it] 93%|█████████▎| 11335/12188 [47:20<1:53:00, 7.95s/it] {'loss': 0.2629, 'grad_norm': 0.7149710787690073, 'learning_rate': 1.2820613993543452e-07, 'epoch': 0.93} + 93%|█████████▎| 11335/12188 [47:20<1:53:00, 7.95s/it] 93%|█████████▎| 11336/12188 [47:29<1:59:26, 8.41s/it] {'loss': 0.2976, 'grad_norm': 0.7229540418757125, 'learning_rate': 1.2790735313927825e-07, 'epoch': 0.93} + 93%|█████████▎| 11336/12188 [47:29<1:59:26, 8.41s/it] 93%|█████████▎| 11337/12188 [47:38<2:02:35, 8.64s/it] {'loss': 0.2569, 'grad_norm': 0.6982596910270458, 'learning_rate': 1.2760891040247637e-07, 'epoch': 0.93} + 93%|█████████▎| 11337/12188 [47:38<2:02:35, 8.64s/it] 93%|█████████▎| 11338/12188 [47:46<1:57:10, 8.27s/it] {'loss': 0.2692, 'grad_norm': 0.6272067417141023, 'learning_rate': 1.2731081174610526e-07, 'epoch': 0.93} + 93%|█████████▎| 11338/12188 [47:46<1:57:10, 8.27s/it] 93%|█████████▎| 11339/12188 [47:53<1:54:26, 8.09s/it] {'loss': 0.3427, 'grad_norm': 0.6975718961443422, 'learning_rate': 1.270130571912165e-07, 'epoch': 0.93} + 93%|█████████▎| 11339/12188 [47:53<1:54:26, 8.09s/it] 93%|█████████▎| 11340/12188 [48:01<1:50:59, 7.85s/it] {'loss': 0.309, 'grad_norm': 0.6826704821554437, 'learning_rate': 1.2671564675883595e-07, 'epoch': 0.93} + 93%|█████████▎| 11340/12188 [48:01<1:50:59, 7.85s/it] 93%|█████████▎| 11341/12188 [48:08<1:50:13, 7.81s/it] {'loss': 0.26, 'grad_norm': 0.6865336270516733, 'learning_rate': 1.2641858046996636e-07, 'epoch': 0.93} + 93%|█████████▎| 11341/12188 [48:08<1:50:13, 7.81s/it] 93%|█████████▎| 11342/12188 [48:16<1:48:25, 7.69s/it] {'loss': 0.3107, 'grad_norm': 0.7539957717422371, 'learning_rate': 1.261218583455881e-07, 'epoch': 0.93} + 93%|█████████▎| 11342/12188 [48:16<1:48:25, 7.69s/it] 93%|█████████▎| 11343/12188 [48:23<1:46:47, 7.58s/it] {'loss': 0.3072, 'grad_norm': 0.7005539685016577, 'learning_rate': 1.2582548040665276e-07, 'epoch': 0.93} + 93%|█████████▎| 11343/12188 [48:23<1:46:47, 7.58s/it] 93%|█████████▎| 11344/12188 [48:33<1:57:30, 8.35s/it] {'loss': 0.2712, 'grad_norm': 0.6049307188471116, 'learning_rate': 1.2552944667409139e-07, 'epoch': 0.93} + 93%|█████████▎| 11344/12188 [48:33<1:57:30, 8.35s/it] 93%|█████████▎| 11345/12188 [48:42<1:58:09, 8.41s/it] {'loss': 0.3012, 'grad_norm': 0.692870470887097, 'learning_rate': 1.2523375716880836e-07, 'epoch': 0.93} + 93%|█████████▎| 11345/12188 [48:42<1:58:09, 8.41s/it] 93%|█████████▎| 11346/12188 [48:49<1:54:21, 8.15s/it] {'loss': 0.2719, 'grad_norm': 0.728680672057194, 'learning_rate': 1.2493841191168587e-07, 'epoch': 0.93} + 93%|██████��██▎| 11346/12188 [48:49<1:54:21, 8.15s/it] 93%|█████████▎| 11347/12188 [48:58<1:57:38, 8.39s/it] {'loss': 0.3029, 'grad_norm': 0.6899178005010467, 'learning_rate': 1.2464341092358113e-07, 'epoch': 0.93} + 93%|█████████▎| 11347/12188 [48:58<1:57:38, 8.39s/it] 93%|█████████▎| 11348/12188 [49:06<1:53:18, 8.09s/it] {'loss': 0.3118, 'grad_norm': 0.7627259455355251, 'learning_rate': 1.2434875422532467e-07, 'epoch': 0.93} + 93%|█████████▎| 11348/12188 [49:06<1:53:18, 8.09s/it] 93%|█████████▎| 11349/12188 [49:13<1:51:14, 7.96s/it] {'loss': 0.3078, 'grad_norm': 0.7068565581650353, 'learning_rate': 1.2405444183772654e-07, 'epoch': 0.93} + 93%|█████████▎| 11349/12188 [49:13<1:51:14, 7.96s/it] 93%|█████████▎| 11350/12188 [49:21<1:49:56, 7.87s/it] {'loss': 0.2544, 'grad_norm': 0.6601198203041769, 'learning_rate': 1.2376047378156952e-07, 'epoch': 0.93} + 93%|█████████▎| 11350/12188 [49:21<1:49:56, 7.87s/it] 93%|█████████▎| 11351/12188 [49:29<1:48:24, 7.77s/it] {'loss': 0.2985, 'grad_norm': 0.7434556273183656, 'learning_rate': 1.2346685007761428e-07, 'epoch': 0.93} + 93%|█████████▎| 11351/12188 [49:29<1:48:24, 7.77s/it] 93%|█████████▎| 11352/12188 [49:36<1:46:13, 7.62s/it] {'loss': 0.3105, 'grad_norm': 0.6686697344028023, 'learning_rate': 1.2317357074659476e-07, 'epoch': 0.93} + 93%|█████████▎| 11352/12188 [49:36<1:46:13, 7.62s/it] 93%|█████████▎| 11353/12188 [49:43<1:45:27, 7.58s/it] {'loss': 0.2637, 'grad_norm': 0.7620720448654159, 'learning_rate': 1.2288063580922272e-07, 'epoch': 0.93} + 93%|█████████▎| 11353/12188 [49:43<1:45:27, 7.58s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f67272e4090> +[Try #0] Failed to fetch sample 4411617 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f67272e4090> +Problematic sample: {'image': '20240827_145511_before_screenshot_sub3.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Q2790: 100%'"}, {'from': 'gpt', 'value': '\nclick(x=0.8995, y=0.972)\n'}]} + 93%|█████████▎| 11354/12188 [49:51<1:46:55, 7.69s/it] {'loss': 0.3004, 'grad_norm': 0.9563181249997663, 'learning_rate': 1.2258804528618495e-07, 'epoch': 0.93} + 93%|█████████▎| 11354/12188 [49:51<1:46:55, 7.69s/it] 93%|█████████▎| 11355/12188 [49:59<1:45:26, 7.59s/it] {'loss': 0.2715, 'grad_norm': 1.1432227354352245, 'learning_rate': 1.2229579919814217e-07, 'epoch': 0.93} + 93%|█████████▎| 11355/12188 [49:59<1:45:26, 7.59s/it] 93%|█████████▎| 11356/12188 [50:06<1:45:57, 7.64s/it] {'loss': 0.2876, 'grad_norm': 0.7305368604705774, 'learning_rate': 1.2200389756573395e-07, 'epoch': 0.93} + 93%|█████████▎| 11356/12188 [50:06<1:45:57, 7.64s/it] 93%|█████████▎| 11357/12188 [50:14<1:47:13, 7.74s/it] {'loss': 0.2401, 'grad_norm': 0.7160069013692295, 'learning_rate': 1.2171234040957324e-07, 'epoch': 0.93} + 93%|█████████▎| 11357/12188 [50:14<1:47:13, 7.74s/it] 93%|█████████▎| 11358/12188 [50:22<1:45:47, 7.65s/it] {'loss': 0.3396, 'grad_norm': 0.7472259082719492, 'learning_rate': 1.2142112775024972e-07, 'epoch': 0.93} + 93%|█████████▎| 11358/12188 [50:22<1:45:47, 7.65s/it] 93%|█████████▎| 11359/12188 [50:29<1:43:52, 7.52s/it] {'loss': 0.3047, 'grad_norm': 0.673307116597146, 'learning_rate': 1.2113025960832748e-07, 'epoch': 0.93} + 93%|█████████▎| 11359/12188 [50:29<1:43:52, 7.52s/it] 93%|█████████▎| 11360/12188 [50:36<1:43:41, 7.51s/it] {'loss': 0.2928, 'grad_norm': 0.719249184461802, 'learning_rate': 1.2083973600434894e-07, 'epoch': 0.93} + 93%|█████████▎| 11360/12188 [50:36<1:43:41, 7.51s/it] 93%|█████████▎| 11361/12188 [50:44<1:43:27, 7.51s/it] {'loss': 0.289, 'grad_norm': 0.6908989841745159, 'learning_rate': 1.205495569588283e-07, 'epoch': 0.93} + 93%|█████████▎| 11361/12188 [50:44<1:43:27, 7.51s/it] 93%|█████████▎| 11362/12188 [50:51<1:42:18, 7.43s/it] {'loss': 0.2945, 'grad_norm': 0.699792260463337, 'learning_rate': 1.2025972249225914e-07, 'epoch': 0.93} + 93%|█████████▎| 11362/12188 [50:51<1:42:18, 7.43s/it] 93%|█████████▎| 11363/12188 [50:59<1:41:59, 7.42s/it] {'loss': 0.2979, 'grad_norm': 0.6724413265549596, 'learning_rate': 1.1997023262510788e-07, 'epoch': 0.93} + 93%|█████████▎| 11363/12188 [50:59<1:41:59, 7.42s/it] 93%|█████████▎| 11364/12188 [51:07<1:43:56, 7.57s/it] {'loss': 0.3047, 'grad_norm': 1.2135802262445003, 'learning_rate': 1.196810873778187e-07, 'epoch': 0.93} + 93%|█████████▎| 11364/12188 [51:07<1:43:56, 7.57s/it] 93%|█████████▎| 11365/12188 [51:14<1:44:50, 7.64s/it] {'loss': 0.3267, 'grad_norm': 0.7713717069788307, 'learning_rate': 1.1939228677081027e-07, 'epoch': 0.93} + 93%|█████████▎| 11365/12188 [51:14<1:44:50, 7.64s/it] 93%|█████████▎| 11366/12188 [51:22<1:45:59, 7.74s/it] {'loss': 0.2718, 'grad_norm': 0.7822713932341598, 'learning_rate': 1.1910383082447797e-07, 'epoch': 0.93} + 93%|█████████▎| 11366/12188 [51:22<1:45:59, 7.74s/it] 93%|█████████▎| 11367/12188 [51:30<1:45:54, 7.74s/it] {'loss': 0.307, 'grad_norm': 0.6675976532896847, 'learning_rate': 1.1881571955919102e-07, 'epoch': 0.93} + 93%|█████████▎| 11367/12188 [51:30<1:45:54, 7.74s/it] 93%|█████████▎| 11368/12188 [51:37<1:44:33, 7.65s/it] {'loss': 0.2597, 'grad_norm': 0.6540416551397882, 'learning_rate': 1.1852795299529485e-07, 'epoch': 0.93} + 93%|█████████▎| 11368/12188 [51:37<1:44:33, 7.65s/it] 93%|█████████▎| 11369/12188 [51:45<1:43:35, 7.59s/it] {'loss': 0.2947, 'grad_norm': 0.7188410466220364, 'learning_rate': 1.1824053115311263e-07, 'epoch': 0.93} + 93%|█████████▎| 11369/12188 [51:45<1:43:35, 7.59s/it] 93%|█████████▎| 11370/12188 [51:52<1:43:22, 7.58s/it] {'loss': 0.2798, 'grad_norm': 0.6572633976137058, 'learning_rate': 1.179534540529409e-07, 'epoch': 0.93} + 93%|█████████▎| 11370/12188 [51:52<1:43:22, 7.58s/it] 93%|█████████▎| 11371/12188 [52:00<1:43:31, 7.60s/it] {'loss': 0.2835, 'grad_norm': 0.7203672925118564, 'learning_rate': 1.1766672171505289e-07, 'epoch': 0.93} + 93%|█████████▎| 11371/12188 [52:00<1:43:31, 7.60s/it] 93%|█████████▎| 11372/12188 [52:08<1:43:35, 7.62s/it] {'loss': 0.3188, 'grad_norm': 0.6943549169379132, 'learning_rate': 1.1738033415969686e-07, 'epoch': 0.93} + 93%|█████████▎| 11372/12188 [52:08<1:43:35, 7.62s/it] 93%|█████████▎| 11373/12188 [52:15<1:43:20, 7.61s/it] {'loss': 0.2835, 'grad_norm': 0.7696880228382366, 'learning_rate': 1.1709429140709661e-07, 'epoch': 0.93} + 93%|█████████▎| 11373/12188 [52:15<1:43:20, 7.61s/it] 93%|█████████▎| 11374/12188 [52:24<1:47:20, 7.91s/it] {'loss': 0.3169, 'grad_norm': 0.6965586799562371, 'learning_rate': 1.1680859347745266e-07, 'epoch': 0.93} + 93%|█████████▎| 11374/12188 [52:24<1:47:20, 7.91s/it] 93%|█████████▎| 11375/12188 [52:31<1:45:28, 7.78s/it] {'loss': 0.2869, 'grad_norm': 0.6910341328217541, 'learning_rate': 1.1652324039094054e-07, 'epoch': 0.93} + 93%|█████████▎| 11375/12188 [52:31<1:45:28, 7.78s/it] 93%|█████████▎| 11376/12188 [52:39<1:43:41, 7.66s/it] {'loss': 0.2889, 'grad_norm': 0.82551054963208, 'learning_rate': 1.1623823216771135e-07, 'epoch': 0.93} + 93%|█████████▎| 11376/12188 [52:39<1:43:41, 7.66s/it] 93%|█████████▎| 11377/12188 [52:48<1:47:48, 7.98s/it] {'loss': 0.2892, 'grad_norm': 0.7152022241461462, 'learning_rate': 1.1595356882789122e-07, 'epoch': 0.93} + 93%|█████████▎| 11377/12188 [52:48<1:47:48, 7.98s/it] 93%|█████████▎| 11378/12188 [52:55<1:45:59, 7.85s/it] {'loss': 0.3125, 'grad_norm': 0.6126444972274933, 'learning_rate': 1.1566925039158405e-07, 'epoch': 0.93} + 93%|█████████▎| 11378/12188 [52:55<1:45:59, 7.85s/it] 93%|█████████▎| 11379/12188 [53:03<1:44:05, 7.72s/it] {'loss': 0.2835, 'grad_norm': 0.7202586611361574, 'learning_rate': 1.1538527687886714e-07, 'epoch': 0.93} + 93%|█████████▎| 11379/12188 [53:03<1:44:05, 7.72s/it] 93%|█████████▎| 11380/12188 [53:11<1:45:11, 7.81s/it] {'loss': 0.2519, 'grad_norm': 0.8195151975378883, 'learning_rate': 1.1510164830979387e-07, 'epoch': 0.93} + 93%|█████████▎| 11380/12188 [53:11<1:45:11, 7.81s/it] 93%|█████████▎| 11381/12188 [53:18<1:44:11, 7.75s/it] {'loss': 0.2732, 'grad_norm': 0.7090745014823181, 'learning_rate': 1.1481836470439433e-07, 'epoch': 0.93} + 93%|█████████▎| 11381/12188 [53:18<1:44:11, 7.75s/it] 93%|█████████▎| 11382/12188 [53:25<1:42:20, 7.62s/it] {'loss': 0.2937, 'grad_norm': 0.6423483536903929, 'learning_rate': 1.145354260826731e-07, 'epoch': 0.93} + 93%|█████████▎| 11382/12188 [53:25<1:42:20, 7.62s/it] 93%|█████████▎| 11383/12188 [53:34<1:44:18, 7.77s/it] {'loss': 0.3156, 'grad_norm': 0.6896616274033623, 'learning_rate': 1.1425283246461083e-07, 'epoch': 0.93} + 93%|█████████▎| 11383/12188 [53:34<1:44:18, 7.77s/it] 93%|█████████▎| 11384/12188 [53:41<1:42:47, 7.67s/it] {'loss': 0.3098, 'grad_norm': 0.6448863618577595, 'learning_rate': 1.139705838701638e-07, 'epoch': 0.93} + 93%|█████████▎| 11384/12188 [53:41<1:42:47, 7.67s/it] 93%|█████████▎| 11385/12188 [53:50<1:47:09, 8.01s/it] {'loss': 0.2751, 'grad_norm': 0.7789249096004623, 'learning_rate': 1.1368868031926494e-07, 'epoch': 0.93} + 93%|█████████▎| 11385/12188 [53:50<1:47:09, 8.01s/it] 93%|█████████▎| 11386/12188 [53:58<1:48:52, 8.15s/it] {'loss': 0.2917, 'grad_norm': 0.6936033014838413, 'learning_rate': 1.1340712183182002e-07, 'epoch': 0.93} + 93%|█████████▎| 11386/12188 [53:58<1:48:52, 8.15s/it] 93%|█████████▎| 11387/12188 [54:07<1:51:19, 8.34s/it] {'loss': 0.2998, 'grad_norm': 0.7659509136121095, 'learning_rate': 1.1312590842771365e-07, 'epoch': 0.93} + 93%|█████████▎| 11387/12188 [54:07<1:51:19, 8.34s/it] 93%|█████████▎| 11388/12188 [54:14<1:46:54, 8.02s/it] {'loss': 0.3175, 'grad_norm': 0.7350386997255511, 'learning_rate': 1.1284504012680442e-07, 'epoch': 0.93} + 93%|█████████▎| 11388/12188 [54:14<1:46:54, 8.02s/it] 93%|█████████▎| 11389/12188 [54:22<1:43:55, 7.80s/it] {'loss': 0.2637, 'grad_norm': 0.8958812115886376, 'learning_rate': 1.1256451694892645e-07, 'epoch': 0.93} + 93%|█████████▎| 11389/12188 [54:22<1:43:55, 7.80s/it] 93%|█████████▎| 11390/12188 [54:30<1:46:24, 8.00s/it] {'loss': 0.2717, 'grad_norm': 0.7508145405763, 'learning_rate': 1.1228433891389056e-07, 'epoch': 0.93} + 93%|█████████▎| 11390/12188 [54:30<1:46:24, 8.00s/it] 93%|█████████▎| 11391/12188 [54:38<1:43:52, 7.82s/it] {'loss': 0.2851, 'grad_norm': 0.6647608854048891, 'learning_rate': 1.1200450604148095e-07, 'epoch': 0.93} + 93%|█████████▎| 11391/12188 [54:38<1:43:52, 7.82s/it] 93%|█████████▎| 11392/12188 [54:46<1:45:53, 7.98s/it] {'loss': 0.3381, 'grad_norm': 0.66358182941827, 'learning_rate': 1.1172501835146065e-07, 'epoch': 0.93} + 93%|█████████▎| 11392/12188 [54:46<1:45:53, 7.98s/it] 93%|█████████▎| 11393/12188 [54:57<1:56:36, 8.80s/it] {'loss': 0.2757, 'grad_norm': 0.740793170584464, 'learning_rate': 1.1144587586356614e-07, 'epoch': 0.93} + 93%|█████████▎| 11393/12188 [54:57<1:56:36, 8.80s/it] 93%|█████████▎| 11394/12188 [55:06<1:59:29, 9.03s/it] {'loss': 0.2922, 'grad_norm': 0.6512646350553803, 'learning_rate': 1.1116707859750941e-07, 'epoch': 0.93} + 93%|█████████▎| 11394/12188 [55:06<1:59:29, 9.03s/it] 93%|█████████▎| 11395/12188 [55:16<2:03:35, 9.35s/it] {'loss': 0.2677, 'grad_norm': 0.6850469949552235, 'learning_rate': 1.1088862657297916e-07, 'epoch': 0.93} + 93%|█████████▎| 11395/12188 [55:16<2:03:35, 9.35s/it] 94%|█████████▎| 11396/12188 [55:24<1:55:16, 8.73s/it] {'loss': 0.3162, 'grad_norm': 0.7005914661754405, 'learning_rate': 1.106105198096391e-07, 'epoch': 0.93} + 94%|█████████▎| 11396/12188 [55:24<1:55:16, 8.73s/it] 94%|█████████▎| 11397/12188 [55:31<1:50:02, 8.35s/it] {'loss': 0.2827, 'grad_norm': 0.7571390339721051, 'learning_rate': 1.1033275832712853e-07, 'epoch': 0.94} + 94%|█████████▎| 11397/12188 [55:31<1:50:02, 8.35s/it] 94%|█████████▎| 11398/12188 [55:39<1:46:38, 8.10s/it] {'loss': 0.2982, 'grad_norm': 0.7069939723087038, 'learning_rate': 1.1005534214506342e-07, 'epoch': 0.94} + 94%|█████████▎| 11398/12188 [55:39<1:46:38, 8.10s/it] 94%|█████████▎| 11399/12188 [55:46<1:44:35, 7.95s/it] {'loss': 0.3063, 'grad_norm': 0.6404881203116829, 'learning_rate': 1.0977827128303309e-07, 'epoch': 0.94} + 94%|█████████▎| 11399/12188 [55:46<1:44:35, 7.95s/it] 94%|█████████▎| 11400/12188 [55:54<1:44:04, 7.92s/it] {'loss': 0.2818, 'grad_norm': 0.723347912509273, 'learning_rate': 1.0950154576060413e-07, 'epoch': 0.94} + 94%|█████████▎| 11400/12188 [55:54<1:44:04, 7.92s/it] 94%|█████████▎| 11401/12188 [56:01<1:42:04, 7.78s/it] {'loss': 0.2668, 'grad_norm': 0.7542077419544452, 'learning_rate': 1.0922516559731978e-07, 'epoch': 0.94} + 94%|█████████▎| 11401/12188 [56:01<1:42:04, 7.78s/it] 94%|█████████▎| 11402/12188 [56:09<1:40:10, 7.65s/it] {'loss': 0.3269, 'grad_norm': 0.7177162248985793, 'learning_rate': 1.0894913081269608e-07, 'epoch': 0.94} + 94%|█████████▎| 11402/12188 [56:09<1:40:10, 7.65s/it] 94%|█████████▎| 11403/12188 [56:16<1:39:24, 7.60s/it] {'loss': 0.3109, 'grad_norm': 0.6966283999230296, 'learning_rate': 1.0867344142622693e-07, 'epoch': 0.94} + 94%|█████████▎| 11403/12188 [56:16<1:39:24, 7.60s/it] 94%|█████████▎| 11404/12188 [56:24<1:37:58, 7.50s/it] {'loss': 0.3031, 'grad_norm': 0.7108086248359303, 'learning_rate': 1.0839809745738061e-07, 'epoch': 0.94} + 94%|█████████▎| 11404/12188 [56:24<1:37:58, 7.50s/it] 94%|█████████▎| 11405/12188 [56:31<1:37:55, 7.50s/it] {'loss': 0.2796, 'grad_norm': 0.756789845449846, 'learning_rate': 1.0812309892560157e-07, 'epoch': 0.94} + 94%|█████████▎| 11405/12188 [56:31<1:37:55, 7.50s/it] 94%|█████████▎| 11406/12188 [56:40<1:44:30, 8.02s/it] {'loss': 0.3226, 'grad_norm': 0.6312500303703533, 'learning_rate': 1.078484458503104e-07, 'epoch': 0.94} + 94%|█████████▎| 11406/12188 [56:40<1:44:30, 8.02s/it] 94%|█████████▎| 11407/12188 [56:47<1:41:15, 7.78s/it] {'loss': 0.2677, 'grad_norm': 0.6705309153607277, 'learning_rate': 1.0757413825090212e-07, 'epoch': 0.94} + 94%|█████████▎| 11407/12188 [56:47<1:41:15, 7.78s/it] 94%|█████████▎| 11408/12188 [56:55<1:40:43, 7.75s/it] {'loss': 0.2916, 'grad_norm': 0.7069090425686048, 'learning_rate': 1.0730017614674736e-07, 'epoch': 0.94} + 94%|█████████▎| 11408/12188 [56:55<1:40:43, 7.75s/it] 94%|█████████▎| 11409/12188 [57:03<1:39:27, 7.66s/it] {'loss': 0.3257, 'grad_norm': 0.7021047243998828, 'learning_rate': 1.07026559557194e-07, 'epoch': 0.94} + 94%|█████████▎| 11409/12188 [57:03<1:39:27, 7.66s/it] 94%|█████████▎| 11410/12188 [57:10<1:37:49, 7.54s/it] {'loss': 0.2897, 'grad_norm': 0.6482833492790715, 'learning_rate': 1.0675328850156319e-07, 'epoch': 0.94} + 94%|█████████▎| 11410/12188 [57:10<1:37:49, 7.54s/it] 94%|█████████▎| 11411/12188 [57:17<1:37:43, 7.55s/it] {'loss': 0.3561, 'grad_norm': 1.0958555761546935, 'learning_rate': 1.0648036299915399e-07, 'epoch': 0.94} + 94%|█████████▎| 11411/12188 [57:17<1:37:43, 7.55s/it] 94%|█████████▎| 11412/12188 [57:27<1:47:21, 8.30s/it] {'loss': 0.3077, 'grad_norm': 1.3956406613874863, 'learning_rate': 1.0620778306923929e-07, 'epoch': 0.94} + 94%|█████████▎| 11412/12188 [57:27<1:47:21, 8.30s/it] 94%|█████████▎| 11413/12188 [57:35<1:44:22, 8.08s/it] {'loss': 0.3324, 'grad_norm': 0.7270396975116507, 'learning_rate': 1.059355487310687e-07, 'epoch': 0.94} + 94%|█████████▎| 11413/12188 [57:35<1:44:22, 8.08s/it] 94%|█████████▎| 11414/12188 [57:43<1:42:21, 7.93s/it] {'loss': 0.2647, 'grad_norm': 0.6539490933305117, 'learning_rate': 1.0566366000386685e-07, 'epoch': 0.94} + 94%|█████████▎| 11414/12188 [57:43<1:42:21, 7.93s/it] 94%|█████████▎| 11415/12188 [57:50<1:40:59, 7.84s/it] {'loss': 0.2955, 'grad_norm': 0.7342834202437024, 'learning_rate': 1.0539211690683337e-07, 'epoch': 0.94} + 94%|█████████▎| 11415/12188 [57:50<1:40:59, 7.84s/it] 94%|█████████▎| 11416/12188 [57:58<1:38:38, 7.67s/it] {'loss': 0.2851, 'grad_norm': 0.6738348399562187, 'learning_rate': 1.0512091945914571e-07, 'epoch': 0.94} + 94%|█████████▎| 11416/12188 [57:58<1:38:38, 7.67s/it] 94%|█████████▎| 11417/12188 [58:06<1:41:04, 7.87s/it] {'loss': 0.3022, 'grad_norm': 0.6454627459951423, 'learning_rate': 1.0485006767995354e-07, 'epoch': 0.94} + 94%|█████████▎| 11417/12188 [58:06<1:41:04, 7.87s/it] 94%|█████████▎| 11418/12188 [58:14<1:40:21, 7.82s/it] {'loss': 0.3342, 'grad_norm': 0.6810733174296697, 'learning_rate': 1.0457956158838545e-07, 'epoch': 0.94} + 94%|█████████▎| 11418/12188 [58:14<1:40:21, 7.82s/it] 94%|█████████▎| 11419/12188 [58:21<1:40:20, 7.83s/it] {'loss': 0.3137, 'grad_norm': 0.7096071201988584, 'learning_rate': 1.0430940120354393e-07, 'epoch': 0.94} + 94%|█████████▎| 11419/12188 [58:21<1:40:20, 7.83s/it] 94%|█████████▎| 11420/12188 [58:31<1:46:28, 8.32s/it] {'loss': 0.3235, 'grad_norm': 0.7091124758561269, 'learning_rate': 1.0403958654450652e-07, 'epoch': 0.94} + 94%|█████████▎| 11420/12188 [58:31<1:46:28, 8.32s/it] 94%|█████████▎| 11421/12188 [58:39<1:44:23, 8.17s/it] {'loss': 0.296, 'grad_norm': 0.716195530791351, 'learning_rate': 1.037701176303274e-07, 'epoch': 0.94} + 94%|█████���███▎| 11421/12188 [58:39<1:44:23, 8.17s/it] 94%|█████████▎| 11422/12188 [58:46<1:42:12, 8.01s/it] {'loss': 0.2847, 'grad_norm': 0.6801368675866272, 'learning_rate': 1.0350099448003692e-07, 'epoch': 0.94} + 94%|█████████▎| 11422/12188 [58:46<1:42:12, 8.01s/it] 94%|█████████▎| 11423/12188 [58:55<1:44:08, 8.17s/it] {'loss': 0.3201, 'grad_norm': 0.6666004055091737, 'learning_rate': 1.0323221711263876e-07, 'epoch': 0.94} + 94%|█████████▎| 11423/12188 [58:55<1:44:08, 8.17s/it] 94%|█████████▎| 11424/12188 [59:02<1:40:57, 7.93s/it] {'loss': 0.3032, 'grad_norm': 0.7503059771870692, 'learning_rate': 1.0296378554711439e-07, 'epoch': 0.94} + 94%|█████████▎| 11424/12188 [59:02<1:40:57, 7.93s/it] 94%|█████████▎| 11425/12188 [59:10<1:38:23, 7.74s/it] {'loss': 0.2723, 'grad_norm': 0.6609817933238784, 'learning_rate': 1.0269569980241978e-07, 'epoch': 0.94} + 94%|█████████▎| 11425/12188 [59:10<1:38:23, 7.74s/it] 94%|█████████▎| 11426/12188 [59:17<1:36:30, 7.60s/it] {'loss': 0.3184, 'grad_norm': 0.6680656245403223, 'learning_rate': 1.02427959897487e-07, 'epoch': 0.94} + 94%|█████████▎| 11426/12188 [59:17<1:36:30, 7.60s/it] 94%|█████████▍| 11427/12188 [59:25<1:37:08, 7.66s/it] {'loss': 0.2599, 'grad_norm': 0.7027542145849075, 'learning_rate': 1.0216056585122258e-07, 'epoch': 0.94} + 94%|█████████▍| 11427/12188 [59:25<1:37:08, 7.66s/it] 94%|█████████▍| 11428/12188 [59:32<1:36:10, 7.59s/it] {'loss': 0.3173, 'grad_norm': 0.7597473867667347, 'learning_rate': 1.0189351768250977e-07, 'epoch': 0.94} + 94%|█████████▍| 11428/12188 [59:32<1:36:10, 7.59s/it] 94%|█████████▍| 11429/12188 [59:39<1:34:41, 7.49s/it] {'loss': 0.2952, 'grad_norm': 0.7763884027728617, 'learning_rate': 1.0162681541020791e-07, 'epoch': 0.94} + 94%|█████████▍| 11429/12188 [59:39<1:34:41, 7.49s/it] 94%|█████████▍| 11430/12188 [59:47<1:34:23, 7.47s/it] {'loss': 0.3512, 'grad_norm': 0.67041235146651, 'learning_rate': 1.0136045905315028e-07, 'epoch': 0.94} + 94%|█████████▍| 11430/12188 [59:47<1:34:23, 7.47s/it] 94%|█████████▍| 11431/12188 [59:54<1:34:23, 7.48s/it] {'loss': 0.2799, 'grad_norm': 0.6778152558807613, 'learning_rate': 1.0109444863014684e-07, 'epoch': 0.94} + 94%|█████████▍| 11431/12188 [59:54<1:34:23, 7.48s/it] 94%|█████████▍| 11432/12188 [1:00:02<1:33:35, 7.43s/it] {'loss': 0.2935, 'grad_norm': 0.8274101173392185, 'learning_rate': 1.0082878415998309e-07, 'epoch': 0.94} + 94%|█████████▍| 11432/12188 [1:00:02<1:33:35, 7.43s/it] 94%|█████████▍| 11433/12188 [1:00:09<1:34:32, 7.51s/it] {'loss': 0.3289, 'grad_norm': 0.7236163054366673, 'learning_rate': 1.0056346566141906e-07, 'epoch': 0.94} + 94%|█████████▍| 11433/12188 [1:00:09<1:34:32, 7.51s/it] 94%|█████████▍| 11434/12188 [1:00:17<1:35:15, 7.58s/it] {'loss': 0.2991, 'grad_norm': 0.68777586512269, 'learning_rate': 1.0029849315319196e-07, 'epoch': 0.94} + 94%|█████████▍| 11434/12188 [1:00:17<1:35:15, 7.58s/it] 94%|█████████▍| 11435/12188 [1:00:24<1:34:32, 7.53s/it] {'loss': 0.3161, 'grad_norm': 0.7060564727549526, 'learning_rate': 1.0003386665401293e-07, 'epoch': 0.94} + 94%|█████████▍| 11435/12188 [1:00:24<1:34:32, 7.53s/it] 94%|█████████▍| 11436/12188 [1:00:32<1:35:25, 7.61s/it] {'loss': 0.2877, 'grad_norm': 0.6792646972898028, 'learning_rate': 9.976958618256982e-08, 'epoch': 0.94} + 94%|█████████▍| 11436/12188 [1:00:32<1:35:25, 7.61s/it] 94%|█████████▍| 11437/12188 [1:00:40<1:36:50, 7.74s/it] {'loss': 0.3436, 'grad_norm': 0.8881067833235698, 'learning_rate': 9.950565175752602e-08, 'epoch': 0.94} + 94%|█████████▍| 11437/12188 [1:00:40<1:36:50, 7.74s/it] 94%|█████████▍| 11438/12188 [1:00:48<1:37:35, 7.81s/it] {'loss': 0.3157, 'grad_norm': 0.9293869187017928, 'learning_rate': 9.92420633975194e-08, 'epoch': 0.94} + 94%|█████████▍| 11438/12188 [1:00:48<1:37:35, 7.81s/it] 94%|█████████▍| 11439/12188 [1:00:57<1:42:27, 8.21s/it] {'loss': 0.2773, 'grad_norm': 0.6377382813319581, 'learning_rate': 9.897882112116509e-08, 'epoch': 0.94} + 94%|█████████▍| 11439/12188 [1:00:57<1:42:27, 8.21s/it] 94%|█████████▍| 11440/12188 [1:01:05<1:39:26, 7.98s/it] {'loss': 0.3107, 'grad_norm': 0.6836583759668082, 'learning_rate': 9.871592494705206e-08, 'epoch': 0.94} + 94%|█████████▍| 11440/12188 [1:01:05<1:39:26, 7.98s/it] 94%|█████████▍| 11441/12188 [1:01:16<1:52:24, 9.03s/it] {'loss': 0.2921, 'grad_norm': 0.7308620994963569, 'learning_rate': 9.845337489374551e-08, 'epoch': 0.94} + 94%|█████████▍| 11441/12188 [1:01:16<1:52:24, 9.03s/it] 94%|█████████▍| 11442/12188 [1:01:25<1:50:40, 8.90s/it] {'loss': 0.3062, 'grad_norm': 0.9451983149243877, 'learning_rate': 9.819117097978726e-08, 'epoch': 0.94} + 94%|█████████▍| 11442/12188 [1:01:25<1:50:40, 8.90s/it] 94%|█████████▍| 11443/12188 [1:01:34<1:51:56, 9.01s/it] {'loss': 0.2911, 'grad_norm': 1.127779596808752, 'learning_rate': 9.792931322369304e-08, 'epoch': 0.94} + 94%|█████████▍| 11443/12188 [1:01:34<1:51:56, 9.01s/it] 94%|█████████▍| 11444/12188 [1:01:43<1:52:06, 9.04s/it] {'loss': 0.2839, 'grad_norm': 0.7039047667305782, 'learning_rate': 9.766780164395472e-08, 'epoch': 0.94} + 94%|█████████▍| 11444/12188 [1:01:43<1:52:06, 9.04s/it] 94%|█████████▍| 11445/12188 [1:01:51<1:46:58, 8.64s/it] {'loss': 0.2885, 'grad_norm': 0.7241812376798402, 'learning_rate': 9.740663625904034e-08, 'epoch': 0.94} + 94%|█████████▍| 11445/12188 [1:01:51<1:46:58, 8.64s/it] 94%|█████████▍| 11446/12188 [1:01:58<1:42:28, 8.29s/it] {'loss': 0.3162, 'grad_norm': 0.7773796437347924, 'learning_rate': 9.714581708739235e-08, 'epoch': 0.94} + 94%|█████████▍| 11446/12188 [1:01:58<1:42:28, 8.29s/it] 94%|█████████▍| 11447/12188 [1:02:09<1:49:31, 8.87s/it] {'loss': 0.2962, 'grad_norm': 0.7057798068506712, 'learning_rate': 9.688534414743045e-08, 'epoch': 0.94} + 94%|█████████▍| 11447/12188 [1:02:09<1:49:31, 8.87s/it] 94%|█████████▍| 11448/12188 [1:02:16<1:45:05, 8.52s/it] {'loss': 0.3274, 'grad_norm': 0.7265873181916543, 'learning_rate': 9.662521745754716e-08, 'epoch': 0.94} + 94%|█████████▍| 11448/12188 [1:02:16<1:45:05, 8.52s/it] 94%|█████████▍| 11449/12188 [1:02:24<1:41:04, 8.21s/it] {'loss': 0.2748, 'grad_norm': 0.7377738352545727, 'learning_rate': 9.63654370361139e-08, 'epoch': 0.94} + 94%|█████████▍| 11449/12188 [1:02:24<1:41:04, 8.21s/it] 94%|█████████▍| 11450/12188 [1:02:33<1:46:03, 8.62s/it] {'loss': 0.2875, 'grad_norm': 0.6887898273397809, 'learning_rate': 9.610600290147487e-08, 'epoch': 0.94} + 94%|█████████▍| 11450/12188 [1:02:33<1:46:03, 8.62s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f77bd5237e0> +[Try #0] Failed to fetch sample 4654491 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f77bd5237e0> +Problematic sample: {'image': '20240827_145511_before_screenshot_sub3.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Show desktop'"}, {'from': 'gpt', 'value': '\nclick(x=0.998, y=0.972)\n'}]} + 94%|█████████▍| 11451/12188 [1:02:41<1:41:34, 8.27s/it] {'loss': 0.3198, 'grad_norm': 0.7268902331079897, 'learning_rate': 9.584691507195098e-08, 'epoch': 0.94} + 94%|█████████▍| 11451/12188 [1:02:41<1:41:34, 8.27s/it] 94%|█████████▍| 11452/12188 [1:02:49<1:42:12, 8.33s/it] {'loss': 0.3372, 'grad_norm': 0.7756066819228182, 'learning_rate': 9.558817356583926e-08, 'epoch': 0.94} + 94%|█████████▍| 11452/12188 [1:02:49<1:42:12, 8.33s/it] 94%|█████████▍| 11453/12188 [1:02:58<1:43:25, 8.44s/it] {'loss': 0.2736, 'grad_norm': 0.7487825956126792, 'learning_rate': 9.532977840141123e-08, 'epoch': 0.94} + 94%|█████████▍| 11453/12188 [1:02:58<1:43:25, 8.44s/it] 94%|█████████▍| 11454/12188 [1:03:06<1:41:07, 8.27s/it] {'loss': 0.2774, 'grad_norm': 0.7279215288386756, 'learning_rate': 9.507172959691336e-08, 'epoch': 0.94} + 94%|█████████▍| 11454/12188 [1:03:06<1:41:07, 8.27s/it] 94%|█████████▍| 11455/12188 [1:03:13<1:37:52, 8.01s/it] {'loss': 0.3307, 'grad_norm': 0.7684900591157463, 'learning_rate': 9.481402717057053e-08, 'epoch': 0.94} + 94%|█████████▍| 11455/12188 [1:03:13<1:37:52, 8.01s/it] 94%|█████████▍| 11456/12188 [1:03:21<1:35:37, 7.84s/it] {'loss': 0.2962, 'grad_norm': 0.7222079140562472, 'learning_rate': 9.455667114057988e-08, 'epoch': 0.94} + 94%|█████████▍| 11456/12188 [1:03:21<1:35:37, 7.84s/it] 94%|█████████▍| 11457/12188 [1:03:29<1:35:49, 7.87s/it] {'loss': 0.3188, 'grad_norm': 0.9023910511683461, 'learning_rate': 9.429966152511627e-08, 'epoch': 0.94} + 94%|█████████▍| 11457/12188 [1:03:29<1:35:49, 7.87s/it] 94%|█████████▍| 11458/12188 [1:03:37<1:35:56, 7.89s/it] {'loss': 0.2626, 'grad_norm': 0.7323408846256841, 'learning_rate': 9.404299834232855e-08, 'epoch': 0.94} + 94%|█████████▍| 11458/12188 [1:03:37<1:35:56, 7.89s/it] 94%|█████████▍| 11459/12188 [1:03:44<1:35:24, 7.85s/it] {'loss': 0.2925, 'grad_norm': 0.678084038314316, 'learning_rate': 9.378668161034222e-08, 'epoch': 0.94} + 94%|█████████▍| 11459/12188 [1:03:44<1:35:24, 7.85s/it] 94%|█████████▍| 11460/12188 [1:03:54<1:42:34, 8.45s/it] {'loss': 0.2676, 'grad_norm': 0.6773146536634166, 'learning_rate': 9.353071134725888e-08, 'epoch': 0.94} + 94%|█████████▍| 11460/12188 [1:03:54<1:42:34, 8.45s/it] 94%|█████████▍| 11461/12188 [1:04:02<1:39:55, 8.25s/it] {'loss': 0.2996, 'grad_norm': 0.7191334700738516, 'learning_rate': 9.3275087571153e-08, 'epoch': 0.94} + 94%|█████████▍| 11461/12188 [1:04:02<1:39:55, 8.25s/it] 94%|█████████▍| 11462/12188 [1:04:10<1:37:57, 8.10s/it] {'loss': 0.2865, 'grad_norm': 0.7069648059663971, 'learning_rate': 9.301981030007734e-08, 'epoch': 0.94} + 94%|█████████▍| 11462/12188 [1:04:10<1:37:57, 8.10s/it] 94%|█████████▍| 11463/12188 [1:04:18<1:37:17, 8.05s/it] {'loss': 0.2637, 'grad_norm': 0.8005826709223106, 'learning_rate': 9.27648795520597e-08, 'epoch': 0.94} + 94%|███████��█▍| 11463/12188 [1:04:18<1:37:17, 8.05s/it] 94%|█████████▍| 11464/12188 [1:04:26<1:38:22, 8.15s/it] {'loss': 0.2903, 'grad_norm': 0.6631779215825939, 'learning_rate': 9.251029534510126e-08, 'epoch': 0.94} + 94%|█████████▍| 11464/12188 [1:04:26<1:38:22, 8.15s/it] 94%|█████████▍| 11465/12188 [1:04:34<1:38:55, 8.21s/it] {'loss': 0.3092, 'grad_norm': 0.7086006727272716, 'learning_rate': 9.225605769718149e-08, 'epoch': 0.94} + 94%|█████████▍| 11465/12188 [1:04:34<1:38:55, 8.21s/it] 94%|█████████▍| 11466/12188 [1:04:42<1:35:54, 7.97s/it] {'loss': 0.2579, 'grad_norm': 0.664737686550547, 'learning_rate': 9.200216662625439e-08, 'epoch': 0.94} + 94%|█████████▍| 11466/12188 [1:04:42<1:35:54, 7.97s/it] 94%|█████████▍| 11467/12188 [1:04:49<1:33:08, 7.75s/it] {'loss': 0.2873, 'grad_norm': 0.7508361526648413, 'learning_rate': 9.174862215024837e-08, 'epoch': 0.94} + 94%|█████████▍| 11467/12188 [1:04:49<1:33:08, 7.75s/it] 94%|█████████▍| 11468/12188 [1:04:59<1:41:30, 8.46s/it] {'loss': 0.2839, 'grad_norm': 0.6725443568134285, 'learning_rate': 9.14954242870697e-08, 'epoch': 0.94} + 94%|█████████▍| 11468/12188 [1:04:59<1:41:30, 8.46s/it] 94%|█████████▍| 11469/12188 [1:05:09<1:44:52, 8.75s/it] {'loss': 0.2685, 'grad_norm': 0.7709590716724483, 'learning_rate': 9.124257305459738e-08, 'epoch': 0.94} + 94%|█████████▍| 11469/12188 [1:05:09<1:44:52, 8.75s/it] 94%|█████████▍| 11470/12188 [1:05:17<1:41:36, 8.49s/it] {'loss': 0.2846, 'grad_norm': 0.7748988585923726, 'learning_rate': 9.099006847068826e-08, 'epoch': 0.94} + 94%|█████████▍| 11470/12188 [1:05:17<1:41:36, 8.49s/it] 94%|█████████▍| 11471/12188 [1:05:24<1:37:36, 8.17s/it] {'loss': 0.3106, 'grad_norm': 0.9226660646977534, 'learning_rate': 9.07379105531736e-08, 'epoch': 0.94} + 94%|█████████▍| 11471/12188 [1:05:24<1:37:36, 8.17s/it] 94%|█████████▍| 11472/12188 [1:05:31<1:34:52, 7.95s/it] {'loss': 0.3191, 'grad_norm': 0.6781604278001975, 'learning_rate': 9.04860993198603e-08, 'epoch': 0.94} + 94%|█████████▍| 11472/12188 [1:05:31<1:34:52, 7.95s/it] 94%|█████████▍| 11473/12188 [1:05:39<1:33:22, 7.84s/it] {'loss': 0.3073, 'grad_norm': 0.6706630661581654, 'learning_rate': 9.023463478853134e-08, 'epoch': 0.94} + 94%|█████████▍| 11473/12188 [1:05:39<1:33:22, 7.84s/it] 94%|█████████▍| 11474/12188 [1:05:47<1:32:29, 7.77s/it] {'loss': 0.3022, 'grad_norm': 0.6976323791989976, 'learning_rate': 8.998351697694363e-08, 'epoch': 0.94} + 94%|█████████▍| 11474/12188 [1:05:47<1:32:29, 7.77s/it] 94%|█████████▍| 11475/12188 [1:05:54<1:31:26, 7.69s/it] {'loss': 0.285, 'grad_norm': 0.8137035472646597, 'learning_rate': 8.973274590283188e-08, 'epoch': 0.94} + 94%|█████████▍| 11475/12188 [1:05:54<1:31:26, 7.69s/it] 94%|█████████▍| 11476/12188 [1:06:02<1:31:38, 7.72s/it] {'loss': 0.3242, 'grad_norm': 0.6854038704070666, 'learning_rate': 8.948232158390468e-08, 'epoch': 0.94} + 94%|█████████▍| 11476/12188 [1:06:02<1:31:38, 7.72s/it] 94%|█████████▍| 11477/12188 [1:06:09<1:31:02, 7.68s/it] {'loss': 0.2893, 'grad_norm': 0.6933678160368562, 'learning_rate': 8.923224403784681e-08, 'epoch': 0.94} + 94%|█████████▍| 11477/12188 [1:06:09<1:31:02, 7.68s/it] 94%|█████████▍| 11478/12188 [1:06:17<1:29:56, 7.60s/it] {'loss': 0.2696, 'grad_norm': 0.6508860653228502, 'learning_rate': 8.898251328231799e-08, 'epoch': 0.94} + 94%|█████████▍| 11478/12188 [1:06:17<1:29:56, 7.60s/it] 94%|█████████▍| 11479/12188 [1:06:27<1:38:09, 8.31s/it] {'loss': 0.2825, 'grad_norm': 0.66387184311223, 'learning_rate': 8.873312933495359e-08, 'epoch': 0.94} + 94%|████��████▍| 11479/12188 [1:06:27<1:38:09, 8.31s/it] 94%|█████████▍| 11480/12188 [1:06:35<1:37:23, 8.25s/it] {'loss': 0.2608, 'grad_norm': 0.6254492848877901, 'learning_rate': 8.848409221336452e-08, 'epoch': 0.94} + 94%|█████████▍| 11480/12188 [1:06:35<1:37:23, 8.25s/it] 94%|█████████▍| 11481/12188 [1:06:42<1:34:10, 7.99s/it] {'loss': 0.3053, 'grad_norm': 0.7001094807201339, 'learning_rate': 8.823540193513891e-08, 'epoch': 0.94} + 94%|█████████▍| 11481/12188 [1:06:42<1:34:10, 7.99s/it] 94%|█████████▍| 11482/12188 [1:06:50<1:33:03, 7.91s/it] {'loss': 0.298, 'grad_norm': 0.7367446995456215, 'learning_rate': 8.798705851783717e-08, 'epoch': 0.94} + 94%|█████████▍| 11482/12188 [1:06:50<1:33:03, 7.91s/it] 94%|█████████▍| 11483/12188 [1:06:58<1:32:00, 7.83s/it] {'loss': 0.2882, 'grad_norm': 0.7234311453615313, 'learning_rate': 8.773906197899861e-08, 'epoch': 0.94} + 94%|█████████▍| 11483/12188 [1:06:58<1:32:00, 7.83s/it] 94%|█████████▍| 11484/12188 [1:07:06<1:35:19, 8.12s/it] {'loss': 0.261, 'grad_norm': 0.697165993964216, 'learning_rate': 8.749141233613422e-08, 'epoch': 0.94} + 94%|█████████▍| 11484/12188 [1:07:07<1:35:19, 8.12s/it] 94%|█████████▍| 11485/12188 [1:07:14<1:34:07, 8.03s/it] {'loss': 0.2992, 'grad_norm': 0.7468877288977702, 'learning_rate': 8.72441096067339e-08, 'epoch': 0.94} + 94%|█████████▍| 11485/12188 [1:07:14<1:34:07, 8.03s/it] 94%|█████████▍| 11486/12188 [1:07:22<1:31:17, 7.80s/it] {'loss': 0.3019, 'grad_norm': 0.6881004219747189, 'learning_rate': 8.699715380826256e-08, 'epoch': 0.94} + 94%|█████████▍| 11486/12188 [1:07:22<1:31:17, 7.80s/it] 94%|█████████▍| 11487/12188 [1:07:29<1:31:21, 7.82s/it] {'loss': 0.3304, 'grad_norm': 0.7322255727381941, 'learning_rate': 8.675054495815793e-08, 'epoch': 0.94} + 94%|█████████▍| 11487/12188 [1:07:29<1:31:21, 7.82s/it] 94%|█████████▍| 11488/12188 [1:07:37<1:30:53, 7.79s/it] {'loss': 0.2816, 'grad_norm': 0.7822209792718776, 'learning_rate': 8.650428307383663e-08, 'epoch': 0.94} + 94%|█████████▍| 11488/12188 [1:07:37<1:30:53, 7.79s/it] 94%|█████████▍| 11489/12188 [1:07:45<1:31:26, 7.85s/it] {'loss': 0.2995, 'grad_norm': 0.7055538434914514, 'learning_rate': 8.625836817268806e-08, 'epoch': 0.94} + 94%|█████████▍| 11489/12188 [1:07:45<1:31:26, 7.85s/it] 94%|█████████▍| 11490/12188 [1:07:53<1:30:28, 7.78s/it] {'loss': 0.2738, 'grad_norm': 0.7191450127452647, 'learning_rate': 8.601280027208004e-08, 'epoch': 0.94} + 94%|█████████▍| 11490/12188 [1:07:53<1:30:28, 7.78s/it] 94%|█████████▍| 11491/12188 [1:08:02<1:34:38, 8.15s/it] {'loss': 0.3143, 'grad_norm': 0.6942053505925784, 'learning_rate': 8.576757938935254e-08, 'epoch': 0.94} + 94%|█████████▍| 11491/12188 [1:08:02<1:34:38, 8.15s/it] 94%|█████████▍| 11492/12188 [1:08:09<1:32:16, 7.95s/it] {'loss': 0.3037, 'grad_norm': 0.8665718632841662, 'learning_rate': 8.552270554182284e-08, 'epoch': 0.94} + 94%|█████████▍| 11492/12188 [1:08:09<1:32:16, 7.95s/it] 94%|█████████▍| 11493/12188 [1:08:18<1:34:49, 8.19s/it] {'loss': 0.2282, 'grad_norm': 0.7018224538944521, 'learning_rate': 8.52781787467849e-08, 'epoch': 0.94} + 94%|█████████▍| 11493/12188 [1:08:18<1:34:49, 8.19s/it] 94%|█████████▍| 11494/12188 [1:08:26<1:33:10, 8.06s/it] {'loss': 0.2605, 'grad_norm': 0.6553122668049707, 'learning_rate': 8.503399902150488e-08, 'epoch': 0.94} + 94%|█████████▍| 11494/12188 [1:08:26<1:33:10, 8.06s/it] 94%|█████████▍| 11495/12188 [1:08:34<1:32:15, 7.99s/it] {'loss': 0.3325, 'grad_norm': 0.7406374258371962, 'learning_rate': 8.479016638322791e-08, 'epoch': 0.94} + 94%|█████████▍| 11495/12188 [1:08:34<1:32:15, 7.99s/it] 94%|█████████▍| 11496/12188 [1:08:43<1:35:36, 8.29s/it] {'loss': 0.2998, 'grad_norm': 0.7510378505301605, 'learning_rate': 8.45466808491724e-08, 'epoch': 0.94} + 94%|█████████▍| 11496/12188 [1:08:43<1:35:36, 8.29s/it] 94%|█████████▍| 11497/12188 [1:08:50<1:32:19, 8.02s/it] {'loss': 0.3286, 'grad_norm': 0.7087174324181473, 'learning_rate': 8.430354243653294e-08, 'epoch': 0.94} + 94%|█████████▍| 11497/12188 [1:08:50<1:32:19, 8.02s/it] 94%|█████████▍| 11498/12188 [1:08:58<1:31:09, 7.93s/it] {'loss': 0.3223, 'grad_norm': 0.6404028587349707, 'learning_rate': 8.40607511624797e-08, 'epoch': 0.94} + 94%|█████████▍| 11498/12188 [1:08:58<1:31:09, 7.93s/it] 94%|█████████▍| 11499/12188 [1:09:08<1:40:28, 8.75s/it] {'loss': 0.2705, 'grad_norm': 0.7128122833506783, 'learning_rate': 8.381830704415839e-08, 'epoch': 0.94} + 94%|█████████▍| 11499/12188 [1:09:08<1:40:28, 8.75s/it] 94%|█████████▍| 11500/12188 [1:09:16<1:37:05, 8.47s/it] {'loss': 0.3267, 'grad_norm': 0.6431078259141513, 'learning_rate': 8.357621009868921e-08, 'epoch': 0.94} + 94%|█████████▍| 11500/12188 [1:09:16<1:37:05, 8.47s/it] 94%|█████████▍| 11501/12188 [1:09:24<1:34:28, 8.25s/it] {'loss': 0.2753, 'grad_norm': 0.6609161649635266, 'learning_rate': 8.333446034317017e-08, 'epoch': 0.94} + 94%|█████████▍| 11501/12188 [1:09:24<1:34:28, 8.25s/it] 94%|█████████▍| 11502/12188 [1:09:31<1:31:46, 8.03s/it] {'loss': 0.2952, 'grad_norm': 0.7160954714356189, 'learning_rate': 8.309305779467147e-08, 'epoch': 0.94} + 94%|█████████▍| 11502/12188 [1:09:31<1:31:46, 8.03s/it] 94%|█████████▍| 11503/12188 [1:09:39<1:29:15, 7.82s/it] {'loss': 0.3393, 'grad_norm': 0.7711993144787835, 'learning_rate': 8.285200247024172e-08, 'epoch': 0.94} + 94%|█████████▍| 11503/12188 [1:09:39<1:29:15, 7.82s/it] 94%|█████████▍| 11504/12188 [1:09:46<1:28:05, 7.73s/it] {'loss': 0.2964, 'grad_norm': 0.6648171728000005, 'learning_rate': 8.26112943869034e-08, 'epoch': 0.94} + 94%|█████████▍| 11504/12188 [1:09:46<1:28:05, 7.73s/it] 94%|█████████▍| 11505/12188 [1:09:54<1:27:49, 7.72s/it] {'loss': 0.2638, 'grad_norm': 0.6710913119965035, 'learning_rate': 8.237093356165571e-08, 'epoch': 0.94} + 94%|█████████▍| 11505/12188 [1:09:54<1:27:49, 7.72s/it] 94%|█████████▍| 11506/12188 [1:10:02<1:27:58, 7.74s/it] {'loss': 0.2986, 'grad_norm': 0.6680039168548512, 'learning_rate': 8.213092001147117e-08, 'epoch': 0.94} + 94%|█████████▍| 11506/12188 [1:10:02<1:27:58, 7.74s/it] 94%|█████████▍| 11507/12188 [1:10:10<1:30:26, 7.97s/it] {'loss': 0.3145, 'grad_norm': 0.8254647749936619, 'learning_rate': 8.189125375330009e-08, 'epoch': 0.94} + 94%|█████████▍| 11507/12188 [1:10:10<1:30:26, 7.97s/it] 94%|█████████▍| 11508/12188 [1:10:18<1:28:40, 7.82s/it] {'loss': 0.3112, 'grad_norm': 0.7193503975875055, 'learning_rate': 8.16519348040673e-08, 'epoch': 0.94} + 94%|█████████▍| 11508/12188 [1:10:18<1:28:40, 7.82s/it] 94%|█████████▍| 11509/12188 [1:10:26<1:30:47, 8.02s/it] {'loss': 0.3289, 'grad_norm': 0.7609394532573432, 'learning_rate': 8.14129631806726e-08, 'epoch': 0.94} + 94%|█████████▍| 11509/12188 [1:10:26<1:30:47, 8.02s/it] 94%|█████████▍| 11510/12188 [1:10:34<1:30:53, 8.04s/it] {'loss': 0.2862, 'grad_norm': 0.7261217644712346, 'learning_rate': 8.117433889999248e-08, 'epoch': 0.94} + 94%|█████████▍| 11510/12188 [1:10:34<1:30:53, 8.04s/it] 94%|█████████▍| 11511/12188 [1:10:42<1:28:40, 7.86s/it] {'loss': 0.2902, 'grad_norm': 0.7346362788092017, 'learning_rate': 8.093606197887793e-08, 'epoch': 0.94} + 94%|█████████▍| 11511/12188 [1:10:42<1:28:40, 7.86s/it] 94%|█████████▍| 11512/12188 [1:10:49<1:27:04, 7.73s/it] {'loss': 0.3116, 'grad_norm': 0.685341725041379, 'learning_rate': 8.069813243415492e-08, 'epoch': 0.94} + 94%|█████████▍| 11512/12188 [1:10:49<1:27:04, 7.73s/it] 94%|█████████▍| 11513/12188 [1:10:57<1:28:05, 7.83s/it] {'loss': 0.3236, 'grad_norm': 0.7197600106547691, 'learning_rate': 8.046055028262667e-08, 'epoch': 0.94} + 94%|█████████▍| 11513/12188 [1:10:57<1:28:05, 7.83s/it] 94%|█████████▍| 11514/12188 [1:11:05<1:29:02, 7.93s/it] {'loss': 0.2995, 'grad_norm': 0.7767876637259552, 'learning_rate': 8.022331554107088e-08, 'epoch': 0.94} + 94%|█████████▍| 11514/12188 [1:11:05<1:29:02, 7.93s/it] 94%|█████████▍| 11515/12188 [1:11:13<1:28:07, 7.86s/it] {'loss': 0.3159, 'grad_norm': 0.7167543509770662, 'learning_rate': 7.998642822624025e-08, 'epoch': 0.94} + 94%|█████████▍| 11515/12188 [1:11:13<1:28:07, 7.86s/it] 94%|█████████▍| 11516/12188 [1:11:21<1:27:29, 7.81s/it] {'loss': 0.2962, 'grad_norm': 0.8018397332447394, 'learning_rate': 7.974988835486308e-08, 'epoch': 0.94} + 94%|█████████▍| 11516/12188 [1:11:21<1:27:29, 7.81s/it] 94%|█████████▍| 11517/12188 [1:11:28<1:26:07, 7.70s/it] {'loss': 0.2928, 'grad_norm': 0.7198641076539729, 'learning_rate': 7.951369594364433e-08, 'epoch': 0.94} + 94%|█████████▍| 11517/12188 [1:11:28<1:26:07, 7.70s/it] 95%|█████████▍| 11518/12188 [1:11:35<1:24:36, 7.58s/it] {'loss': 0.3084, 'grad_norm': 0.7531539889308981, 'learning_rate': 7.927785100926288e-08, 'epoch': 0.94} + 95%|█████████▍| 11518/12188 [1:11:36<1:24:36, 7.58s/it] 95%|█████████▍| 11519/12188 [1:11:43<1:25:03, 7.63s/it] {'loss': 0.3057, 'grad_norm': 0.7228436373021899, 'learning_rate': 7.904235356837376e-08, 'epoch': 0.95} + 95%|█████████▍| 11519/12188 [1:11:43<1:25:03, 7.63s/it] 95%|█████████▍| 11520/12188 [1:11:51<1:25:46, 7.70s/it] {'loss': 0.2591, 'grad_norm': 0.6561254685139658, 'learning_rate': 7.880720363760753e-08, 'epoch': 0.95} + 95%|█████████▍| 11520/12188 [1:11:51<1:25:46, 7.70s/it] 95%|█████████▍| 11521/12188 [1:12:01<1:32:35, 8.33s/it] {'loss': 0.3089, 'grad_norm': 0.7648048534035438, 'learning_rate': 7.85724012335698e-08, 'epoch': 0.95} + 95%|█████████▍| 11521/12188 [1:12:01<1:32:35, 8.33s/it] 95%|█████████▍| 11522/12188 [1:12:08<1:29:26, 8.06s/it] {'loss': 0.3155, 'grad_norm': 0.6633559754210834, 'learning_rate': 7.833794637284232e-08, 'epoch': 0.95} + 95%|█████████▍| 11522/12188 [1:12:08<1:29:26, 8.06s/it] 95%|█████████▍| 11523/12188 [1:12:19<1:36:31, 8.71s/it] {'loss': 0.2582, 'grad_norm': 0.6130192923942714, 'learning_rate': 7.810383907198182e-08, 'epoch': 0.95} + 95%|█████████▍| 11523/12188 [1:12:19<1:36:31, 8.71s/it] 95%|█████████▍| 11524/12188 [1:12:26<1:33:20, 8.43s/it] {'loss': 0.2699, 'grad_norm': 1.3514072613305377, 'learning_rate': 7.787007934752067e-08, 'epoch': 0.95} + 95%|█████████▍| 11524/12188 [1:12:26<1:33:20, 8.43s/it] 95%|█████████▍| 11525/12188 [1:12:34<1:30:06, 8.15s/it] {'loss': 0.2654, 'grad_norm': 0.6541022912331392, 'learning_rate': 7.763666721596674e-08, 'epoch': 0.95} + 95%|█████████▍| 11525/12188 [1:12:34<1:30:06, 8.15s/it] 95%|█████████▍| 11526/12188 [1:12:41<1:27:51, 7.96s/it] {'loss': 0.3299, 'grad_norm': 0.7482947846810067, 'learning_rate': 7.740360269380243e-08, 'epoch': 0.95} + 95%|█████████▍| 11526/12188 [1:12:41<1:27:51, 7.96s/it] 95%|█████████▍| 11527/12188 [1:12:49<1:27:03, 7.90s/it] {'loss': 0.3014, 'grad_norm': 0.6834654716709402, 'learning_rate': 7.717088579748733e-08, 'epoch': 0.95} + 95%|█████████▍| 11527/12188 [1:12:49<1:27:03, 7.90s/it] 95%|█████████▍| 11528/12188 [1:12:57<1:27:24, 7.95s/it] {'loss': 0.3105, 'grad_norm': 0.7910266505529446, 'learning_rate': 7.693851654345497e-08, 'epoch': 0.95} + 95%|█████████▍| 11528/12188 [1:12:57<1:27:24, 7.95s/it] 95%|█████████▍| 11529/12188 [1:13:05<1:25:25, 7.78s/it] {'loss': 0.3013, 'grad_norm': 0.7208158964372585, 'learning_rate': 7.670649494811555e-08, 'epoch': 0.95} + 95%|█████████▍| 11529/12188 [1:13:05<1:25:25, 7.78s/it] 95%|█████████▍| 11530/12188 [1:13:12<1:23:42, 7.63s/it] {'loss': 0.3015, 'grad_norm': 0.7179922695152416, 'learning_rate': 7.647482102785264e-08, 'epoch': 0.95} + 95%|█████████▍| 11530/12188 [1:13:12<1:23:42, 7.63s/it] 95%|█████████▍| 11531/12188 [1:13:20<1:24:46, 7.74s/it] {'loss': 0.3441, 'grad_norm': 1.053499872628843, 'learning_rate': 7.62434947990287e-08, 'epoch': 0.95} + 95%|█████████▍| 11531/12188 [1:13:20<1:24:46, 7.74s/it] 95%|█████████▍| 11532/12188 [1:13:27<1:24:16, 7.71s/it] {'loss': 0.3129, 'grad_norm': 0.7241879030965517, 'learning_rate': 7.60125162779779e-08, 'epoch': 0.95} + 95%|█████████▍| 11532/12188 [1:13:27<1:24:16, 7.71s/it] 95%|█████████▍| 11533/12188 [1:13:35<1:23:00, 7.60s/it] {'loss': 0.3062, 'grad_norm': 0.7899809994912884, 'learning_rate': 7.578188548101217e-08, 'epoch': 0.95} + 95%|█████████▍| 11533/12188 [1:13:35<1:23:00, 7.60s/it] 95%|█████████▍| 11534/12188 [1:13:42<1:22:51, 7.60s/it] {'loss': 0.3173, 'grad_norm': 0.7352672876745031, 'learning_rate': 7.55516024244185e-08, 'epoch': 0.95} + 95%|█████████▍| 11534/12188 [1:13:42<1:22:51, 7.60s/it] 95%|█████████▍| 11535/12188 [1:13:52<1:28:57, 8.17s/it] {'loss': 0.3205, 'grad_norm': 0.6668356450437638, 'learning_rate': 7.532166712445888e-08, 'epoch': 0.95} + 95%|█████████▍| 11535/12188 [1:13:52<1:28:57, 8.17s/it] 95%|█████████▍| 11536/12188 [1:14:00<1:29:10, 8.21s/it] {'loss': 0.3063, 'grad_norm': 0.755542719906116, 'learning_rate': 7.509207959737086e-08, 'epoch': 0.95} + 95%|█████████▍| 11536/12188 [1:14:00<1:29:10, 8.21s/it] 95%|█████████▍| 11537/12188 [1:14:08<1:28:36, 8.17s/it] {'loss': 0.3142, 'grad_norm': 0.681026534198614, 'learning_rate': 7.486283985936815e-08, 'epoch': 0.95} + 95%|█████████▍| 11537/12188 [1:14:08<1:28:36, 8.17s/it] 95%|█████████▍| 11538/12188 [1:14:16<1:27:04, 8.04s/it] {'loss': 0.2736, 'grad_norm': 0.6237727887729317, 'learning_rate': 7.463394792663891e-08, 'epoch': 0.95} + 95%|█████████▍| 11538/12188 [1:14:16<1:27:04, 8.04s/it] 95%|█████████▍| 11539/12188 [1:14:27<1:34:48, 8.77s/it] {'loss': 0.2974, 'grad_norm': 0.7638572748357146, 'learning_rate': 7.440540381534633e-08, 'epoch': 0.95} + 95%|█████████▍| 11539/12188 [1:14:27<1:34:48, 8.77s/it] 95%|█████████▍| 11540/12188 [1:14:34<1:29:50, 8.32s/it] {'loss': 0.3289, 'grad_norm': 0.7131684810039436, 'learning_rate': 7.417720754163138e-08, 'epoch': 0.95} + 95%|█████████▍| 11540/12188 [1:14:34<1:29:50, 8.32s/it] 95%|█████████▍| 11541/12188 [1:14:41<1:26:53, 8.06s/it] {'loss': 0.3095, 'grad_norm': 0.730520466899204, 'learning_rate': 7.394935912160728e-08, 'epoch': 0.95} + 95%|█████████▍| 11541/12188 [1:14:41<1:26:53, 8.06s/it] 95%|█████████▍| 11542/12188 [1:14:50<1:27:34, 8.13s/it] {'loss': 0.2549, 'grad_norm': 0.6413106345185771, 'learning_rate': 7.372185857136616e-08, 'epoch': 0.95} + 95%|█████████▍| 11542/12188 [1:14:50<1:27:34, 8.13s/it] 95%|█████████▍| 11543/12188 [1:14:57<1:25:08, 7.92s/it] {'loss': 0.3206, 'grad_norm': 0.7914003476836075, 'learning_rate': 7.349470590697183e-08, 'epoch': 0.95} + 95%|█████████▍| 11543/12188 [1:14:57<1:25:08, 7.92s/it] 95%|█████████▍| 11544/12188 [1:15:04<1:23:09, 7.75s/it] {'loss': 0.2841, 'grad_norm': 0.8292503670826639, 'learning_rate': 7.326790114446647e-08, 'epoch': 0.95} + 95%|█████████▍| 11544/12188 [1:15:04<1:23:09, 7.75s/it] 95%|█████████▍| 11545/12188 [1:15:12<1:22:17, 7.68s/it] {'loss': 0.3013, 'grad_norm': 0.7766804640250751, 'learning_rate': 7.30414442998667e-08, 'epoch': 0.95} + 95%|█████████▍| 11545/12188 [1:15:12<1:22:17, 7.68s/it] 95%|█████████▍| 11546/12188 [1:15:19<1:21:09, 7.58s/it] {'loss': 0.2861, 'grad_norm': 0.7569266832408227, 'learning_rate': 7.281533538916474e-08, 'epoch': 0.95} + 95%|█████████▍| 11546/12188 [1:15:19<1:21:09, 7.58s/it] 95%|█████████▍| 11547/12188 [1:15:26<1:20:06, 7.50s/it] {'loss': 0.3521, 'grad_norm': 0.7073713388128995, 'learning_rate': 7.258957442832726e-08, 'epoch': 0.95} + 95%|█████████▍| 11547/12188 [1:15:27<1:20:06, 7.50s/it] 95%|█████████▍| 11548/12188 [1:15:34<1:21:04, 7.60s/it] {'loss': 0.2795, 'grad_norm': 0.7069615044443939, 'learning_rate': 7.236416143329761e-08, 'epoch': 0.95} + 95%|█████████▍| 11548/12188 [1:15:34<1:21:04, 7.60s/it] 95%|█████████▍| 11549/12188 [1:15:42<1:20:25, 7.55s/it] {'loss': 0.3164, 'grad_norm': 0.7593124967028164, 'learning_rate': 7.213909641999361e-08, 'epoch': 0.95} + 95%|█████████▍| 11549/12188 [1:15:42<1:20:25, 7.55s/it] 95%|█████████▍| 11550/12188 [1:15:50<1:20:54, 7.61s/it] {'loss': 0.2631, 'grad_norm': 0.7030275355544322, 'learning_rate': 7.191437940431034e-08, 'epoch': 0.95} + 95%|█████████▍| 11550/12188 [1:15:50<1:20:54, 7.61s/it] 95%|█████████▍| 11551/12188 [1:16:00<1:29:05, 8.39s/it] {'loss': 0.3065, 'grad_norm': 0.7047416719048204, 'learning_rate': 7.16900104021151e-08, 'epoch': 0.95} + 95%|█████████▍| 11551/12188 [1:16:00<1:29:05, 8.39s/it] 95%|█████████▍| 11552/12188 [1:16:09<1:30:18, 8.52s/it] {'loss': 0.3081, 'grad_norm': 0.70642068740684, 'learning_rate': 7.146598942925353e-08, 'epoch': 0.95} + 95%|█████████▍| 11552/12188 [1:16:09<1:30:18, 8.52s/it] 95%|█████████▍| 11553/12188 [1:16:16<1:27:55, 8.31s/it] {'loss': 0.2826, 'grad_norm': 0.6977539222829745, 'learning_rate': 7.124231650154633e-08, 'epoch': 0.95} + 95%|█████████▍| 11553/12188 [1:16:16<1:27:55, 8.31s/it] 95%|█████████▍| 11554/12188 [1:16:24<1:24:45, 8.02s/it] {'loss': 0.2692, 'grad_norm': 0.7352318816392833, 'learning_rate': 7.101899163478698e-08, 'epoch': 0.95} + 95%|█████████▍| 11554/12188 [1:16:24<1:24:45, 8.02s/it] 95%|█████████▍| 11555/12188 [1:16:32<1:25:14, 8.08s/it] {'loss': 0.3286, 'grad_norm': 0.6485382777894068, 'learning_rate': 7.079601484474841e-08, 'epoch': 0.95} + 95%|█████████▍| 11555/12188 [1:16:32<1:25:14, 8.08s/it] 95%|█████████▍| 11556/12188 [1:16:39<1:22:58, 7.88s/it] {'loss': 0.2876, 'grad_norm': 0.7489884914580902, 'learning_rate': 7.057338614717523e-08, 'epoch': 0.95} + 95%|█████████▍| 11556/12188 [1:16:39<1:22:58, 7.88s/it] 95%|█████████▍| 11557/12188 [1:16:47<1:21:45, 7.77s/it] {'loss': 0.2981, 'grad_norm': 0.7037828799217508, 'learning_rate': 7.03511055577899e-08, 'epoch': 0.95} + 95%|█████████▍| 11557/12188 [1:16:47<1:21:45, 7.77s/it] 95%|█████████▍| 11558/12188 [1:16:55<1:24:03, 8.01s/it] {'loss': 0.2707, 'grad_norm': 0.670332478639947, 'learning_rate': 7.012917309228984e-08, 'epoch': 0.95} + 95%|█████████▍| 11558/12188 [1:16:55<1:24:03, 8.01s/it] 95%|█████████▍| 11559/12188 [1:17:03<1:23:29, 7.96s/it] {'loss': 0.3036, 'grad_norm': 0.7155784798390297, 'learning_rate': 6.990758876634695e-08, 'epoch': 0.95} + 95%|█████████▍| 11559/12188 [1:17:03<1:23:29, 7.96s/it] 95%|█████████▍| 11560/12188 [1:17:11<1:21:25, 7.78s/it] {'loss': 0.2846, 'grad_norm': 0.7863270248620298, 'learning_rate': 6.968635259560874e-08, 'epoch': 0.95} + 95%|█████████▍| 11560/12188 [1:17:11<1:21:25, 7.78s/it] 95%|█████████▍| 11561/12188 [1:17:18<1:19:55, 7.65s/it] {'loss': 0.2535, 'grad_norm': 0.8666710398026094, 'learning_rate': 6.946546459569991e-08, 'epoch': 0.95} + 95%|█████████▍| 11561/12188 [1:17:18<1:19:55, 7.65s/it] 95%|█████████▍| 11562/12188 [1:17:26<1:19:57, 7.66s/it] {'loss': 0.273, 'grad_norm': 0.6496157312306675, 'learning_rate': 6.9244924782218e-08, 'epoch': 0.95} + 95%|█████████▍| 11562/12188 [1:17:26<1:19:57, 7.66s/it] 95%|█████████▍| 11563/12188 [1:17:35<1:26:19, 8.29s/it] {'loss': 0.2996, 'grad_norm': 0.7464031494779539, 'learning_rate': 6.902473317073776e-08, 'epoch': 0.95} + 95%|█████████▍| 11563/12188 [1:17:35<1:26:19, 8.29s/it] 95%|█████████▍| 11564/12188 [1:17:43<1:23:13, 8.00s/it] {'loss': 0.2922, 'grad_norm': 0.7743515508692638, 'learning_rate': 6.880488977680899e-08, 'epoch': 0.95} + 95%|█████████▍| 11564/12188 [1:17:43<1:23:13, 8.00s/it] 95%|█████████▍| 11565/12188 [1:17:52<1:26:34, 8.34s/it] {'loss': 0.2996, 'grad_norm': 0.6872132288112274, 'learning_rate': 6.858539461595593e-08, 'epoch': 0.95} + 95%|█████████▍| 11565/12188 [1:17:52<1:26:34, 8.34s/it] 95%|█████████▍| 11566/12188 [1:17:59<1:24:12, 8.12s/it] {'loss': 0.2837, 'grad_norm': 0.6323569730298406, 'learning_rate': 6.83662477036795e-08, 'epoch': 0.95} + 95%|█████████▍| 11566/12188 [1:18:00<1:24:12, 8.12s/it] 95%|█████████▍| 11567/12188 [1:18:07<1:22:17, 7.95s/it] {'loss': 0.3127, 'grad_norm': 0.7146995360155519, 'learning_rate': 6.814744905545512e-08, 'epoch': 0.95} + 95%|█████████▍| 11567/12188 [1:18:07<1:22:17, 7.95s/it] 95%|█████████▍| 11568/12188 [1:18:15<1:23:17, 8.06s/it] {'loss': 0.2711, 'grad_norm': 0.674149265133856, 'learning_rate': 6.792899868673487e-08, 'epoch': 0.95} + 95%|█████████▍| 11568/12188 [1:18:15<1:23:17, 8.06s/it] 95%|█████████▍| 11569/12188 [1:18:23<1:21:36, 7.91s/it] {'loss': 0.3186, 'grad_norm': 0.7050749829592777, 'learning_rate': 6.771089661294417e-08, 'epoch': 0.95} + 95%|█████████▍| 11569/12188 [1:18:23<1:21:36, 7.91s/it] 95%|█████████▍| 11570/12188 [1:18:31<1:22:09, 7.98s/it] {'loss': 0.2412, 'grad_norm': 0.7080718004137615, 'learning_rate': 6.749314284948571e-08, 'epoch': 0.95} + 95%|█████████▍| 11570/12188 [1:18:31<1:22:09, 7.98s/it] 95%|█████████▍| 11571/12188 [1:18:38<1:20:11, 7.80s/it] {'loss': 0.3474, 'grad_norm': 0.7257453845881495, 'learning_rate': 6.727573741173721e-08, 'epoch': 0.95} + 95%|█████████▍| 11571/12188 [1:18:38<1:20:11, 7.80s/it] 95%|█████████▍| 11572/12188 [1:18:46<1:19:18, 7.73s/it] {'loss': 0.2653, 'grad_norm': 0.7507020156443684, 'learning_rate': 6.705868031505025e-08, 'epoch': 0.95} + 95%|█████████▍| 11572/12188 [1:18:46<1:19:18, 7.73s/it] 95%|█████████▍| 11573/12188 [1:18:53<1:18:22, 7.65s/it] {'loss': 0.2788, 'grad_norm': 0.7152057311059684, 'learning_rate': 6.68419715747548e-08, 'epoch': 0.95} + 95%|█████████▍| 11573/12188 [1:18:53<1:18:22, 7.65s/it] 95%|█████████▍| 11574/12188 [1:19:01<1:17:59, 7.62s/it] {'loss': 0.3003, 'grad_norm': 0.7336896137863117, 'learning_rate': 6.662561120615363e-08, 'epoch': 0.95} + 95%|█████████▍| 11574/12188 [1:19:01<1:17:59, 7.62s/it] 95%|█████████▍| 11575/12188 [1:19:08<1:17:05, 7.55s/it] {'loss': 0.3523, 'grad_norm': 0.7131525452173534, 'learning_rate': 6.640959922452506e-08, 'epoch': 0.95} + 95%|█████████▍| 11575/12188 [1:19:08<1:17:05, 7.55s/it] 95%|█████████▍| 11576/12188 [1:19:16<1:16:37, 7.51s/it] {'loss': 0.3117, 'grad_norm': 0.6911459016783086, 'learning_rate': 6.619393564512466e-08, 'epoch': 0.95} + 95%|█████████▍| 11576/12188 [1:19:16<1:16:37, 7.51s/it] 95%|█████████▍| 11577/12188 [1:19:23<1:16:23, 7.50s/it] {'loss': 0.3102, 'grad_norm': 0.6441670966913938, 'learning_rate': 6.597862048318193e-08, 'epoch': 0.95} + 95%|█████████▍| 11577/12188 [1:19:23<1:16:23, 7.50s/it] 95%|█████████▍| 11578/12188 [1:19:31<1:16:18, 7.51s/it] {'loss': 0.2816, 'grad_norm': 0.708464484553245, 'learning_rate': 6.576365375390137e-08, 'epoch': 0.95} + 95%|█████████▍| 11578/12188 [1:19:31<1:16:18, 7.51s/it] 95%|█████████▌| 11579/12188 [1:19:38<1:15:36, 7.45s/it] {'loss': 0.2918, 'grad_norm': 0.8331564780285896, 'learning_rate': 6.554903547246416e-08, 'epoch': 0.95} + 95%|█████████▌| 11579/12188 [1:19:38<1:15:36, 7.45s/it] 95%|█████████▌| 11580/12188 [1:19:46<1:16:36, 7.56s/it] {'loss': 0.2862, 'grad_norm': 0.7211721795479534, 'learning_rate': 6.533476565402653e-08, 'epoch': 0.95} + 95%|█████████▌| 11580/12188 [1:19:46<1:16:36, 7.56s/it] 95%|█████████▌| 11581/12188 [1:19:54<1:19:08, 7.82s/it] {'loss': 0.3288, 'grad_norm': 0.6665651631275498, 'learning_rate': 6.512084431371968e-08, 'epoch': 0.95} + 95%|█████████▌| 11581/12188 [1:19:54<1:19:08, 7.82s/it] 95%|█████████▌| 11582/12188 [1:20:02<1:17:47, 7.70s/it] {'loss': 0.2972, 'grad_norm': 0.7779070189916172, 'learning_rate': 6.490727146664988e-08, 'epoch': 0.95} + 95%|█████████▌| 11582/12188 [1:20:02<1:17:47, 7.70s/it] 95%|█████████▌| 11583/12188 [1:20:10<1:17:46, 7.71s/it] {'loss': 0.2875, 'grad_norm': 0.7662630165966777, 'learning_rate': 6.469404712790062e-08, 'epoch': 0.95} + 95%|█████████▌| 11583/12188 [1:20:10<1:17:46, 7.71s/it] 95%|█████████▌| 11584/12188 [1:20:17<1:16:39, 7.62s/it] {'loss': 0.293, 'grad_norm': 0.8093046489790418, 'learning_rate': 6.448117131252762e-08, 'epoch': 0.95} + 95%|█████████▌| 11584/12188 [1:20:17<1:16:39, 7.62s/it] 95%|█████████▌| 11585/12188 [1:20:25<1:16:24, 7.60s/it] {'loss': 0.2507, 'grad_norm': 0.6786021819703811, 'learning_rate': 6.426864403556499e-08, 'epoch': 0.95} + 95%|█████████▌| 11585/12188 [1:20:25<1:16:24, 7.60s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f5ec638bc90> +[Try #0] Failed to fetch sample 4556147 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f5ec638bc90> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'MediaWiki-announce '"}, {'from': 'gpt', 'value': '\nclick(x=0.261, y=0.1035)\n'}]} + 95%|█████████▌| 11586/12188 [1:20:36<1:26:38, 8.64s/it] {'loss': 0.3109, 'grad_norm': 0.6268263005150039, 'learning_rate': 6.405646531202126e-08, 'epoch': 0.95} + 95%|█████████▌| 11586/12188 [1:20:36<1:26:38, 8.64s/it] 95%|█████████▌| 11587/12188 [1:20:43<1:23:16, 8.31s/it] {'loss': 0.2632, 'grad_norm': 0.6555697327105452, 'learning_rate': 6.384463515687945e-08, 'epoch': 0.95} + 95%|█████████▌| 11587/12188 [1:20:43<1:23:16, 8.31s/it] 95%|█████████▌| 11588/12188 [1:20:51<1:20:40, 8.07s/it] {'loss': 0.3086, 'grad_norm': 0.9631445474291255, 'learning_rate': 6.363315358509926e-08, 'epoch': 0.95} + 95%|█████████▌| 11588/12188 [1:20:51<1:20:40, 8.07s/it] 95%|█████████▌| 11589/12188 [1:20:58<1:19:10, 7.93s/it] {'loss': 0.2961, 'grad_norm': 0.6926316201432011, 'learning_rate': 6.342202061161485e-08, 'epoch': 0.95} + 95%|█████████▌| 11589/12188 [1:20:58<1:19:10, 7.93s/it] 95%|█████████▌| 11590/12188 [1:21:07<1:20:51, 8.11s/it] {'loss': 0.2958, 'grad_norm': 0.7800170289582182, 'learning_rate': 6.32112362513354e-08, 'epoch': 0.95} + 95%|█████████▌| 11590/12188 [1:21:07<1:20:51, 8.11s/it] 95%|█████████▌| 11591/12188 [1:21:15<1:19:57, 8.04s/it] {'loss': 0.2451, 'grad_norm': 0.7498247219864855, 'learning_rate': 6.300080051914792e-08, 'epoch': 0.95} + 95%|█████████▌| 11591/12188 [1:21:15<1:19:57, 8.04s/it] 95%|█████████▌| 11592/12188 [1:21:22<1:17:14, 7.78s/it] {'loss': 0.3026, 'grad_norm': 0.7490158684690564, 'learning_rate': 6.279071342991106e-08, 'epoch': 0.95} + 95%|█████████▌| 11592/12188 [1:21:22<1:17:14, 7.78s/it] 95%|█████████▌| 11593/12188 [1:21:29<1:15:45, 7.64s/it] {'loss': 0.2806, 'grad_norm': 0.7708519609944678, 'learning_rate': 6.258097499846238e-08, 'epoch': 0.95} + 95%|█████████▌| 11593/12188 [1:21:29<1:15:45, 7.64s/it] 95%|█████████▌| 11594/12188 [1:21:36<1:14:41, 7.54s/it] {'loss': 0.3029, 'grad_norm': 0.6763920187398628, 'learning_rate': 6.237158523961284e-08, 'epoch': 0.95} + 95%|█████████▌| 11594/12188 [1:21:36<1:14:41, 7.54s/it] 95%|█████████▌| 11595/12188 [1:21:46<1:21:14, 8.22s/it] {'loss': 0.3113, 'grad_norm': 0.8800498508458161, 'learning_rate': 6.216254416814837e-08, 'epoch': 0.95} + 95%|█████████▌| 11595/12188 [1:21:46<1:21:14, 8.22s/it] 95%|█████████▌| 11596/12188 [1:21:54<1:18:38, 7.97s/it] {'loss': 0.2862, 'grad_norm': 0.6632601102159557, 'learning_rate': 6.195385179883218e-08, 'epoch': 0.95} + 95%|█████████▌| 11596/12188 [1:21:54<1:18:38, 7.97s/it] 95%|█████████▌| 11597/12188 [1:22:01<1:16:32, 7.77s/it] {'loss': 0.3493, 'grad_norm': 0.7765305025720091, 'learning_rate': 6.174550814640135e-08, 'epoch': 0.95} + 95%|█████████▌| 11597/12188 [1:22:01<1:16:32, 7.77s/it] 95%|█████████▌| 11598/12188 [1:22:09<1:16:24, 7.77s/it] {'loss': 0.2722, 'grad_norm': 0.7248034626895643, 'learning_rate': 6.153751322556855e-08, 'epoch': 0.95} + 95%|█████████▌| 11598/12188 [1:22:09<1:16:24, 7.77s/it] 95%|█████████▌| 11599/12188 [1:22:17<1:17:41, 7.91s/it] {'loss': 0.2857, 'grad_norm': 0.7189031701545107, 'learning_rate': 6.132986705102317e-08, 'epoch': 0.95} + 95%|█████████▌| 11599/12188 [1:22:17<1:17:41, 7.91s/it] 95%|█████████▌| 11600/12188 [1:22:25<1:17:39, 7.92s/it] {'loss': 0.2774, 'grad_norm': 1.0673086684779385, 'learning_rate': 6.112256963742735e-08, 'epoch': 0.95} + 95%|█████████▌| 11600/12188 [1:22:25<1:17:39, 7.92s/it] 95%|█████████▌| 11601/12188 [1:22:33<1:16:55, 7.86s/it] {'loss': 0.2914, 'grad_norm': 0.7306987898428707, 'learning_rate': 6.091562099942106e-08, 'epoch': 0.95} + 95%|█████████▌| 11601/12188 [1:22:33<1:16:55, 7.86s/it] 95%|█████████▌| 11602/12188 [1:22:40<1:15:19, 7.71s/it] {'loss': 0.3061, 'grad_norm': 0.7177292746121716, 'learning_rate': 6.070902115161814e-08, 'epoch': 0.95} + 95%|█████████▌| 11602/12188 [1:22:40<1:15:19, 7.71s/it] 95%|█████████▌| 11603/12188 [1:22:47<1:13:59, 7.59s/it] {'loss': 0.3246, 'grad_norm': 0.7630713359627296, 'learning_rate': 6.050277010860861e-08, 'epoch': 0.95} + 95%|█████████▌| 11603/12188 [1:22:47<1:13:59, 7.59s/it] 95%|█████████▌| 11604/12188 [1:22:55<1:13:12, 7.52s/it] {'loss': 0.2489, 'grad_norm': 0.7469175423030573, 'learning_rate': 6.029686788495748e-08, 'epoch': 0.95} + 95%|█████████▌| 11604/12188 [1:22:55<1:13:12, 7.52s/it] 95%|█████████▌| 11605/12188 [1:23:05<1:20:01, 8.24s/it] {'loss': 0.3053, 'grad_norm': 0.6803354094364792, 'learning_rate': 6.009131449520534e-08, 'epoch': 0.95} + 95%|█████████▌| 11605/12188 [1:23:05<1:20:01, 8.24s/it] 95%|█████████▌| 11606/12188 [1:23:12<1:18:39, 8.11s/it] {'loss': 0.2994, 'grad_norm': 0.683176537428937, 'learning_rate': 5.988610995386779e-08, 'epoch': 0.95} + 95%|█████████▌| 11606/12188 [1:23:12<1:18:39, 8.11s/it] 95%|█████████▌| 11607/12188 [1:23:20<1:16:50, 7.94s/it] {'loss': 0.2792, 'grad_norm': 0.6690136663173144, 'learning_rate': 5.968125427543658e-08, 'epoch': 0.95} + 95%|█████████▌| 11607/12188 [1:23:20<1:16:50, 7.94s/it] 95%|█████████▌| 11608/12188 [1:23:28<1:15:58, 7.86s/it] {'loss': 0.3085, 'grad_norm': 0.7392215599824304, 'learning_rate': 5.947674747437793e-08, 'epoch': 0.95} + 95%|█████████▌| 11608/12188 [1:23:28<1:15:58, 7.86s/it] 95%|█████████▌| 11609/12188 [1:23:35<1:15:24, 7.81s/it] {'loss': 0.2701, 'grad_norm': 1.7299677084087792, 'learning_rate': 5.9272589565133595e-08, 'epoch': 0.95} + 95%|█████████▌| 11609/12188 [1:23:35<1:15:24, 7.81s/it] 95%|█████████▌| 11610/12188 [1:23:43<1:13:38, 7.64s/it] {'loss': 0.3, 'grad_norm': 0.7019272451742088, 'learning_rate': 5.906878056212151e-08, 'epoch': 0.95} + 95%|█████████▌| 11610/12188 [1:23:43<1:13:38, 7.64s/it] 95%|█████████▌| 11611/12188 [1:23:50<1:13:40, 7.66s/it] {'loss': 0.3042, 'grad_norm': 0.7861135960358739, 'learning_rate': 5.886532047973348e-08, 'epoch': 0.95} + 95%|█████████▌| 11611/12188 [1:23:50<1:13:40, 7.66s/it] 95%|█████████▌| 11612/12188 [1:23:58<1:13:23, 7.64s/it] {'loss': 0.3319, 'grad_norm': 0.702293766411594, 'learning_rate': 5.866220933233802e-08, 'epoch': 0.95} + 95%|█████████▌| 11612/12188 [1:23:58<1:13:23, 7.64s/it] 95%|█████████▌| 11613/12188 [1:24:05<1:12:06, 7.52s/it] {'loss': 0.2687, 'grad_norm': 0.688562641923607, 'learning_rate': 5.8459447134278645e-08, 'epoch': 0.95} + 95%|█████████▌| 11613/12188 [1:24:05<1:12:06, 7.52s/it] 95%|█████████▌| 11614/12188 [1:24:13<1:12:18, 7.56s/it] {'loss': 0.2735, 'grad_norm': 0.6767244198083797, 'learning_rate': 5.825703389987392e-08, 'epoch': 0.95} + 95%|█████████▌| 11614/12188 [1:24:13<1:12:18, 7.56s/it] 95%|█████████▌| 11615/12188 [1:24:21<1:14:15, 7.78s/it] {'loss': 0.3149, 'grad_norm': 0.8225906034779811, 'learning_rate': 5.80549696434185e-08, 'epoch': 0.95} + 95%|█████████▌| 11615/12188 [1:24:21<1:14:15, 7.78s/it] 95%|█████████▌| 11616/12188 [1:24:28<1:12:51, 7.64s/it] {'loss': 0.2937, 'grad_norm': 0.6629842734259151, 'learning_rate': 5.785325437918044e-08, 'epoch': 0.95} + 95%|█████████▌| 11616/12188 [1:24:28<1:12:51, 7.64s/it] 95%|█████████▌| 11617/12188 [1:24:36<1:12:53, 7.66s/it] {'loss': 0.3258, 'grad_norm': 0.7516852340698679, 'learning_rate': 5.765188812140554e-08, 'epoch': 0.95} + 95%|█████████▌| 11617/12188 [1:24:36<1:12:53, 7.66s/it] 95%|█████████▌| 11618/12188 [1:24:44<1:12:43, 7.65s/it] {'loss': 0.3003, 'grad_norm': 0.8139492164037053, 'learning_rate': 5.745087088431411e-08, 'epoch': 0.95} + 95%|█████████▌| 11618/12188 [1:24:44<1:12:43, 7.65s/it] 95%|█████████▌| 11619/12188 [1:24:51<1:12:21, 7.63s/it] {'loss': 0.2819, 'grad_norm': 1.1031558597044164, 'learning_rate': 5.7250202682100907e-08, 'epoch': 0.95} + 95%|█████████▌| 11619/12188 [1:24:51<1:12:21, 7.63s/it] 95%|█████████▌| 11620/12188 [1:24:59<1:12:29, 7.66s/it] {'loss': 0.3065, 'grad_norm': 0.7883789065832931, 'learning_rate': 5.704988352893792e-08, 'epoch': 0.95} + 95%|█████████▌| 11620/12188 [1:24:59<1:12:29, 7.66s/it] 95%|█████████▌| 11621/12188 [1:25:06<1:11:55, 7.61s/it] {'loss': 0.3044, 'grad_norm': 0.7862213952373404, 'learning_rate': 5.684991343896995e-08, 'epoch': 0.95} + 95%|█████████▌| 11621/12188 [1:25:06<1:11:55, 7.61s/it] 95%|█████████▌| 11622/12188 [1:25:16<1:17:48, 8.25s/it] {'loss': 0.264, 'grad_norm': 0.6734445861737006, 'learning_rate': 5.665029242632014e-08, 'epoch': 0.95} + 95%|█████████▌| 11622/12188 [1:25:16<1:17:48, 8.25s/it] 95%|█████████▌| 11623/12188 [1:25:24<1:15:24, 8.01s/it] {'loss': 0.2858, 'grad_norm': 0.8753518522241668, 'learning_rate': 5.645102050508389e-08, 'epoch': 0.95} + 95%|█████████▌| 11623/12188 [1:25:24<1:15:24, 8.01s/it] 95%|█████████▌| 11624/12188 [1:25:31<1:13:15, 7.79s/it] {'loss': 0.2703, 'grad_norm': 0.7141115081875615, 'learning_rate': 5.625209768933437e-08, 'epoch': 0.95} + 95%|█████████▌| 11624/12188 [1:25:31<1:13:15, 7.79s/it] 95%|█████████▌| 11625/12188 [1:25:39<1:13:13, 7.80s/it] {'loss': 0.2898, 'grad_norm': 0.7637739955258056, 'learning_rate': 5.60535239931187e-08, 'epoch': 0.95} + 95%|█████████▌| 11625/12188 [1:25:39<1:13:13, 7.80s/it] 95%|█████████▌| 11626/12188 [1:25:46<1:12:46, 7.77s/it] {'loss': 0.2963, 'grad_norm': 0.7914097604703239, 'learning_rate': 5.585529943046064e-08, 'epoch': 0.95} + 95%|█████████▌| 11626/12188 [1:25:46<1:12:46, 7.77s/it] 95%|█████████▌| 11627/12188 [1:25:54<1:11:58, 7.70s/it] {'loss': 0.2853, 'grad_norm': 0.6955174703033478, 'learning_rate': 5.5657424015357895e-08, 'epoch': 0.95} + 95%|█████████▌| 11627/12188 [1:25:54<1:11:58, 7.70s/it] 95%|█████████▌| 11628/12188 [1:26:01<1:11:19, 7.64s/it] {'loss': 0.2996, 'grad_norm': 0.6882487890453047, 'learning_rate': 5.545989776178373e-08, 'epoch': 0.95} + 95%|█████████▌| 11628/12188 [1:26:01<1:11:19, 7.64s/it] 95%|█████████▌| 11629/12188 [1:26:11<1:17:18, 8.30s/it] {'loss': 0.2914, 'grad_norm': 0.8945239570390398, 'learning_rate': 5.5262720683688096e-08, 'epoch': 0.95} + 95%|█████████▌| 11629/12188 [1:26:11<1:17:18, 8.30s/it] 95%|█████████▌| 11630/12188 [1:26:22<1:24:28, 9.08s/it] {'loss': 0.2989, 'grad_norm': 0.6845077893942489, 'learning_rate': 5.506589279499486e-08, 'epoch': 0.95} + 95%|█████████▌| 11630/12188 [1:26:22<1:24:28, 9.08s/it] 95%|█████████▌| 11631/12188 [1:26:29<1:19:14, 8.54s/it] {'loss': 0.2862, 'grad_norm': 0.7263728681554206, 'learning_rate': 5.486941410960289e-08, 'epoch': 0.95} + 95%|█████████▌| 11631/12188 [1:26:30<1:19:14, 8.54s/it] 95%|█████████▌| 11632/12188 [1:26:37<1:17:01, 8.31s/it] {'loss': 0.3251, 'grad_norm': 0.6468141374464808, 'learning_rate': 5.467328464138888e-08, 'epoch': 0.95} + 95%|█████████▌| 11632/12188 [1:26:37<1:17:01, 8.31s/it] 95%|█████████▌| 11633/12188 [1:26:47<1:21:55, 8.86s/it] {'loss': 0.3204, 'grad_norm': 0.7236304806659882, 'learning_rate': 5.4477504404201185e-08, 'epoch': 0.95} + 95%|█████████▌| 11633/12188 [1:26:47<1:21:55, 8.86s/it] 95%|█████████▌| 11634/12188 [1:26:55<1:18:02, 8.45s/it] {'loss': 0.3264, 'grad_norm': 0.7226330231593685, 'learning_rate': 5.4282073411867087e-08, 'epoch': 0.95} + 95%|█████████▌| 11634/12188 [1:26:55<1:18:02, 8.45s/it] 95%|█████████▌| 11635/12188 [1:27:03<1:16:06, 8.26s/it] {'loss': 0.3173, 'grad_norm': 0.7916578027205069, 'learning_rate': 5.4086991678187206e-08, 'epoch': 0.95} + 95%|█████████▌| 11635/12188 [1:27:03<1:16:06, 8.26s/it] 95%|█████████▌| 11636/12188 [1:27:10<1:13:24, 7.98s/it] {'loss': 0.2749, 'grad_norm': 0.7151703863281624, 'learning_rate': 5.389225921693775e-08, 'epoch': 0.95} + 95%|█████████▌| 11636/12188 [1:27:10<1:13:24, 7.98s/it] 95%|█████████▌| 11637/12188 [1:27:19<1:16:57, 8.38s/it] {'loss': 0.2862, 'grad_norm': 0.7256015837028933, 'learning_rate': 5.369787604186993e-08, 'epoch': 0.95} + 95%|█████████▌| 11637/12188 [1:27:19<1:16:57, 8.38s/it] 95%|█████████▌| 11638/12188 [1:27:27<1:13:32, 8.02s/it] {'loss': 0.2799, 'grad_norm': 0.7275220701334022, 'learning_rate': 5.350384216671167e-08, 'epoch': 0.95} + 95%|█████████▌| 11638/12188 [1:27:27<1:13:32, 8.02s/it] 95%|█████████▌| 11639/12188 [1:27:34<1:12:05, 7.88s/it] {'loss': 0.2978, 'grad_norm': 0.7378517868581264, 'learning_rate': 5.331015760516478e-08, 'epoch': 0.95} + 95%|█████████▌| 11639/12188 [1:27:34<1:12:05, 7.88s/it] 96%|█████████▌| 11640/12188 [1:27:42<1:12:49, 7.97s/it] {'loss': 0.2944, 'grad_norm': 0.7260809333066194, 'learning_rate': 5.3116822370907206e-08, 'epoch': 0.95} + 96%|█████████▌| 11640/12188 [1:27:42<1:12:49, 7.97s/it] 96%|█████████▌| 11641/12188 [1:27:50<1:13:00, 8.01s/it] {'loss': 0.2761, 'grad_norm': 0.6293781908901763, 'learning_rate': 5.2923836477591916e-08, 'epoch': 0.96} + 96%|█████████▌| 11641/12188 [1:27:50<1:13:00, 8.01s/it] 96%|█████████▌| 11642/12188 [1:27:58<1:12:45, 8.00s/it] {'loss': 0.2983, 'grad_norm': 0.7715753417416791, 'learning_rate': 5.273119993884745e-08, 'epoch': 0.96} + 96%|█████████▌| 11642/12188 [1:27:58<1:12:45, 8.00s/it] 96%|█████████▌| 11643/12188 [1:28:06<1:11:27, 7.87s/it] {'loss': 0.2858, 'grad_norm': 0.7726584181603239, 'learning_rate': 5.2538912768276826e-08, 'epoch': 0.96} + 96%|█████████▌| 11643/12188 [1:28:06<1:11:27, 7.87s/it] 96%|█████████▌| 11644/12188 [1:28:13<1:09:39, 7.68s/it] {'loss': 0.2791, 'grad_norm': 0.7737672234062729, 'learning_rate': 5.234697497945973e-08, 'epoch': 0.96} + 96%|█████████▌| 11644/12188 [1:28:13<1:09:39, 7.68s/it] 96%|█████████▌| 11645/12188 [1:28:21<1:10:28, 7.79s/it] {'loss': 0.2955, 'grad_norm': 0.6925338263432202, 'learning_rate': 5.215538658595032e-08, 'epoch': 0.96} + 96%|█████████▌| 11645/12188 [1:28:21<1:10:28, 7.79s/it] 96%|█████████▌| 11646/12188 [1:28:29<1:09:51, 7.73s/it] {'loss': 0.3223, 'grad_norm': 0.7254993147516373, 'learning_rate': 5.196414760127777e-08, 'epoch': 0.96} + 96%|█████████▌| 11646/12188 [1:28:29<1:09:51, 7.73s/it] 96%|█████████▌| 11647/12188 [1:28:36<1:08:31, 7.60s/it] {'loss': 0.3158, 'grad_norm': 0.6967789586918134, 'learning_rate': 5.1773258038947395e-08, 'epoch': 0.96} + 96%|█████████▌| 11647/12188 [1:28:36<1:08:31, 7.60s/it] 96%|█████████▌| 11648/12188 [1:28:44<1:08:03, 7.56s/it] {'loss': 0.286, 'grad_norm': 0.7081755538608755, 'learning_rate': 5.1582717912440076e-08, 'epoch': 0.96} + 96%|█████████▌| 11648/12188 [1:28:44<1:08:03, 7.56s/it] 96%|█████████▌| 11649/12188 [1:28:51<1:07:51, 7.55s/it] {'loss': 0.2612, 'grad_norm': 0.6586506599722132, 'learning_rate': 5.13925272352106e-08, 'epoch': 0.96} + 96%|█████████▌| 11649/12188 [1:28:51<1:07:51, 7.55s/it] 96%|█████████▌| 11650/12188 [1:28:58<1:07:07, 7.49s/it] {'loss': 0.3236, 'grad_norm': 0.7118524064473264, 'learning_rate': 5.120268602069101e-08, 'epoch': 0.96} + 96%|█████████▌| 11650/12188 [1:28:58<1:07:07, 7.49s/it] 96%|█████████▌| 11651/12188 [1:29:06<1:06:52, 7.47s/it] {'loss': 0.3203, 'grad_norm': 0.7846537587869404, 'learning_rate': 5.101319428228613e-08, 'epoch': 0.96} + 96%|█████████▌| 11651/12188 [1:29:06<1:06:52, 7.47s/it] 96%|█████████▌| 11652/12188 [1:29:15<1:10:02, 7.84s/it] {'loss': 0.3012, 'grad_norm': 0.7188852118761534, 'learning_rate': 5.082405203337859e-08, 'epoch': 0.96} + 96%|█████████▌| 11652/12188 [1:29:15<1:10:02, 7.84s/it] 96%|█████████▌| 11653/12188 [1:29:22<1:09:58, 7.85s/it] {'loss': 0.3367, 'grad_norm': 0.7022949622889041, 'learning_rate': 5.0635259287324934e-08, 'epoch': 0.96} + 96%|█████████▌| 11653/12188 [1:29:22<1:09:58, 7.85s/it] 96%|█████████▌| 11654/12188 [1:29:33<1:16:51, 8.64s/it] {'loss': 0.2658, 'grad_norm': 0.6692906195496103, 'learning_rate': 5.044681605745727e-08, 'epoch': 0.96} + 96%|█████████▌| 11654/12188 [1:29:33<1:16:51, 8.64s/it] 96%|█████████▌| 11655/12188 [1:29:40<1:13:26, 8.27s/it] {'loss': 0.3102, 'grad_norm': 0.6930987571676941, 'learning_rate': 5.025872235708384e-08, 'epoch': 0.96} + 96%|█████████▌| 11655/12188 [1:29:40<1:13:26, 8.27s/it] 96%|█████████▌| 11656/12188 [1:29:48<1:11:22, 8.05s/it] {'loss': 0.3282, 'grad_norm': 0.7179901071047708, 'learning_rate': 5.0070978199486806e-08, 'epoch': 0.96} + 96%|█████████▌| 11656/12188 [1:29:48<1:11:22, 8.05s/it] 96%|█████████▌| 11657/12188 [1:29:58<1:16:03, 8.59s/it] {'loss': 0.3543, 'grad_norm': 0.7337719477967835, 'learning_rate': 4.9883583597925e-08, 'epoch': 0.96} + 96%|█████████▌| 11657/12188 [1:29:58<1:16:03, 8.59s/it] 96%|█████████▌| 11658/12188 [1:30:05<1:12:53, 8.25s/it] {'loss': 0.2752, 'grad_norm': 0.6785531997986926, 'learning_rate': 4.9696538565631724e-08, 'epoch': 0.96} + 96%|█████████▌| 11658/12188 [1:30:05<1:12:53, 8.25s/it] 96%|█████████▌| 11659/12188 [1:30:13<1:10:26, 7.99s/it] {'loss': 0.2954, 'grad_norm': 0.7133175359435705, 'learning_rate': 4.9509843115814746e-08, 'epoch': 0.96} + 96%|█████████▌| 11659/12188 [1:30:13<1:10:26, 7.99s/it] 96%|█████████▌| 11660/12188 [1:30:21<1:10:19, 7.99s/it] {'loss': 0.2908, 'grad_norm': 0.6255715757558338, 'learning_rate': 4.9323497261659635e-08, 'epoch': 0.96} + 96%|█████████▌| 11660/12188 [1:30:21<1:10:19, 7.99s/it] 96%|█████████▌| 11661/12188 [1:30:28<1:08:46, 7.83s/it] {'loss': 0.2956, 'grad_norm': 0.7061489897289401, 'learning_rate': 4.9137501016325305e-08, 'epoch': 0.96} + 96%|█████████▌| 11661/12188 [1:30:28<1:08:46, 7.83s/it] 96%|█████████▌| 11662/12188 [1:30:35<1:07:36, 7.71s/it] {'loss': 0.2978, 'grad_norm': 0.7133243185531801, 'learning_rate': 4.8951854392946254e-08, 'epoch': 0.96} + 96%|█████████▌| 11662/12188 [1:30:35<1:07:36, 7.71s/it] 96%|█████████▌| 11663/12188 [1:30:43<1:06:21, 7.58s/it] {'loss': 0.3192, 'grad_norm': 1.372606863428361, 'learning_rate': 4.8766557404633095e-08, 'epoch': 0.96} + 96%|█████████▌| 11663/12188 [1:30:43<1:06:21, 7.58s/it] 96%|█████████▌| 11664/12188 [1:30:50<1:06:05, 7.57s/it] {'loss': 0.2794, 'grad_norm': 0.6569735329067793, 'learning_rate': 4.858161006447038e-08, 'epoch': 0.96} + 96%|█████████▌| 11664/12188 [1:30:50<1:06:05, 7.57s/it] 96%|█████████▌| 11665/12188 [1:30:58<1:05:18, 7.49s/it] {'loss': 0.2788, 'grad_norm': 0.7142479706436962, 'learning_rate': 4.8397012385519856e-08, 'epoch': 0.96} + 96%|█████████▌| 11665/12188 [1:30:58<1:05:18, 7.49s/it] 96%|█████████▌| 11666/12188 [1:31:05<1:05:58, 7.58s/it] {'loss': 0.286, 'grad_norm': 0.7414638337743316, 'learning_rate': 4.8212764380816676e-08, 'epoch': 0.96} + 96%|█████████▌| 11666/12188 [1:31:05<1:05:58, 7.58s/it] 96%|█████████▌| 11667/12188 [1:31:13<1:06:08, 7.62s/it] {'loss': 0.2693, 'grad_norm': 0.6712293816685777, 'learning_rate': 4.802886606337209e-08, 'epoch': 0.96} + 96%|█████████▌| 11667/12188 [1:31:13<1:06:08, 7.62s/it] 96%|█████████▌| 11668/12188 [1:31:21<1:05:50, 7.60s/it] {'loss': 0.3289, 'grad_norm': 0.730692845492134, 'learning_rate': 4.7845317446174044e-08, 'epoch': 0.96} + 96%|█████████▌| 11668/12188 [1:31:21<1:05:50, 7.60s/it] 96%|█████████▌| 11669/12188 [1:31:28<1:05:39, 7.59s/it] {'loss': 0.2954, 'grad_norm': 0.6382819405406593, 'learning_rate': 4.766211854218217e-08, 'epoch': 0.96} + 96%|█████████▌| 11669/12188 [1:31:28<1:05:39, 7.59s/it] 96%|█████████▌| 11670/12188 [1:31:36<1:06:14, 7.67s/it] {'loss': 0.2632, 'grad_norm': 0.6661417151995015, 'learning_rate': 4.747926936433611e-08, 'epoch': 0.96} + 96%|█████████▌| 11670/12188 [1:31:36<1:06:14, 7.67s/it] 96%|█████████▌| 11671/12188 [1:31:43<1:05:14, 7.57s/it] {'loss': 0.2823, 'grad_norm': 0.7053995824009092, 'learning_rate': 4.729676992554666e-08, 'epoch': 0.96} + 96%|█████████▌| 11671/12188 [1:31:43<1:05:14, 7.57s/it] 96%|█████████▌| 11672/12188 [1:31:51<1:05:40, 7.64s/it] {'loss': 0.3135, 'grad_norm': 0.6989509434702533, 'learning_rate': 4.711462023870239e-08, 'epoch': 0.96} + 96%|█████████▌| 11672/12188 [1:31:51<1:05:40, 7.64s/it] 96%|█████████▌| 11673/12188 [1:31:59<1:05:20, 7.61s/it] {'loss': 0.2947, 'grad_norm': 0.6798632915550349, 'learning_rate': 4.693282031666579e-08, 'epoch': 0.96} + 96%|█████████▌| 11673/12188 [1:31:59<1:05:20, 7.61s/it] 96%|█████████▌| 11674/12188 [1:32:06<1:04:21, 7.51s/it] {'loss': 0.2924, 'grad_norm': 0.7092252390311168, 'learning_rate': 4.6751370172276044e-08, 'epoch': 0.96} + 96%|█████████▌| 11674/12188 [1:32:06<1:04:21, 7.51s/it] 96%|█████████▌| 11675/12188 [1:32:14<1:04:38, 7.56s/it] {'loss': 0.2891, 'grad_norm': 0.714456566258489, 'learning_rate': 4.657026981834623e-08, 'epoch': 0.96} + 96%|█████████▌| 11675/12188 [1:32:14<1:04:38, 7.56s/it] 96%|█████████▌| 11676/12188 [1:32:21<1:04:46, 7.59s/it] {'loss': 0.2984, 'grad_norm': 0.6729717548896826, 'learning_rate': 4.6389519267666107e-08, 'epoch': 0.96} + 96%|█████████▌| 11676/12188 [1:32:21<1:04:46, 7.59s/it] 96%|█████████▌| 11677/12188 [1:32:29<1:04:47, 7.61s/it] {'loss': 0.3122, 'grad_norm': 0.7285435134129544, 'learning_rate': 4.6209118532999365e-08, 'epoch': 0.96} + 96%|█████████▌| 11677/12188 [1:32:29<1:04:47, 7.61s/it] 96%|█████████▌| 11678/12188 [1:32:37<1:05:32, 7.71s/it] {'loss': 0.2646, 'grad_norm': 0.6540854137882476, 'learning_rate': 4.602906762708526e-08, 'epoch': 0.96} + 96%|█████████▌| 11678/12188 [1:32:37<1:05:32, 7.71s/it] 96%|█████████▌| 11679/12188 [1:32:45<1:05:32, 7.73s/it] {'loss': 0.3085, 'grad_norm': 0.8733006772219206, 'learning_rate': 4.584936656264027e-08, 'epoch': 0.96} + 96%|█████████▌| 11679/12188 [1:32:45<1:05:32, 7.73s/it] 96%|█████████▌| 11680/12188 [1:32:52<1:04:21, 7.60s/it] {'loss': 0.2843, 'grad_norm': 0.6558224495342561, 'learning_rate': 4.56700153523526e-08, 'epoch': 0.96} + 96%|█████████▌| 11680/12188 [1:32:52<1:04:21, 7.60s/it] 96%|█████████▌| 11681/12188 [1:32:59<1:03:28, 7.51s/it] {'loss': 0.2926, 'grad_norm': 0.7102350170094677, 'learning_rate': 4.549101400888933e-08, 'epoch': 0.96} + 96%|█████████▌| 11681/12188 [1:32:59<1:03:28, 7.51s/it] 96%|█████████▌| 11682/12188 [1:33:07<1:02:50, 7.45s/it] {'loss': 0.3222, 'grad_norm': 0.6936138810247945, 'learning_rate': 4.531236254489035e-08, 'epoch': 0.96} + 96%|█████████▌| 11682/12188 [1:33:07<1:02:50, 7.45s/it] 96%|█████████▌| 11683/12188 [1:33:16<1:06:56, 7.95s/it] {'loss': 0.2823, 'grad_norm': 0.69741783499237, 'learning_rate': 4.513406097297224e-08, 'epoch': 0.96} + 96%|█████████▌| 11683/12188 [1:33:16<1:06:56, 7.95s/it] 96%|█████████▌| 11684/12188 [1:33:24<1:06:58, 7.97s/it] {'loss': 0.3291, 'grad_norm': 0.6971399339603158, 'learning_rate': 4.495610930572603e-08, 'epoch': 0.96} + 96%|█████████▌| 11684/12188 [1:33:24<1:06:58, 7.97s/it] 96%|█████████▌| 11685/12188 [1:33:32<1:06:31, 7.94s/it] {'loss': 0.2693, 'grad_norm': 0.7021632965185957, 'learning_rate': 4.4778507555718886e-08, 'epoch': 0.96} + 96%|█████████▌| 11685/12188 [1:33:32<1:06:31, 7.94s/it] 96%|█████████▌| 11686/12188 [1:33:39<1:05:43, 7.86s/it] {'loss': 0.3289, 'grad_norm': 0.697082191378743, 'learning_rate': 4.46012557354919e-08, 'epoch': 0.96} + 96%|█████████▌| 11686/12188 [1:33:39<1:05:43, 7.86s/it] 96%|█████████▌| 11687/12188 [1:33:47<1:04:36, 7.74s/it] {'loss': 0.2774, 'grad_norm': 0.7462864331191027, 'learning_rate': 4.442435385756283e-08, 'epoch': 0.96} + 96%|█████████▌| 11687/12188 [1:33:47<1:04:36, 7.74s/it] 96%|█████████▌| 11688/12188 [1:33:55<1:05:25, 7.85s/it] {'loss': 0.2806, 'grad_norm': 0.666601498078659, 'learning_rate': 4.4247801934423904e-08, 'epoch': 0.96} + 96%|█████████▌| 11688/12188 [1:33:55<1:05:25, 7.85s/it] 96%|█████████▌| 11689/12188 [1:34:02<1:04:02, 7.70s/it] {'loss': 0.3414, 'grad_norm': 0.7529304183853978, 'learning_rate': 4.407159997854349e-08, 'epoch': 0.96} + 96%|█████████▌| 11689/12188 [1:34:02<1:04:02, 7.70s/it] 96%|█████████▌| 11690/12188 [1:34:09<1:02:42, 7.55s/it] {'loss': 0.2951, 'grad_norm': 0.6770672095762997, 'learning_rate': 4.3895748002364404e-08, 'epoch': 0.96} + 96%|█████████▌| 11690/12188 [1:34:09<1:02:42, 7.55s/it] 96%|█████████▌| 11691/12188 [1:34:17<1:02:40, 7.57s/it] {'loss': 0.3152, 'grad_norm': 0.7452582678387977, 'learning_rate': 4.372024601830449e-08, 'epoch': 0.96} + 96%|█████████▌| 11691/12188 [1:34:17<1:02:40, 7.57s/it] 96%|█████████▌| 11692/12188 [1:34:24<1:01:53, 7.49s/it] {'loss': 0.2903, 'grad_norm': 0.7028363539932324, 'learning_rate': 4.354509403875884e-08, 'epoch': 0.96} + 96%|█████████▌| 11692/12188 [1:34:24<1:01:53, 7.49s/it] 96%|█████████▌| 11693/12188 [1:34:32<1:02:43, 7.60s/it] {'loss': 0.2656, 'grad_norm': 0.6892106612488396, 'learning_rate': 4.337029207609478e-08, 'epoch': 0.96} + 96%|█████████▌| 11693/12188 [1:34:32<1:02:43, 7.60s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 899, in process_image_unified + visual_processed = processor.preprocess(image, return_tensors="pt") + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 432, in preprocess + patches, image_grid_thw = self._preprocess( + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 252, in _preprocess + resized_height, resized_width = smart_resize( + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 69, in smart_resize + raise ValueError(f"height:{height} and width:{width} must be larger than factor:{factor}") +ValueError: height:21 and width:135 must be larger than factor:28 +[Try #0] Failed to fetch sample 2092006 in VC:s3://gui-agent/jedi/images/figma400k/figma400k_extracted/. Exception: height:21 and width:135 must be larger than factor:28 +Problematic sample: {'image': 'b740dccee641dd995e5ce727ca3882efdf31feffa6d5688fe120c85e9c186e93.png', 'conversations': [{'from': 'human', 'value': "\nThe visual attributes of this Button are:\nThe element is a circular button with a gray background and a black plus sign in the center. It has a minimalist design with no additional text or icons. The button is visually distinct due to its simple geometric shape and contrasting colors.\n\nThe spatial layout of this Button:\nThe button is located in the middle section of the interface, to the right of a green circular play button. It is part of a horizontal control panel that includes other interactive elements. The button is positioned between the play button and a three-dot menu icon.\n\nFunctional description of the Button:\nThe primary function of this button is likely to add or save the current item, such as a song or playlist, to a user's library or a specific list. Users can interact with it by clicking or tapping, which would typically result in the item being added to their collection."}, {'from': 'gpt', 'value': '[[0, 0, 1000, 1000]]', 'recipient': 'all', 'end_turn': True}]} + 96%|█████████▌| 11694/12188 [1:34:40<1:04:05, 7.79s/it] {'loss': 0.2977, 'grad_norm': 0.7650994410200068, 'learning_rate': 4.319584014265743e-08, 'epoch': 0.96} + 96%|█████████▌| 11694/12188 [1:34:40<1:04:05, 7.79s/it] 96%|█████████▌| 11695/12188 [1:34:51<1:10:24, 8.57s/it] {'loss': 0.2971, 'grad_norm': 0.6584422767267282, 'learning_rate': 4.302173825076639e-08, 'epoch': 0.96} + 96%|█████████▌| 11695/12188 [1:34:51<1:10:24, 8.57s/it] 96%|█████████▌| 11696/12188 [1:35:00<1:12:46, 8.87s/it] {'loss': 0.3184, 'grad_norm': 0.7125131309122773, 'learning_rate': 4.28479864127157e-08, 'epoch': 0.96} + 96%|█████████▌| 11696/12188 [1:35:00<1:12:46, 8.87s/it] 96%|█████████▌| 11697/12188 [1:35:08<1:09:35, 8.50s/it] {'loss': 0.3027, 'grad_norm': 0.7090560816354273, 'learning_rate': 4.2674584640776676e-08, 'epoch': 0.96} + 96%|█████████▌| 11697/12188 [1:35:08<1:09:35, 8.50s/it] 96%|█████████▌| 11698/12188 [1:35:16<1:07:00, 8.21s/it] {'loss': 0.2867, 'grad_norm': 0.6974034583373845, 'learning_rate': 4.250153294719339e-08, 'epoch': 0.96} + 96%|█████████▌| 11698/12188 [1:35:16<1:07:00, 8.21s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f8233ad6d90> +[Try #0] Failed to fetch sample 4475518 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f8233ad6d90> +Problematic sample: {'image': '20240823_072753_before_screenshot.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Recent changes'"}, {'from': 'gpt', 'value': '\nclick(x=0.865, y=0.253)\n'}]} + 96%|█████████▌| 11699/12188 [1:35:26<1:11:37, 8.79s/it] {'loss': 0.2717, 'grad_norm': 0.6316702881271453, 'learning_rate': 4.2328831344187196e-08, 'epoch': 0.96} + 96%|█████████▌| 11699/12188 [1:35:26<1:11:37, 8.79s/it] 96%|█████████▌| 11700/12188 [1:35:34<1:10:17, 8.64s/it] {'loss': 0.3003, 'grad_norm': 0.6908986167203036, 'learning_rate': 4.215647984395388e-08, 'epoch': 0.96} + 96%|█████████▌| 11700/12188 [1:35:34<1:10:17, 8.64s/it] 96%|█████████▌| 11701/12188 [1:35:42<1:07:26, 8.31s/it] {'loss': 0.2606, 'grad_norm': 0.6703579958197343, 'learning_rate': 4.1984478458664265e-08, 'epoch': 0.96} + 96%|█████████▌| 11701/12188 [1:35:42<1:07:26, 8.31s/it] 96%|█████████▌| 11702/12188 [1:35:49<1:05:01, 8.03s/it] {'loss': 0.2838, 'grad_norm': 0.7046171578916499, 'learning_rate': 4.181282720046531e-08, 'epoch': 0.96} + 96%|█████████▌| 11702/12188 [1:35:49<1:05:01, 8.03s/it] 96%|█████████▌| 11703/12188 [1:35:56<1:03:16, 7.83s/it] {'loss': 0.3228, 'grad_norm': 0.8037567147224653, 'learning_rate': 4.164152608147842e-08, 'epoch': 0.96} + 96%|█████████▌| 11703/12188 [1:35:56<1:03:16, 7.83s/it] 96%|█████████▌| 11704/12188 [1:36:04<1:03:25, 7.86s/it] {'loss': 0.2708, 'grad_norm': 0.8229693507182569, 'learning_rate': 4.147057511380115e-08, 'epoch': 0.96} + 96%|█████████▌| 11704/12188 [1:36:04<1:03:25, 7.86s/it] 96%|█████████▌| 11705/12188 [1:36:12<1:02:25, 7.76s/it] {'loss': 0.3492, 'grad_norm': 0.7746577918682171, 'learning_rate': 4.129997430950494e-08, 'epoch': 0.96} + 96%|█████████▌| 11705/12188 [1:36:12<1:02:25, 7.76s/it] 96%|█████████▌| 11706/12188 [1:36:21<1:07:00, 8.34s/it] {'loss': 0.3111, 'grad_norm': 0.6767792692808359, 'learning_rate': 4.112972368063794e-08, 'epoch': 0.96} + 96%|█████████▌| 11706/12188 [1:36:21<1:07:00, 8.34s/it] 96%|█████████▌| 11707/12188 [1:36:29<1:05:08, 8.13s/it] {'loss': 0.2873, 'grad_norm': 0.7372446315421658, 'learning_rate': 4.095982323922332e-08, 'epoch': 0.96} + 96%|█████████▌| 11707/12188 [1:36:29<1:05:08, 8.13s/it] 96%|█████████▌| 11708/12188 [1:36:37<1:03:38, 7.96s/it] {'loss': 0.3324, 'grad_norm': 0.7454254549261135, 'learning_rate': 4.079027299725757e-08, 'epoch': 0.96} + 96%|█████████▌| 11708/12188 [1:36:37<1:03:38, 7.96s/it] 96%|█████████▌| 11709/12188 [1:36:44<1:02:03, 7.77s/it] {'loss': 0.2367, 'grad_norm': 0.6539531795984255, 'learning_rate': 4.0621072966716135e-08, 'epoch': 0.96} + 96%|█████████▌| 11709/12188 [1:36:44<1:02:03, 7.77s/it] 96%|█████████▌| 11710/12188 [1:36:52<1:02:34, 7.85s/it] {'loss': 0.2888, 'grad_norm': 0.687637724582446, 'learning_rate': 4.045222315954611e-08, 'epoch': 0.96} + 96%|█████████▌| 11710/12188 [1:36:52<1:02:34, 7.85s/it] 96%|█████████▌| 11711/12188 [1:37:01<1:04:58, 8.17s/it] {'loss': 0.2818, 'grad_norm': 0.7488082251629669, 'learning_rate': 4.0283723587672405e-08, 'epoch': 0.96} + 96%|█████████▌| 11711/12188 [1:37:01<1:04:58, 8.17s/it] 96%|█████████▌| 11712/12188 [1:37:08<1:03:23, 7.99s/it] {'loss': 0.2917, 'grad_norm': 0.6651036890354699, 'learning_rate': 4.0115574262993284e-08, 'epoch': 0.96} + 96%|█████████▌| 11712/12188 [1:37:08<1:03:23, 7.99s/it] 96%|█████████▌| 11713/12188 [1:37:16<1:01:38, 7.79s/it] {'loss': 0.2852, 'grad_norm': 0.732288267477677, 'learning_rate': 3.994777519738424e-08, 'epoch': 0.96} + 96%|██��██████▌| 11713/12188 [1:37:16<1:01:38, 7.79s/it] 96%|█████████▌| 11714/12188 [1:37:26<1:06:20, 8.40s/it] {'loss': 0.3334, 'grad_norm': 1.6232059362851552, 'learning_rate': 3.9780326402694135e-08, 'epoch': 0.96} + 96%|█████████▌| 11714/12188 [1:37:26<1:06:20, 8.40s/it] 96%|█████████▌| 11715/12188 [1:37:33<1:04:28, 8.18s/it] {'loss': 0.326, 'grad_norm': 0.7130187670321327, 'learning_rate': 3.961322789074795e-08, 'epoch': 0.96} + 96%|█████████▌| 11715/12188 [1:37:33<1:04:28, 8.18s/it] 96%|█████████▌| 11716/12188 [1:37:41<1:03:26, 8.06s/it] {'loss': 0.27, 'grad_norm': 0.75702686297671, 'learning_rate': 3.944647967334625e-08, 'epoch': 0.96} + 96%|█████████▌| 11716/12188 [1:37:41<1:03:26, 8.06s/it] 96%|█████████▌| 11717/12188 [1:37:49<1:03:02, 8.03s/it] {'loss': 0.3516, 'grad_norm': 0.7002604275805014, 'learning_rate': 3.928008176226461e-08, 'epoch': 0.96} + 96%|█████████▌| 11717/12188 [1:37:49<1:03:02, 8.03s/it] 96%|█████████▌| 11718/12188 [1:37:56<1:01:30, 7.85s/it] {'loss': 0.3241, 'grad_norm': 0.7685242146912565, 'learning_rate': 3.911403416925308e-08, 'epoch': 0.96} + 96%|█████████▌| 11718/12188 [1:37:56<1:01:30, 7.85s/it] 96%|█████████▌| 11719/12188 [1:38:07<1:07:36, 8.65s/it] {'loss': 0.3502, 'grad_norm': 0.8400916865389251, 'learning_rate': 3.894833690603839e-08, 'epoch': 0.96} + 96%|█████████▌| 11719/12188 [1:38:07<1:07:36, 8.65s/it] 96%|█████████▌| 11720/12188 [1:38:15<1:05:09, 8.35s/it] {'loss': 0.263, 'grad_norm': 0.7574854575223104, 'learning_rate': 3.878298998432228e-08, 'epoch': 0.96} + 96%|█████████▌| 11720/12188 [1:38:15<1:05:09, 8.35s/it] 96%|█████████▌| 11721/12188 [1:38:22<1:02:57, 8.09s/it] {'loss': 0.2635, 'grad_norm': 0.7051589145922943, 'learning_rate': 3.8617993415779876e-08, 'epoch': 0.96} + 96%|█████████▌| 11721/12188 [1:38:22<1:02:57, 8.09s/it] 96%|█████████▌| 11722/12188 [1:38:30<1:01:44, 7.95s/it] {'loss': 0.3232, 'grad_norm': 0.7526586435886291, 'learning_rate': 3.8453347212064064e-08, 'epoch': 0.96} + 96%|█████████▌| 11722/12188 [1:38:30<1:01:44, 7.95s/it] 96%|█████████▌| 11723/12188 [1:38:38<1:01:30, 7.94s/it] {'loss': 0.2632, 'grad_norm': 0.664154980328035, 'learning_rate': 3.82890513848011e-08, 'epoch': 0.96} + 96%|█████████▌| 11723/12188 [1:38:38<1:01:30, 7.94s/it] 96%|█████████▌| 11724/12188 [1:38:46<1:02:04, 8.03s/it] {'loss': 0.2822, 'grad_norm': 0.6282909310540896, 'learning_rate': 3.8125105945593935e-08, 'epoch': 0.96} + 96%|█████████▌| 11724/12188 [1:38:46<1:02:04, 8.03s/it] 96%|█████████▌| 11725/12188 [1:38:53<1:01:00, 7.91s/it] {'loss': 0.2882, 'grad_norm': 0.7261969774121017, 'learning_rate': 3.7961510906020516e-08, 'epoch': 0.96} + 96%|█████████▌| 11725/12188 [1:38:54<1:01:00, 7.91s/it] 96%|█████████▌| 11726/12188 [1:39:02<1:01:42, 8.01s/it] {'loss': 0.3189, 'grad_norm': 0.8195329686789037, 'learning_rate': 3.779826627763272e-08, 'epoch': 0.96} + 96%|█████████▌| 11726/12188 [1:39:02<1:01:42, 8.01s/it] 96%|█████████▌| 11727/12188 [1:39:10<1:02:44, 8.17s/it] {'loss': 0.2856, 'grad_norm': 0.862099040234421, 'learning_rate': 3.763537207195855e-08, 'epoch': 0.96} + 96%|█████████▌| 11727/12188 [1:39:10<1:02:44, 8.17s/it] 96%|█████████▌| 11728/12188 [1:39:18<1:00:43, 7.92s/it] {'loss': 0.2851, 'grad_norm': 0.7463895037789964, 'learning_rate': 3.747282830050214e-08, 'epoch': 0.96} + 96%|█████████▌| 11728/12188 [1:39:18<1:00:43, 7.92s/it] 96%|█████████▌| 11729/12188 [1:39:25<1:00:06, 7.86s/it] {'loss': 0.2818, 'grad_norm': 0.8460156573202277, 'learning_rate': 3.731063497474152e-08, 'epoch': 0.96} + 96%|█████████▌| 11729/12188 [1:39:25<1:00:06, 7.86s/it] 96%|█████████▌| 11730/12188 [1:39:33<58:55, 7.72s/it] {'loss': 0.3083, 'grad_norm': 0.7264107458137816, 'learning_rate': 3.714879210613087e-08, 'epoch': 0.96} + 96%|█████████▌| 11730/12188 [1:39:33<58:55, 7.72s/it] 96%|█████████▋| 11731/12188 [1:39:40<57:43, 7.58s/it] {'loss': 0.2858, 'grad_norm': 0.635855808899441, 'learning_rate': 3.698729970609882e-08, 'epoch': 0.96} + 96%|█████████▋| 11731/12188 [1:39:40<57:43, 7.58s/it] 96%|█████████▋| 11732/12188 [1:39:47<57:01, 7.50s/it] {'loss': 0.2846, 'grad_norm': 0.675665825486257, 'learning_rate': 3.6826157786050144e-08, 'epoch': 0.96} + 96%|█████████▋| 11732/12188 [1:39:47<57:01, 7.50s/it] 96%|█████████▋| 11733/12188 [1:39:55<57:10, 7.54s/it] {'loss': 0.2941, 'grad_norm': 0.6619130338425246, 'learning_rate': 3.666536635736406e-08, 'epoch': 0.96} + 96%|█████████▋| 11733/12188 [1:39:55<57:10, 7.54s/it] 96%|█████████▋| 11734/12188 [1:40:02<56:41, 7.49s/it] {'loss': 0.2843, 'grad_norm': 0.7752647428431868, 'learning_rate': 3.6504925431395946e-08, 'epoch': 0.96} + 96%|█████████▋| 11734/12188 [1:40:02<56:41, 7.49s/it] 96%|█████████▋| 11735/12188 [1:40:10<56:51, 7.53s/it] {'loss': 0.3075, 'grad_norm': 0.6998958968531642, 'learning_rate': 3.634483501947561e-08, 'epoch': 0.96} + 96%|█████████▋| 11735/12188 [1:40:10<56:51, 7.53s/it] 96%|█████████▋| 11736/12188 [1:40:17<56:21, 7.48s/it] {'loss': 0.2953, 'grad_norm': 0.6616961092635475, 'learning_rate': 3.618509513290791e-08, 'epoch': 0.96} + 96%|█████████▋| 11736/12188 [1:40:17<56:21, 7.48s/it] 96%|█████████▋| 11737/12188 [1:40:25<57:17, 7.62s/it] {'loss': 0.2862, 'grad_norm': 0.6868409984402457, 'learning_rate': 3.602570578297382e-08, 'epoch': 0.96} + 96%|█████████▋| 11737/12188 [1:40:25<57:17, 7.62s/it] 96%|█████████▋| 11738/12188 [1:40:33<56:40, 7.56s/it] {'loss': 0.2826, 'grad_norm': 0.7933610086532551, 'learning_rate': 3.5866666980929334e-08, 'epoch': 0.96} + 96%|█████████▋| 11738/12188 [1:40:33<56:40, 7.56s/it] 96%|█████████▋| 11739/12188 [1:40:41<58:35, 7.83s/it] {'loss': 0.2536, 'grad_norm': 0.6622524124392871, 'learning_rate': 3.5707978738005464e-08, 'epoch': 0.96} + 96%|█████████▋| 11739/12188 [1:40:41<58:35, 7.83s/it] 96%|█████████▋| 11740/12188 [1:40:51<1:02:11, 8.33s/it] {'loss': 0.2984, 'grad_norm': 0.774683476027425, 'learning_rate': 3.554964106540826e-08, 'epoch': 0.96} + 96%|█████████▋| 11740/12188 [1:40:51<1:02:11, 8.33s/it] 96%|█████████▋| 11741/12188 [1:40:59<1:02:14, 8.36s/it] {'loss': 0.3343, 'grad_norm': 0.6862867604699145, 'learning_rate': 3.539165397431932e-08, 'epoch': 0.96} + 96%|█████████▋| 11741/12188 [1:40:59<1:02:14, 8.36s/it] 96%|█████████▋| 11742/12188 [1:41:07<1:00:07, 8.09s/it] {'loss': 0.2592, 'grad_norm': 0.6733606598884274, 'learning_rate': 3.5234017475895276e-08, 'epoch': 0.96} + 96%|█████████▋| 11742/12188 [1:41:07<1:00:07, 8.09s/it] 96%|█████████▋| 11743/12188 [1:41:14<58:25, 7.88s/it] {'loss': 0.2944, 'grad_norm': 0.7301835799093065, 'learning_rate': 3.5076731581268896e-08, 'epoch': 0.96} + 96%|█████████▋| 11743/12188 [1:41:14<58:25, 7.88s/it] 96%|█████████▋| 11744/12188 [1:41:21<57:11, 7.73s/it] {'loss': 0.2583, 'grad_norm': 0.6643222751507747, 'learning_rate': 3.491979630154685e-08, 'epoch': 0.96} + 96%|█████████▋| 11744/12188 [1:41:21<57:11, 7.73s/it] 96%|█████████▋| 11745/12188 [1:41:30<59:49, 8.10s/it] {'loss': 0.3023, 'grad_norm': 0.7248130730780148, 'learning_rate': 3.4763211647811377e-08, 'epoch': 0.96} + 96%|█████████▋| 11745/12188 [1:41:30<59:49, 8.10s/it] 96%|█████████▋| 11746/12188 [1:41:38<59:40, 8.10s/it] {'loss': 0.3034, 'grad_norm': 0.685152509358323, 'learning_rate': 3.460697763112142e-08, 'epoch': 0.96} + 96%|█████████▋| 11746/12188 [1:41:38<59:40, 8.10s/it] 96%|█████████▋| 11747/12188 [1:41:46<58:41, 7.99s/it] {'loss': 0.2614, 'grad_norm': 0.7160678202968648, 'learning_rate': 3.44510942625087e-08, 'epoch': 0.96} + 96%|█████████▋| 11747/12188 [1:41:46<58:41, 7.99s/it] 96%|█████████▋| 11748/12188 [1:41:54<57:24, 7.83s/it] {'loss': 0.334, 'grad_norm': 0.6697934647932404, 'learning_rate': 3.429556155298219e-08, 'epoch': 0.96} + 96%|█████████▋| 11748/12188 [1:41:54<57:24, 7.83s/it] 96%|█████████▋| 11749/12188 [1:42:01<56:22, 7.71s/it] {'loss': 0.3067, 'grad_norm': 0.7463842793250114, 'learning_rate': 3.414037951352478e-08, 'epoch': 0.96} + 96%|█████████▋| 11749/12188 [1:42:01<56:22, 7.71s/it] 96%|█████████▋| 11750/12188 [1:42:08<55:40, 7.63s/it] {'loss': 0.3208, 'grad_norm': 0.7444659406878115, 'learning_rate': 3.398554815509547e-08, 'epoch': 0.96} + 96%|█████████▋| 11750/12188 [1:42:08<55:40, 7.63s/it] 96%|█████████▋| 11751/12188 [1:42:17<57:12, 7.86s/it] {'loss': 0.2908, 'grad_norm': 0.6459098384704235, 'learning_rate': 3.38310674886283e-08, 'epoch': 0.96} + 96%|█████████▋| 11751/12188 [1:42:17<57:12, 7.86s/it] 96%|█████████▋| 11752/12188 [1:42:24<56:03, 7.72s/it] {'loss': 0.2929, 'grad_norm': 0.7263679514888431, 'learning_rate': 3.3676937525032314e-08, 'epoch': 0.96} + 96%|█████████▋| 11752/12188 [1:42:24<56:03, 7.72s/it] 96%|█████████▋| 11753/12188 [1:42:32<56:11, 7.75s/it] {'loss': 0.3357, 'grad_norm': 0.6551739102594666, 'learning_rate': 3.35231582751927e-08, 'epoch': 0.96} + 96%|█████████▋| 11753/12188 [1:42:32<56:11, 7.75s/it] 96%|█████████▋| 11754/12188 [1:42:40<55:40, 7.70s/it] {'loss': 0.3413, 'grad_norm': 0.6933551625860638, 'learning_rate': 3.336972974996799e-08, 'epoch': 0.96} + 96%|█████████▋| 11754/12188 [1:42:40<55:40, 7.70s/it] 96%|█████████▋| 11755/12188 [1:42:47<55:12, 7.65s/it] {'loss': 0.2654, 'grad_norm': 0.6829270029722005, 'learning_rate': 3.321665196019286e-08, 'epoch': 0.96} + 96%|█████████▋| 11755/12188 [1:42:47<55:12, 7.65s/it] 96%|█████████▋| 11756/12188 [1:42:55<55:17, 7.68s/it] {'loss': 0.289, 'grad_norm': 0.6991471688637967, 'learning_rate': 3.306392491667865e-08, 'epoch': 0.96} + 96%|█████████▋| 11756/12188 [1:42:55<55:17, 7.68s/it] 96%|█████████▋| 11757/12188 [1:43:02<54:38, 7.61s/it] {'loss': 0.3025, 'grad_norm': 0.7392448930717189, 'learning_rate': 3.291154863021007e-08, 'epoch': 0.96} + 96%|█████████▋| 11757/12188 [1:43:02<54:38, 7.61s/it] 96%|█████████▋| 11758/12188 [1:43:11<55:53, 7.80s/it] {'loss': 0.2715, 'grad_norm': 0.7546942825520505, 'learning_rate': 3.27595231115474e-08, 'epoch': 0.96} + 96%|█████████▋| 11758/12188 [1:43:11<55:53, 7.80s/it] 96%|█████████▋| 11759/12188 [1:43:18<56:03, 7.84s/it] {'loss': 0.2704, 'grad_norm': 0.7072459610522315, 'learning_rate': 3.260784837142705e-08, 'epoch': 0.96} + 96%|█████████▋| 11759/12188 [1:43:18<56:03, 7.84s/it] 96%|█████████▋| 11760/12188 [1:43:26<55:12, 7.74s/it] {'loss': 0.313, 'grad_norm': 0.6661378087313085, 'learning_rate': 3.2456524420559354e-08, 'epoch': 0.96} + 96%|█████████▋| 11760/12188 [1:43:26<55:12, 7.74s/it] 96%|█████████▋| 11761/12188 [1:43:34<55:03, 7.74s/it] {'loss': 0.3355, 'grad_norm': 0.7157668773873078, 'learning_rate': 3.230555126963131e-08, 'epoch': 0.96} + 96%|█████████▋| 11761/12188 [1:43:34<55:03, 7.74s/it] 97%|█████████▋| 11762/12188 [1:43:42<55:38, 7.84s/it] {'loss': 0.3107, 'grad_norm': 0.8191401978842636, 'learning_rate': 3.215492892930383e-08, 'epoch': 0.97} + 97%|█████████▋| 11762/12188 [1:43:42<55:38, 7.84s/it] 97%|█████████▋| 11763/12188 [1:43:49<54:39, 7.72s/it] {'loss': 0.301, 'grad_norm': 0.6897975781707293, 'learning_rate': 3.200465741021341e-08, 'epoch': 0.97} + 97%|█████████▋| 11763/12188 [1:43:49<54:39, 7.72s/it] 97%|█████████▋| 11764/12188 [1:43:57<53:50, 7.62s/it] {'loss': 0.3408, 'grad_norm': 0.843358889834847, 'learning_rate': 3.185473672297323e-08, 'epoch': 0.97} + 97%|█████████▋| 11764/12188 [1:43:57<53:50, 7.62s/it] 97%|█████████▋| 11765/12188 [1:44:04<53:07, 7.54s/it] {'loss': 0.3402, 'grad_norm': 0.7510466055212545, 'learning_rate': 3.170516687816871e-08, 'epoch': 0.97} + 97%|█████████▋| 11765/12188 [1:44:04<53:07, 7.54s/it] 97%|█████████▋| 11766/12188 [1:44:12<53:50, 7.66s/it] {'loss': 0.2778, 'grad_norm': 0.6529991969263421, 'learning_rate': 3.1555947886363626e-08, 'epoch': 0.97} + 97%|█████████▋| 11766/12188 [1:44:12<53:50, 7.66s/it] 97%|█████████▋| 11767/12188 [1:44:20<53:40, 7.65s/it] {'loss': 0.3404, 'grad_norm': 0.7107735699270215, 'learning_rate': 3.1407079758095646e-08, 'epoch': 0.97} + 97%|█████████▋| 11767/12188 [1:44:20<53:40, 7.65s/it] 97%|█████████▋| 11768/12188 [1:44:31<1:00:39, 8.67s/it] {'loss': 0.3534, 'grad_norm': 0.6966059195690419, 'learning_rate': 3.125856250387638e-08, 'epoch': 0.97} + 97%|█████████▋| 11768/12188 [1:44:31<1:00:39, 8.67s/it] 97%|█████████▋| 11769/12188 [1:44:38<58:19, 8.35s/it] {'loss': 0.2756, 'grad_norm': 0.7103916350377413, 'learning_rate': 3.111039613419464e-08, 'epoch': 0.97} + 97%|█████████▋| 11769/12188 [1:44:38<58:19, 8.35s/it] 97%|█████████▋| 11770/12188 [1:44:46<56:46, 8.15s/it] {'loss': 0.3267, 'grad_norm': 0.6936868108420756, 'learning_rate': 3.096258065951319e-08, 'epoch': 0.97} + 97%|█████████▋| 11770/12188 [1:44:46<56:46, 8.15s/it] 97%|█████████▋| 11771/12188 [1:44:57<1:02:43, 9.03s/it] {'loss': 0.2776, 'grad_norm': 0.662406752577065, 'learning_rate': 3.081511609027144e-08, 'epoch': 0.97} + 97%|█████████▋| 11771/12188 [1:44:57<1:02:43, 9.03s/it] 97%|█████████▋| 11772/12188 [1:45:04<59:01, 8.51s/it] {'loss': 0.2985, 'grad_norm': 0.7209581296349444, 'learning_rate': 3.066800243688273e-08, 'epoch': 0.97} + 97%|█████████▋| 11772/12188 [1:45:04<59:01, 8.51s/it] 97%|█████████▋| 11773/12188 [1:45:12<56:43, 8.20s/it] {'loss': 0.292, 'grad_norm': 0.7102255419906799, 'learning_rate': 3.052123970973542e-08, 'epoch': 0.97} + 97%|█████████▋| 11773/12188 [1:45:12<56:43, 8.20s/it] 97%|█████████▋| 11774/12188 [1:45:19<55:08, 7.99s/it] {'loss': 0.3165, 'grad_norm': 0.7185573342960094, 'learning_rate': 3.0374827919193994e-08, 'epoch': 0.97} + 97%|█████████▋| 11774/12188 [1:45:19<55:08, 7.99s/it] 97%|█████████▋| 11775/12188 [1:45:27<54:08, 7.86s/it] {'loss': 0.3118, 'grad_norm': 0.6726121937627337, 'learning_rate': 3.022876707559796e-08, 'epoch': 0.97} + 97%|█████████▋| 11775/12188 [1:45:27<54:08, 7.86s/it] 97%|█████████▋| 11776/12188 [1:45:34<53:18, 7.76s/it] {'loss': 0.2794, 'grad_norm': 0.6976511000687883, 'learning_rate': 3.008305718926241e-08, 'epoch': 0.97} + 97%|█████████▋| 11776/12188 [1:45:34<53:18, 7.76s/it] 97%|█████████▋| 11777/12188 [1:45:41<51:59, 7.59s/it] {'loss': 0.3299, 'grad_norm': 0.7442440970151887, 'learning_rate': 2.9937698270476324e-08, 'epoch': 0.97} + 97%|█████████▋| 11777/12188 [1:45:42<51:59, 7.59s/it] 97%|█████████▋| 11778/12188 [1:45:49<51:46, 7.58s/it] {'loss': 0.2967, 'grad_norm': 0.7935176306709048, 'learning_rate': 2.979269032950427e-08, 'epoch': 0.97} + 97%|█████████▋| 11778/12188 [1:45:49<51:46, 7.58s/it] 97%|█████████▋| 11779/12188 [1:45:56<51:13, 7.52s/it] {'loss': 0.3232, 'grad_norm': 0.7844202432870817, 'learning_rate': 2.9648033376588058e-08, 'epoch': 0.97} + 97%|█████████▋| 11779/12188 [1:45:56<51:13, 7.52s/it] 97%|█████████▋| 11780/12188 [1:46:04<51:48, 7.62s/it] {'loss': 0.3054, 'grad_norm': 0.6478186246615673, 'learning_rate': 2.950372742194174e-08, 'epoch': 0.97} + 97%|█████████▋| 11780/12188 [1:46:04<51:48, 7.62s/it] 97%|█████████▋| 11781/12188 [1:46:12<52:33, 7.75s/it] {'loss': 0.2981, 'grad_norm': 0.6835916286926742, 'learning_rate': 2.9359772475757164e-08, 'epoch': 0.97} + 97%|█████████▋| 11781/12188 [1:46:12<52:33, 7.75s/it] 97%|█████████▋| 11782/12188 [1:46:21<53:54, 7.97s/it] {'loss': 0.2975, 'grad_norm': 0.7080681723579166, 'learning_rate': 2.9216168548198975e-08, 'epoch': 0.97} + 97%|█████████▋| 11782/12188 [1:46:21<53:54, 7.97s/it] 97%|█████████▋| 11783/12188 [1:46:29<53:14, 7.89s/it] {'loss': 0.2796, 'grad_norm': 0.6719710670099752, 'learning_rate': 2.9072915649408505e-08, 'epoch': 0.97} + 97%|█████████▋| 11783/12188 [1:46:29<53:14, 7.89s/it] 97%|█████████▋| 11784/12188 [1:46:36<52:24, 7.78s/it] {'loss': 0.3149, 'grad_norm': 0.6566857922021959, 'learning_rate': 2.893001378950322e-08, 'epoch': 0.97} + 97%|█████████▋| 11784/12188 [1:46:36<52:24, 7.78s/it] 97%|█████████▋| 11785/12188 [1:46:43<51:20, 7.64s/it] {'loss': 0.277, 'grad_norm': 0.7279379061059654, 'learning_rate': 2.8787462978572823e-08, 'epoch': 0.97} + 97%|█████████▋| 11785/12188 [1:46:43<51:20, 7.64s/it] 97%|█████████▋| 11786/12188 [1:46:51<50:42, 7.57s/it] {'loss': 0.2878, 'grad_norm': 0.6858089075669881, 'learning_rate': 2.864526322668537e-08, 'epoch': 0.97} + 97%|█████████▋| 11786/12188 [1:46:51<50:42, 7.57s/it] 97%|█████████▋| 11787/12188 [1:46:58<50:47, 7.60s/it] {'loss': 0.2985, 'grad_norm': 0.654258835322203, 'learning_rate': 2.850341454388228e-08, 'epoch': 0.97} + 97%|█████████▋| 11787/12188 [1:46:58<50:47, 7.60s/it] 97%|█████████▋| 11788/12188 [1:47:06<50:20, 7.55s/it] {'loss': 0.288, 'grad_norm': 0.8938497180491137, 'learning_rate': 2.8361916940180534e-08, 'epoch': 0.97} + 97%|█████████▋| 11788/12188 [1:47:06<50:20, 7.55s/it] 97%|█████████▋| 11789/12188 [1:47:13<49:44, 7.48s/it] {'loss': 0.2971, 'grad_norm': 1.037729649311417, 'learning_rate': 2.8220770425573253e-08, 'epoch': 0.97} + 97%|█████████▋| 11789/12188 [1:47:13<49:44, 7.48s/it] 97%|█████████▋| 11790/12188 [1:47:20<49:16, 7.43s/it] {'loss': 0.3006, 'grad_norm': 0.6881692190811911, 'learning_rate': 2.8079975010026904e-08, 'epoch': 0.97} + 97%|█████████▋| 11790/12188 [1:47:21<49:16, 7.43s/it][2025-08-19 00:46:19,111] [WARNING] [stage3.py:2118:step] 1 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time + 97%|█████████▋| 11791/12188 [1:47:31<55:54, 8.45s/it] {'loss': 0.3022, 'grad_norm': 0.7383589309523635, 'learning_rate': 2.7939530703484652e-08, 'epoch': 0.97} + 97%|█████████▋| 11791/12188 [1:47:31<55:54, 8.45s/it] 97%|█████████▋| 11792/12188 [1:47:40<55:41, 8.44s/it] {'loss': 0.2711, 'grad_norm': 0.7378144101709202, 'learning_rate': 2.7799437515864668e-08, 'epoch': 0.97} + 97%|█████████▋| 11792/12188 [1:47:40<55:41, 8.44s/it] 97%|█████████▋| 11793/12188 [1:47:47<54:08, 8.22s/it] {'loss': 0.2814, 'grad_norm': 0.6836823333985783, 'learning_rate': 2.76596954570596e-08, 'epoch': 0.97} + 97%|█████████▋| 11793/12188 [1:47:47<54:08, 8.22s/it] 97%|█████████▋| 11794/12188 [1:47:55<52:33, 8.00s/it] {'loss': 0.2936, 'grad_norm': 0.7107867160544855, 'learning_rate': 2.752030453693877e-08, 'epoch': 0.97} + 97%|█████████▋| 11794/12188 [1:47:55<52:33, 8.00s/it] 97%|█████████▋| 11795/12188 [1:48:03<51:53, 7.92s/it] {'loss': 0.2852, 'grad_norm': 0.6976072985645755, 'learning_rate': 2.7381264765344862e-08, 'epoch': 0.97} + 97%|█████████▋| 11795/12188 [1:48:03<51:53, 7.92s/it] 97%|█████████▋| 11796/12188 [1:48:10<50:29, 7.73s/it] {'loss': 0.2965, 'grad_norm': 0.6754633412006988, 'learning_rate': 2.7242576152097245e-08, 'epoch': 0.97} + 97%|█████████▋| 11796/12188 [1:48:10<50:29, 7.73s/it] 97%|█████████▋| 11797/12188 [1:48:18<50:00, 7.67s/it] {'loss': 0.2951, 'grad_norm': 0.6905375990808529, 'learning_rate': 2.7104238706989194e-08, 'epoch': 0.97} + 97%|█████████▋| 11797/12188 [1:48:18<50:00, 7.67s/it] 97%|█████████▋| 11798/12188 [1:48:25<50:13, 7.73s/it] {'loss': 0.3041, 'grad_norm': 0.7672391623296835, 'learning_rate': 2.696625243979012e-08, 'epoch': 0.97} + 97%|█████████▋| 11798/12188 [1:48:25<50:13, 7.73s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7fa64d0e61b0> +[Try #0] Failed to fetch sample 4817762 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7fa64d0e61b0> +Problematic sample: {'image': '20240823_021250_before_screenshot_sub0.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Shop by category'"}, {'from': 'gpt', 'value': '\nclick(x=0.3395, y=0.1945)\n'}]} + 97%|█████████▋| 11799/12188 [1:48:33<49:41, 7.66s/it] {'loss': 0.2554, 'grad_norm': 0.635035930184306, 'learning_rate': 2.6828617360244448e-08, 'epoch': 0.97} + 97%|█████████▋| 11799/12188 [1:48:33<49:41, 7.66s/it] 97%|█████████▋| 11800/12188 [1:48:41<50:08, 7.75s/it] {'loss': 0.2935, 'grad_norm': 0.703098335597188, 'learning_rate': 2.669133347807218e-08, 'epoch': 0.97} + 97%|█████████▋| 11800/12188 [1:48:41<50:08, 7.75s/it] 97%|█████████▋| 11801/12188 [1:48:50<52:24, 8.13s/it] {'loss': 0.297, 'grad_norm': 0.7716559218070024, 'learning_rate': 2.6554400802967785e-08, 'epoch': 0.97} + 97%|█████████▋| 11801/12188 [1:48:50<52:24, 8.13s/it] 97%|█████████▋| 11802/12188 [1:48:58<51:27, 8.00s/it] {'loss': 0.3381, 'grad_norm': 0.7600061280752313, 'learning_rate': 2.6417819344600747e-08, 'epoch': 0.97} + 97%|█████████▋| 11802/12188 [1:48:58<51:27, 8.00s/it] 97%|█████████▋| 11803/12188 [1:49:05<51:06, 7.96s/it] {'loss': 0.3026, 'grad_norm': 0.7281152095304223, 'learning_rate': 2.628158911261669e-08, 'epoch': 0.97} + 97%|█████████▋| 11803/12188 [1:49:05<51:06, 7.96s/it] 97%|█████████▋| 11804/12188 [1:49:13<50:36, 7.91s/it] {'loss': 0.3277, 'grad_norm': 0.878117359464687, 'learning_rate': 2.6145710116636246e-08, 'epoch': 0.97} + 97%|█████████▋| 11804/12188 [1:49:13<50:36, 7.91s/it] 97%|█████████▋| 11805/12188 [1:49:21<50:33, 7.92s/it] {'loss': 0.2962, 'grad_norm': 0.7045401183836395, 'learning_rate': 2.6010182366254523e-08, 'epoch': 0.97} + 97%|█████████▋| 11805/12188 [1:49:21<50:33, 7.92s/it] 97%|█████████▋| 11806/12188 [1:49:29<50:16, 7.90s/it] {'loss': 0.267, 'grad_norm': 0.8132685235714541, 'learning_rate': 2.5875005871042192e-08, 'epoch': 0.97} + 97%|█████████▋| 11806/12188 [1:49:29<50:16, 7.90s/it] 97%|█████████▋| 11807/12188 [1:49:36<48:46, 7.68s/it] {'loss': 0.2926, 'grad_norm': 0.6492678706933637, 'learning_rate': 2.574018064054551e-08, 'epoch': 0.97} + 97%|█████████▋| 11807/12188 [1:49:36<48:46, 7.68s/it] 97%|█████████▋| 11808/12188 [1:49:44<48:42, 7.69s/it] {'loss': 0.2945, 'grad_norm': 0.8165854682513639, 'learning_rate': 2.5605706684285747e-08, 'epoch': 0.97} + 97%|█████████▋| 11808/12188 [1:49:44<48:42, 7.69s/it] 97%|█████████▋| 11809/12188 [1:49:54<53:16, 8.43s/it] {'loss': 0.3012, 'grad_norm': 0.7117295351403994, 'learning_rate': 2.5471584011758645e-08, 'epoch': 0.97} + 97%|█████████▋| 11809/12188 [1:49:54<53:16, 8.43s/it] 97%|█████████▋| 11810/12188 [1:50:04<55:26, 8.80s/it] {'loss': 0.349, 'grad_norm': 0.8033442978484137, 'learning_rate': 2.533781263243662e-08, 'epoch': 0.97} + 97%|█████████▋| 11810/12188 [1:50:04<55:26, 8.80s/it] 97%|█████████▋| 11811/12188 [1:50:11<52:46, 8.40s/it] {'loss': 0.311, 'grad_norm': 0.7181463202292038, 'learning_rate': 2.5204392555765456e-08, 'epoch': 0.97} + 97%|█████████▋| 11811/12188 [1:50:11<52:46, 8.40s/it] 97%|█████████▋| 11812/12188 [1:50:19<52:18, 8.35s/it] {'loss': 0.3055, 'grad_norm': 0.6781159597992863, 'learning_rate': 2.5071323791167058e-08, 'epoch': 0.97} + 97%|█████████▋| 11812/12188 [1:50:19<52:18, 8.35s/it] 97%|█████████▋| 11813/12188 [1:50:27<50:51, 8.14s/it] {'loss': 0.282, 'grad_norm': 0.87114516363029, 'learning_rate': 2.4938606348040017e-08, 'epoch': 0.97} + 97%|█████████▋| 11813/12188 [1:50:27<50:51, 8.14s/it] 97%|█████████▋| 11814/12188 [1:50:34<49:22, 7.92s/it] {'loss': 0.3029, 'grad_norm': 1.1970908837802041, 'learning_rate': 2.4806240235754618e-08, 'epoch': 0.97} + 97%|█████████▋| 11814/12188 [1:50:34<49:22, 7.92s/it] 97%|█████████▋| 11815/12188 [1:50:42<49:23, 7.95s/it] {'loss': 0.2972, 'grad_norm': 0.6641540085242239, 'learning_rate': 2.4674225463659495e-08, 'epoch': 0.97} + 97%|█████████▋| 11815/12188 [1:50:42<49:23, 7.95s/it] 97%|█████████▋| 11816/12188 [1:50:52<52:54, 8.53s/it] {'loss': 0.2947, 'grad_norm': 0.6413399175726497, 'learning_rate': 2.454256204107719e-08, 'epoch': 0.97} + 97%|█████████▋| 11816/12188 [1:50:52<52:54, 8.53s/it] 97%|█████████▋| 11817/12188 [1:51:00<51:15, 8.29s/it] {'loss': 0.2843, 'grad_norm': 0.6620351853207994, 'learning_rate': 2.4411249977305264e-08, 'epoch': 0.97} + 97%|█████████▋| 11817/12188 [1:51:00<51:15, 8.29s/it] 97%|█████████▋| 11818/12188 [1:51:07<49:13, 7.98s/it] {'loss': 0.3096, 'grad_norm': 0.7497840617845648, 'learning_rate': 2.4280289281617407e-08, 'epoch': 0.97} + 97%|█████████▋| 11818/12188 [1:51:07<49:13, 7.98s/it] 97%|█████████▋| 11819/12188 [1:51:16<49:49, 8.10s/it] {'loss': 0.2855, 'grad_norm': 0.7379717583049524, 'learning_rate': 2.414967996326123e-08, 'epoch': 0.97} + 97%|█████████▋| 11819/12188 [1:51:16<49:49, 8.10s/it] 97%|█████████▋| 11820/12188 [1:51:24<49:31, 8.07s/it] {'loss': 0.2874, 'grad_norm': 0.7237847515912483, 'learning_rate': 2.401942203146046e-08, 'epoch': 0.97} + 97%|█████████▋| 11820/12188 [1:51:24<49:31, 8.07s/it] 97%|█████████▋| 11821/12188 [1:51:32<48:58, 8.01s/it] {'loss': 0.2883, 'grad_norm': 1.020435977137287, 'learning_rate': 2.3889515495413297e-08, 'epoch': 0.97} + 97%|█████████▋| 11821/12188 [1:51:32<48:58, 8.01s/it] 97%|█████████▋| 11822/12188 [1:51:39<48:31, 7.95s/it] {'loss': 0.2779, 'grad_norm': 0.729852470527345, 'learning_rate': 2.3759960364294067e-08, 'epoch': 0.97} + 97%|█████████▋| 11822/12188 [1:51:39<48:31, 7.95s/it] 97%|█████████▋| 11823/12188 [1:51:47<47:57, 7.88s/it] {'loss': 0.2896, 'grad_norm': 0.6880037615420559, 'learning_rate': 2.363075664725156e-08, 'epoch': 0.97} + 97%|█████████▋| 11823/12188 [1:51:47<47:57, 7.88s/it] 97%|█████████▋| 11824/12188 [1:51:55<47:14, 7.79s/it] {'loss': 0.2952, 'grad_norm': 1.5304901313436763, 'learning_rate': 2.3501904353409598e-08, 'epoch': 0.97} + 97%|█████████▋| 11824/12188 [1:51:55<47:14, 7.79s/it] 97%|█████████▋| 11825/12188 [1:52:02<46:37, 7.71s/it] {'loss': 0.3152, 'grad_norm': 0.7313967453619797, 'learning_rate': 2.3373403491867562e-08, 'epoch': 0.97} + 97%|█████████▋| 11825/12188 [1:52:02<46:37, 7.71s/it] 97%|█████████▋| 11826/12188 [1:52:10<46:55, 7.78s/it] {'loss': 0.2711, 'grad_norm': 0.7401796146652507, 'learning_rate': 2.3245254071700418e-08, 'epoch': 0.97} + 97%|█████████▋| 11826/12188 [1:52:10<46:55, 7.78s/it] 97%|█████████▋| 11827/12188 [1:52:17<46:00, 7.65s/it] {'loss': 0.332, 'grad_norm': 0.7694185180377083, 'learning_rate': 2.3117456101958148e-08, 'epoch': 0.97} + 97%|█████████▋| 11827/12188 [1:52:18<46:00, 7.65s/it] 97%|█████████▋| 11828/12188 [1:52:25<46:23, 7.73s/it] {'loss': 0.3057, 'grad_norm': 0.6548918451824549, 'learning_rate': 2.2990009591664642e-08, 'epoch': 0.97} + 97%|█████████▋| 11828/12188 [1:52:25<46:23, 7.73s/it] 97%|█████████▋| 11829/12188 [1:52:35<49:20, 8.25s/it] {'loss': 0.3194, 'grad_norm': 0.7018499720590083, 'learning_rate': 2.286291454982048e-08, 'epoch': 0.97} + 97%|█████████▋| 11829/12188 [1:52:35<49:20, 8.25s/it] 97%|█████████▋| 11830/12188 [1:52:42<47:48, 8.01s/it] {'loss': 0.2878, 'grad_norm': 0.6994378354522104, 'learning_rate': 2.2736170985401262e-08, 'epoch': 0.97} + 97%|█████████▋| 11830/12188 [1:52:42<47:48, 8.01s/it] 97%|█████████▋| 11831/12188 [1:52:50<46:27, 7.81s/it] {'loss': 0.2994, 'grad_norm': 0.7262117096782068, 'learning_rate': 2.260977890735705e-08, 'epoch': 0.97} + 97%|█████████▋| 11831/12188 [1:52:50<46:27, 7.81s/it] 97%|█████████▋| 11832/12188 [1:52:58<46:34, 7.85s/it] {'loss': 0.3073, 'grad_norm': 0.727453945882473, 'learning_rate': 2.2483738324612924e-08, 'epoch': 0.97} + 97%|█████████▋| 11832/12188 [1:52:58<46:34, 7.85s/it] 97%|█████████▋| 11833/12188 [1:53:05<45:21, 7.67s/it] {'loss': 0.3249, 'grad_norm': 0.7953501504770691, 'learning_rate': 2.2358049246070658e-08, 'epoch': 0.97} + 97%|█████████▋| 11833/12188 [1:53:05<45:21, 7.67s/it] 97%|█████████▋| 11834/12188 [1:53:12<44:17, 7.51s/it] {'loss': 0.2744, 'grad_norm': 0.6890075219046383, 'learning_rate': 2.2232711680605368e-08, 'epoch': 0.97} + 97%|█████████▋| 11834/12188 [1:53:12<44:17, 7.51s/it] 97%|█████████▋| 11835/12188 [1:53:19<43:51, 7.45s/it] {'loss': 0.3285, 'grad_norm': 0.7290735435767984, 'learning_rate': 2.210772563706942e-08, 'epoch': 0.97} + 97%|█████████▋| 11835/12188 [1:53:19<43:51, 7.45s/it] 97%|█████████▋| 11836/12188 [1:53:27<44:05, 7.52s/it] {'loss': 0.3188, 'grad_norm': 0.6560437490470241, 'learning_rate': 2.1983091124287426e-08, 'epoch': 0.97} + 97%|█████████▋| 11836/12188 [1:53:27<44:05, 7.52s/it] 97%|█████████▋| 11837/12188 [1:53:35<44:29, 7.60s/it] {'loss': 0.2816, 'grad_norm': 0.7312078717881314, 'learning_rate': 2.185880815106234e-08, 'epoch': 0.97} + 97%|█████████▋| 11837/12188 [1:53:35<44:29, 7.60s/it] 97%|█████████▋| 11838/12188 [1:53:42<43:50, 7.51s/it] {'loss': 0.3005, 'grad_norm': 0.646126352171407, 'learning_rate': 2.1734876726169918e-08, 'epoch': 0.97} + 97%|█████████▋| 11838/12188 [1:53:42<43:50, 7.51s/it] 97%|█████████▋| 11839/12188 [1:53:49<43:26, 7.47s/it] {'loss': 0.3003, 'grad_norm': 0.6586355376686953, 'learning_rate': 2.1611296858362052e-08, 'epoch': 0.97} + 97%|█████████▋| 11839/12188 [1:53:49<43:26, 7.47s/it] 97%|█████████▋| 11840/12188 [1:54:01<50:03, 8.63s/it] {'loss': 0.2981, 'grad_norm': 0.7295545429498, 'learning_rate': 2.1488068556366205e-08, 'epoch': 0.97} + 97%|█████████▋| 11840/12188 [1:54:01<50:03, 8.63s/it] 97%|█████████▋| 11841/12188 [1:54:09<49:11, 8.51s/it] {'loss': 0.2922, 'grad_norm': 0.6969707480221857, 'learning_rate': 2.1365191828884857e-08, 'epoch': 0.97} + 97%|█████████▋| 11841/12188 [1:54:09<49:11, 8.51s/it] 97%|█████████▋| 11842/12188 [1:54:17<47:32, 8.24s/it] {'loss': 0.3069, 'grad_norm': 0.6974483829908057, 'learning_rate': 2.1242666684594405e-08, 'epoch': 0.97} + 97%|█████████▋| 11842/12188 [1:54:17<47:32, 8.24s/it] 97%|█████████▋| 11843/12188 [1:54:25<47:24, 8.25s/it] {'loss': 0.314, 'grad_norm': 0.7203085632174782, 'learning_rate': 2.1120493132147924e-08, 'epoch': 0.97} + 97%|█████████▋| 11843/12188 [1:54:25<47:24, 8.25s/it] 97%|█████████▋| 11844/12188 [1:54:32<45:53, 8.00s/it] {'loss': 0.3043, 'grad_norm': 0.7138719014747184, 'learning_rate': 2.0998671180172957e-08, 'epoch': 0.97} + 97%|█████████▋| 11844/12188 [1:54:32<45:53, 8.00s/it] 97%|█████████▋| 11845/12188 [1:54:40<44:31, 7.79s/it] {'loss': 0.3082, 'grad_norm': 0.7434399579216556, 'learning_rate': 2.0877200837272626e-08, 'epoch': 0.97} + 97%|█████████▋| 11845/12188 [1:54:40<44:31, 7.79s/it] 97%|█████████▋| 11846/12188 [1:54:47<43:59, 7.72s/it] {'loss': 0.2937, 'grad_norm': 0.8450178906984308, 'learning_rate': 2.075608211202451e-08, 'epoch': 0.97} + 97%|█████████▋| 11846/12188 [1:54:47<43:59, 7.72s/it] 97%|█████████▋| 11847/12188 [1:54:56<45:37, 8.03s/it] {'loss': 0.3024, 'grad_norm': 0.7231087220832549, 'learning_rate': 2.0635315012982325e-08, 'epoch': 0.97} + 97%|█████████▋| 11847/12188 [1:54:56<45:37, 8.03s/it] 97%|█████████▋| 11848/12188 [1:55:03<44:32, 7.86s/it] {'loss': 0.2691, 'grad_norm': 0.687489386650773, 'learning_rate': 2.051489954867425e-08, 'epoch': 0.97} + 97%|█████████▋| 11848/12188 [1:55:03<44:32, 7.86s/it] 97%|█████████▋| 11849/12188 [1:55:11<43:49, 7.76s/it] {'loss': 0.3105, 'grad_norm': 0.78895999041588, 'learning_rate': 2.039483572760348e-08, 'epoch': 0.97} + 97%|█████████▋| 11849/12188 [1:55:11<43:49, 7.76s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7fb3117e7240> +[Try #0] Failed to fetch sample 4692136 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7fb3117e7240> +Problematic sample: {'image': '20240823_021250_before_screenshot_sub0.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Picture 1 of 18'"}, {'from': 'gpt', 'value': '\nclick(x=0.245, y=0.4295)\n'}]} + 97%|█████████▋| 11850/12188 [1:55:20<45:35, 8.09s/it] {'loss': 0.3303, 'grad_norm': 0.777166501845725, 'learning_rate': 2.02751235582499e-08, 'epoch': 0.97} + 97%|█████████▋| 11850/12188 [1:55:20<45:35, 8.09s/it] 97%|█████████▋| 11851/12188 [1:55:27<44:05, 7.85s/it] {'loss': 0.2776, 'grad_norm': 0.8006298078922864, 'learning_rate': 2.015576304906619e-08, 'epoch': 0.97} + 97%|█████████▋| 11851/12188 [1:55:27<44:05, 7.85s/it] 97%|█████████▋| 11852/12188 [1:55:35<43:39, 7.80s/it] {'loss': 0.3158, 'grad_norm': 0.7090074049924731, 'learning_rate': 2.003675420848117e-08, 'epoch': 0.97} + 97%|█████████▋| 11852/12188 [1:55:35<43:39, 7.80s/it] 97%|█████████▋| 11853/12188 [1:55:43<43:48, 7.85s/it] {'loss': 0.2907, 'grad_norm': 0.6808212569354767, 'learning_rate': 1.991809704490033e-08, 'epoch': 0.97} + 97%|█████████▋| 11853/12188 [1:55:43<43:48, 7.85s/it] 97%|█████████▋| 11854/12188 [1:55:50<42:44, 7.68s/it] {'loss': 0.3237, 'grad_norm': 0.6495796948369779, 'learning_rate': 1.9799791566701975e-08, 'epoch': 0.97} + 97%|█████████▋| 11854/12188 [1:55:50<42:44, 7.68s/it] 97%|█████████▋| 11855/12188 [1:55:57<42:05, 7.58s/it] {'loss': 0.3467, 'grad_norm': 0.6703849856282391, 'learning_rate': 1.9681837782241086e-08, 'epoch': 0.97} + 97%|█████████▋| 11855/12188 [1:55:57<42:05, 7.58s/it] 97%|█████████▋| 11856/12188 [1:56:05<42:13, 7.63s/it] {'loss': 0.3046, 'grad_norm': 0.7127639592683117, 'learning_rate': 1.9564235699847666e-08, 'epoch': 0.97} + 97%|█████████▋| 11856/12188 [1:56:05<42:13, 7.63s/it] 97%|█████████▋| 11857/12188 [1:56:13<42:32, 7.71s/it] {'loss': 0.2791, 'grad_norm': 0.7255804908107256, 'learning_rate': 1.944698532782563e-08, 'epoch': 0.97} + 97%|█████████▋| 11857/12188 [1:56:13<42:32, 7.71s/it] 97%|█████████▋| 11858/12188 [1:56:20<41:53, 7.62s/it] {'loss': 0.3304, 'grad_norm': 0.7625606994101352, 'learning_rate': 1.9330086674456128e-08, 'epoch': 0.97} + 97%|█████████▋| 11858/12188 [1:56:20<41:53, 7.62s/it] 97%|█████████▋| 11859/12188 [1:56:28<41:32, 7.58s/it] {'loss': 0.3244, 'grad_norm': 0.734686387055305, 'learning_rate': 1.921353974799367e-08, 'epoch': 0.97} + 97%|█████████▋| 11859/12188 [1:56:28<41:32, 7.58s/it] 97%|█████████▋| 11860/12188 [1:56:36<41:38, 7.62s/it] {'loss': 0.2697, 'grad_norm': 0.7131614917844517, 'learning_rate': 1.9097344556668894e-08, 'epoch': 0.97} + 97%|█████████▋| 11860/12188 [1:56:36<41:38, 7.62s/it] 97%|█████████▋| 11861/12188 [1:56:43<41:44, 7.66s/it] {'loss': 0.3029, 'grad_norm': 0.7140473088537427, 'learning_rate': 1.8981501108686907e-08, 'epoch': 0.97} + 97%|█████████▋| 11861/12188 [1:56:43<41:44, 7.66s/it] 97%|███��█████▋| 11862/12188 [1:56:52<42:29, 7.82s/it] {'loss': 0.3004, 'grad_norm': 0.7806517525876355, 'learning_rate': 1.8866009412228937e-08, 'epoch': 0.97} + 97%|█████████▋| 11862/12188 [1:56:52<42:29, 7.82s/it] 97%|█████████▋| 11863/12188 [1:57:00<42:52, 7.92s/it] {'loss': 0.291, 'grad_norm': 0.6570360109473535, 'learning_rate': 1.8750869475450682e-08, 'epoch': 0.97} + 97%|█████████▋| 11863/12188 [1:57:00<42:52, 7.92s/it] 97%|█████████▋| 11864/12188 [1:57:08<43:10, 8.00s/it] {'loss': 0.2657, 'grad_norm': 0.7406834634261138, 'learning_rate': 1.8636081306482866e-08, 'epoch': 0.97} + 97%|█████████▋| 11864/12188 [1:57:08<43:10, 8.00s/it] 97%|█████████▋| 11865/12188 [1:57:15<42:23, 7.87s/it] {'loss': 0.2748, 'grad_norm': 0.7697500186974211, 'learning_rate': 1.852164491343178e-08, 'epoch': 0.97} + 97%|█████████▋| 11865/12188 [1:57:15<42:23, 7.87s/it] 97%|█████████▋| 11866/12188 [1:57:23<41:38, 7.76s/it] {'loss': 0.2939, 'grad_norm': 0.7281351805770805, 'learning_rate': 1.8407560304378736e-08, 'epoch': 0.97} + 97%|█████████▋| 11866/12188 [1:57:23<41:38, 7.76s/it] 97%|█████████▋| 11867/12188 [1:57:30<41:03, 7.67s/it] {'loss': 0.3123, 'grad_norm': 0.9983206095968886, 'learning_rate': 1.8293827487380623e-08, 'epoch': 0.97} + 97%|█████████▋| 11867/12188 [1:57:30<41:03, 7.67s/it] 97%|█████████▋| 11868/12188 [1:57:38<40:39, 7.62s/it] {'loss': 0.284, 'grad_norm': 0.7781674308254124, 'learning_rate': 1.818044647046824e-08, 'epoch': 0.97} + 97%|█████████▋| 11868/12188 [1:57:38<40:39, 7.62s/it] 97%|█████████▋| 11869/12188 [1:57:46<40:38, 7.65s/it] {'loss': 0.2806, 'grad_norm': 0.7511739858716409, 'learning_rate': 1.8067417261649066e-08, 'epoch': 0.97} + 97%|█████████▋| 11869/12188 [1:57:46<40:38, 7.65s/it] 97%|█████████▋| 11870/12188 [1:57:53<40:25, 7.63s/it] {'loss': 0.2991, 'grad_norm': 0.7295571222003446, 'learning_rate': 1.795473986890506e-08, 'epoch': 0.97} + 97%|█████████▋| 11870/12188 [1:57:53<40:25, 7.63s/it] 97%|█████████▋| 11871/12188 [1:58:01<39:54, 7.55s/it] {'loss': 0.3044, 'grad_norm': 0.814416039664701, 'learning_rate': 1.7842414300192624e-08, 'epoch': 0.97} + 97%|█████████▋| 11871/12188 [1:58:01<39:54, 7.55s/it] 97%|█████████▋| 11872/12188 [1:58:08<40:17, 7.65s/it] {'loss': 0.3206, 'grad_norm': 0.9101091764155838, 'learning_rate': 1.7730440563444307e-08, 'epoch': 0.97} + 97%|█████████▋| 11872/12188 [1:58:08<40:17, 7.65s/it] 97%|█████████▋| 11873/12188 [1:58:16<40:38, 7.74s/it] {'loss': 0.2856, 'grad_norm': 0.9266574771763675, 'learning_rate': 1.7618818666568226e-08, 'epoch': 0.97} + 97%|█████████▋| 11873/12188 [1:58:16<40:38, 7.74s/it] 97%|█████████▋| 11874/12188 [1:58:24<40:43, 7.78s/it] {'loss': 0.2697, 'grad_norm': 0.7075300691028347, 'learning_rate': 1.7507548617445857e-08, 'epoch': 0.97} + 97%|█████████▋| 11874/12188 [1:58:24<40:43, 7.78s/it] 97%|█████████▋| 11875/12188 [1:58:32<39:55, 7.65s/it] {'loss': 0.2871, 'grad_norm': 0.7199256206308239, 'learning_rate': 1.7396630423935356e-08, 'epoch': 0.97} + 97%|█████████▋| 11875/12188 [1:58:32<39:55, 7.65s/it] 97%|█████████▋| 11876/12188 [1:58:39<39:22, 7.57s/it] {'loss': 0.2917, 'grad_norm': 0.725469958253232, 'learning_rate': 1.7286064093869902e-08, 'epoch': 0.97} + 97%|█████████▋| 11876/12188 [1:58:39<39:22, 7.57s/it] 97%|█████████▋| 11877/12188 [1:58:47<39:27, 7.61s/it] {'loss': 0.3423, 'grad_norm': 0.8444639601217719, 'learning_rate': 1.7175849635057694e-08, 'epoch': 0.97} + 97%|█████████▋| 11877/12188 [1:58:47<39:27, 7.61s/it] 97%|█████████▋| 11878/12188 [1:58:56<41:45, 8.08s/it] {'loss': 0.2774, 'grad_norm': 0.9004385072924513, 'learning_rate': 1.7065987055280842e-08, 'epoch': 0.97} + 97%|█████████▋| 11878/12188 [1:58:56<41:45, 8.08s/it] 97%|█████████▋| 11879/12188 [1:59:04<41:00, 7.96s/it] {'loss': 0.2841, 'grad_norm': 0.7917666042840839, 'learning_rate': 1.6956476362298692e-08, 'epoch': 0.97} + 97%|█████████▋| 11879/12188 [1:59:04<41:00, 7.96s/it] 97%|█████████▋| 11880/12188 [1:59:13<42:51, 8.35s/it] {'loss': 0.3306, 'grad_norm': 0.7170991396043128, 'learning_rate': 1.6847317563843946e-08, 'epoch': 0.97} + 97%|█████████▋| 11880/12188 [1:59:13<42:51, 8.35s/it] 97%|█████████▋| 11881/12188 [1:59:22<43:17, 8.46s/it] {'loss': 0.2918, 'grad_norm': 0.9585170618591542, 'learning_rate': 1.6738510667625997e-08, 'epoch': 0.97} + 97%|█████████▋| 11881/12188 [1:59:22<43:17, 8.46s/it] 97%|█████████▋| 11882/12188 [1:59:29<41:49, 8.20s/it] {'loss': 0.2686, 'grad_norm': 0.8131578028725107, 'learning_rate': 1.6630055681327582e-08, 'epoch': 0.97} + 97%|█████████▋| 11882/12188 [1:59:29<41:49, 8.20s/it] 97%|█████████▋| 11883/12188 [1:59:36<40:19, 7.93s/it] {'loss': 0.2787, 'grad_norm': 0.6415692703131848, 'learning_rate': 1.6521952612608693e-08, 'epoch': 0.97} + 97%|█████████▋| 11883/12188 [1:59:36<40:19, 7.93s/it] 98%|█████████▊| 11884/12188 [1:59:44<39:20, 7.77s/it] {'loss': 0.3161, 'grad_norm': 0.7351592343705473, 'learning_rate': 1.6414201469102664e-08, 'epoch': 0.98} + 98%|█████████▊| 11884/12188 [1:59:44<39:20, 7.77s/it] 98%|█████████▊| 11885/12188 [1:59:51<38:47, 7.68s/it] {'loss': 0.2714, 'grad_norm': 0.6988460262753082, 'learning_rate': 1.630680225841952e-08, 'epoch': 0.98} + 98%|█████████▊| 11885/12188 [1:59:51<38:47, 7.68s/it] 98%|█████████▊| 11886/12188 [2:00:00<40:19, 8.01s/it] {'loss': 0.3092, 'grad_norm': 0.6823040237528494, 'learning_rate': 1.6199754988142635e-08, 'epoch': 0.98} + 98%|█████████▊| 11886/12188 [2:00:00<40:19, 8.01s/it] 98%|█████████▊| 11887/12188 [2:00:07<39:04, 7.79s/it] {'loss': 0.3051, 'grad_norm': 0.7139887048084075, 'learning_rate': 1.609305966583208e-08, 'epoch': 0.98} + 98%|█████████▊| 11887/12188 [2:00:07<39:04, 7.79s/it] 98%|█████████▊| 11888/12188 [2:00:15<38:14, 7.65s/it] {'loss': 0.2728, 'grad_norm': 0.6997460777205247, 'learning_rate': 1.5986716299022375e-08, 'epoch': 0.98} + 98%|█████████▊| 11888/12188 [2:00:15<38:14, 7.65s/it] 98%|█████████▊| 11889/12188 [2:00:22<37:23, 7.50s/it] {'loss': 0.306, 'grad_norm': 0.6983695816708014, 'learning_rate': 1.5880724895223077e-08, 'epoch': 0.98} + 98%|█████████▊| 11889/12188 [2:00:22<37:23, 7.50s/it] 98%|█████████▊| 11890/12188 [2:00:29<37:19, 7.51s/it] {'loss': 0.2654, 'grad_norm': 0.6234624448295786, 'learning_rate': 1.577508546191986e-08, 'epoch': 0.98} + 98%|█████████▊| 11890/12188 [2:00:29<37:19, 7.51s/it] 98%|█████████▊| 11891/12188 [2:00:40<41:25, 8.37s/it] {'loss': 0.335, 'grad_norm': 0.7629390906917347, 'learning_rate': 1.5669798006572313e-08, 'epoch': 0.98} + 98%|█████████▊| 11891/12188 [2:00:40<41:25, 8.37s/it] 98%|█████████▊| 11892/12188 [2:00:48<40:23, 8.19s/it] {'loss': 0.2741, 'grad_norm': 0.6330950119907233, 'learning_rate': 1.5564862536615598e-08, 'epoch': 0.98} + 98%|█████████▊| 11892/12188 [2:00:48<40:23, 8.19s/it] 98%|█████████▊| 11893/12188 [2:00:55<39:11, 7.97s/it] {'loss': 0.2891, 'grad_norm': 0.7726551055890479, 'learning_rate': 1.5460279059459903e-08, 'epoch': 0.98} + 98%|█████████▊| 11893/12188 [2:00:55<39:11, 7.97s/it] 98%|█████████▊| 11894/12188 [2:01:02<38:20, 7.82s/it] {'loss': 0.2943, 'grad_norm': 0.714112646427943, 'learning_rate': 1.535604758249154e-08, 'epoch': 0.98} + 98%|█████████▊| 11894/12188 [2:01:03<38:20, 7.82s/it] 98%|█████████▊| 11895/12188 [2:01:10<38:04, 7.80s/it] {'loss': 0.3189, 'grad_norm': 0.9619888988796041, 'learning_rate': 1.5252168113070177e-08, 'epoch': 0.98} + 98%|█████████▊| 11895/12188 [2:01:10<38:04, 7.80s/it] 98%|█████████▊| 11896/12188 [2:01:19<39:24, 8.10s/it] {'loss': 0.3331, 'grad_norm': 0.6562409572957998, 'learning_rate': 1.5148640658532164e-08, 'epoch': 0.98} + 98%|█████████▊| 11896/12188 [2:01:19<39:24, 8.10s/it] 98%|█████████▊| 11897/12188 [2:01:27<39:06, 8.06s/it] {'loss': 0.2717, 'grad_norm': 0.7818313746567441, 'learning_rate': 1.5045465226188882e-08, 'epoch': 0.98} + 98%|█████████▊| 11897/12188 [2:01:27<39:06, 8.06s/it] 98%|█████████▊| 11898/12188 [2:01:34<38:03, 7.88s/it] {'loss': 0.2935, 'grad_norm': 0.795521041301965, 'learning_rate': 1.4942641823325056e-08, 'epoch': 0.98} + 98%|█████████▊| 11898/12188 [2:01:34<38:03, 7.88s/it] 98%|█████████▊| 11899/12188 [2:01:44<40:25, 8.39s/it] {'loss': 0.2782, 'grad_norm': 0.6990926916574886, 'learning_rate': 1.4840170457203206e-08, 'epoch': 0.98} + 98%|█████████▊| 11899/12188 [2:01:44<40:25, 8.39s/it] 98%|█████████▊| 11900/12188 [2:01:52<39:44, 8.28s/it] {'loss': 0.2798, 'grad_norm': 0.6658979252999624, 'learning_rate': 1.4738051135059217e-08, 'epoch': 0.98} + 98%|█████████▊| 11900/12188 [2:01:52<39:44, 8.28s/it] 98%|█████████▊| 11901/12188 [2:02:00<39:26, 8.25s/it] {'loss': 0.335, 'grad_norm': 0.7487380992600067, 'learning_rate': 1.463628386410454e-08, 'epoch': 0.98} + 98%|█████████▊| 11901/12188 [2:02:00<39:26, 8.25s/it] 98%|█████████▊| 11902/12188 [2:02:08<38:13, 8.02s/it] {'loss': 0.2726, 'grad_norm': 0.6744265241032749, 'learning_rate': 1.4534868651526202e-08, 'epoch': 0.98} + 98%|█████████▊| 11902/12188 [2:02:08<38:13, 8.02s/it] 98%|█████████▊| 11903/12188 [2:02:15<37:06, 7.81s/it] {'loss': 0.2932, 'grad_norm': 0.6973889820684239, 'learning_rate': 1.4433805504485699e-08, 'epoch': 0.98} + 98%|█████████▊| 11903/12188 [2:02:15<37:06, 7.81s/it] 98%|█████████▊| 11904/12188 [2:02:23<37:01, 7.82s/it] {'loss': 0.3023, 'grad_norm': 0.7433379230425784, 'learning_rate': 1.4333094430119542e-08, 'epoch': 0.98} + 98%|█████████▊| 11904/12188 [2:02:23<37:01, 7.82s/it] 98%|█████████▊| 11905/12188 [2:02:30<36:14, 7.68s/it] {'loss': 0.2788, 'grad_norm': 0.6511351598282097, 'learning_rate': 1.4232735435540378e-08, 'epoch': 0.98} + 98%|█████████▊| 11905/12188 [2:02:30<36:14, 7.68s/it] 98%|█████████▊| 11906/12188 [2:02:38<36:09, 7.69s/it] {'loss': 0.3196, 'grad_norm': 0.6976879494252395, 'learning_rate': 1.4132728527835315e-08, 'epoch': 0.98} + 98%|█████████▊| 11906/12188 [2:02:38<36:09, 7.69s/it] 98%|█████████▊| 11907/12188 [2:02:45<35:33, 7.59s/it] {'loss': 0.3284, 'grad_norm': 0.777043327488366, 'learning_rate': 1.4033073714065926e-08, 'epoch': 0.98} + 98%|█████████▊| 11907/12188 [2:02:45<35:33, 7.59s/it] 98%|█████████▊| 11908/12188 [2:02:53<34:54, 7.48s/it] {'loss': 0.3072, 'grad_norm': 0.7183288543642025, 'learning_rate': 1.3933771001271023e-08, 'epoch': 0.98} + 98%|█████████▊| 11908/12188 [2:02:53<34:54, 7.48s/it] 98%|█████████▊| 11909/12188 [2:03:00<35:10, 7.56s/it] {'loss': 0.3209, 'grad_norm': 0.69828211192818, 'learning_rate': 1.3834820396462223e-08, 'epoch': 0.98} + 98%|█████████▊| 11909/12188 [2:03:00<35:10, 7.56s/it] 98%|█████████▊| 11910/12188 [2:03:08<35:27, 7.65s/it] {'loss': 0.2628, 'grad_norm': 0.6745426172229101, 'learning_rate': 1.3736221906627822e-08, 'epoch': 0.98} + 98%|█████████▊| 11910/12188 [2:03:08<35:27, 7.65s/it] 98%|█████████▊| 11911/12188 [2:03:16<35:51, 7.77s/it] {'loss': 0.2834, 'grad_norm': 0.8232738625304331, 'learning_rate': 1.363797553873003e-08, 'epoch': 0.98} + 98%|█████████▊| 11911/12188 [2:03:16<35:51, 7.77s/it] 98%|█████████▊| 11912/12188 [2:03:24<35:49, 7.79s/it] {'loss': 0.2882, 'grad_norm': 0.9397519763465695, 'learning_rate': 1.3540081299707187e-08, 'epoch': 0.98} + 98%|█████████▊| 11912/12188 [2:03:24<35:49, 7.79s/it] 98%|█████████▊| 11913/12188 [2:03:32<35:38, 7.78s/it] {'loss': 0.2662, 'grad_norm': 1.2200544403635472, 'learning_rate': 1.3442539196472647e-08, 'epoch': 0.98} + 98%|█████████▊| 11913/12188 [2:03:32<35:38, 7.78s/it] 98%|█████████▊| 11914/12188 [2:03:39<35:05, 7.68s/it] {'loss': 0.3048, 'grad_norm': 0.75353702828581, 'learning_rate': 1.334534923591424e-08, 'epoch': 0.98} + 98%|█████████▊| 11914/12188 [2:03:39<35:05, 7.68s/it] 98%|█████████▊| 11915/12188 [2:03:47<34:37, 7.61s/it] {'loss': 0.2824, 'grad_norm': 0.6671476460249791, 'learning_rate': 1.324851142489536e-08, 'epoch': 0.98} + 98%|█████████▊| 11915/12188 [2:03:47<34:37, 7.61s/it] 98%|█████████▊| 11916/12188 [2:03:55<35:17, 7.79s/it] {'loss': 0.3191, 'grad_norm': 0.801213933887738, 'learning_rate': 1.3152025770255539e-08, 'epoch': 0.98} + 98%|█████████▊| 11916/12188 [2:03:55<35:17, 7.79s/it] 98%|█████████▊| 11917/12188 [2:04:02<34:49, 7.71s/it] {'loss': 0.2768, 'grad_norm': 0.639686531346677, 'learning_rate': 1.3055892278807103e-08, 'epoch': 0.98} + 98%|█████████▊| 11917/12188 [2:04:02<34:49, 7.71s/it] 98%|█████████▊| 11918/12188 [2:04:11<35:17, 7.84s/it] {'loss': 0.285, 'grad_norm': 0.6653936719897269, 'learning_rate': 1.2960110957339623e-08, 'epoch': 0.98} + 98%|█████████▊| 11918/12188 [2:04:11<35:17, 7.84s/it] 98%|█████████▊| 11919/12188 [2:04:19<35:22, 7.89s/it] {'loss': 0.2962, 'grad_norm': 0.7087604380738076, 'learning_rate': 1.2864681812616575e-08, 'epoch': 0.98} + 98%|█████████▊| 11919/12188 [2:04:19<35:22, 7.89s/it] 98%|█████████▊| 11920/12188 [2:04:26<34:56, 7.82s/it] {'loss': 0.3032, 'grad_norm': 0.733306386948836, 'learning_rate': 1.276960485137757e-08, 'epoch': 0.98} + 98%|█████████▊| 11920/12188 [2:04:26<34:56, 7.82s/it] 98%|█████████▊| 11921/12188 [2:04:34<34:48, 7.82s/it] {'loss': 0.2738, 'grad_norm': 0.6921642552887733, 'learning_rate': 1.2674880080336682e-08, 'epoch': 0.98} + 98%|█████████▊| 11921/12188 [2:04:34<34:48, 7.82s/it] 98%|█████████▊| 11922/12188 [2:04:41<34:03, 7.68s/it] {'loss': 0.2815, 'grad_norm': 0.7573325721611717, 'learning_rate': 1.258050750618245e-08, 'epoch': 0.98} + 98%|█████████▊| 11922/12188 [2:04:41<34:03, 7.68s/it] 98%|█████████▊| 11923/12188 [2:04:49<33:32, 7.59s/it] {'loss': 0.2838, 'grad_norm': 0.6569582869595058, 'learning_rate': 1.2486487135580094e-08, 'epoch': 0.98} + 98%|█████████▊| 11923/12188 [2:04:49<33:32, 7.59s/it] 98%|█████████▊| 11924/12188 [2:04:57<33:43, 7.67s/it] {'loss': 0.2938, 'grad_norm': 0.7360793381463095, 'learning_rate': 1.2392818975168752e-08, 'epoch': 0.98} + 98%|█████████▊| 11924/12188 [2:04:57<33:43, 7.67s/it] 98%|█████████▊| 11925/12188 [2:05:06<35:19, 8.06s/it] {'loss': 0.2823, 'grad_norm': 0.7944275878684655, 'learning_rate': 1.2299503031563687e-08, 'epoch': 0.98} + 98%|█████████▊| 11925/12188 [2:05:06<35:19, 8.06s/it] 98%|█████████▊| 11926/12188 [2:05:13<34:52, 7.99s/it] {'loss': 0.3217, 'grad_norm': 0.7019107344168205, 'learning_rate': 1.2206539311354071e-08, 'epoch': 0.98} + 98%|█████████▊| 11926/12188 [2:05:13<34:52, 7.99s/it] 98%|█████████▊| 11927/12188 [2:05:21<33:48, 7.77s/it] {'loss': 0.2606, 'grad_norm': 0.699565143031579, 'learning_rate': 1.211392782110521e-08, 'epoch': 0.98} + 98%|█████████▊| 11927/12188 [2:05:21<33:48, 7.77s/it] 98%|█████████▊| 11928/12188 [2:05:28<32:56, 7.60s/it] {'loss': 0.2987, 'grad_norm': 0.6841079694710712, 'learning_rate': 1.2021668567357425e-08, 'epoch': 0.98} + 98%|█████████▊| 11928/12188 [2:05:28<32:56, 7.60s/it] 98%|█████████▊| 11929/12188 [2:05:35<32:46, 7.59s/it] {'loss': 0.2913, 'grad_norm': 0.7990467512646007, 'learning_rate': 1.1929761556625507e-08, 'epoch': 0.98} + 98%|█████████▊| 11929/12188 [2:05:35<32:46, 7.59s/it] 98%|█████████▊| 11930/12188 [2:05:43<32:23, 7.53s/it] {'loss': 0.3169, 'grad_norm': 0.6730102157035833, 'learning_rate': 1.183820679539982e-08, 'epoch': 0.98} + 98%|█████████▊| 11930/12188 [2:05:43<32:23, 7.53s/it] 98%|█████████▊| 11931/12188 [2:05:51<32:42, 7.64s/it] {'loss': 0.303, 'grad_norm': 0.7186480504521766, 'learning_rate': 1.1747004290145747e-08, 'epoch': 0.98} + 98%|█████████▊| 11931/12188 [2:05:51<32:42, 7.64s/it] 98%|█████████▊| 11932/12188 [2:05:58<32:20, 7.58s/it] {'loss': 0.2681, 'grad_norm': 0.6954729337214143, 'learning_rate': 1.1656154047303691e-08, 'epoch': 0.98} + 98%|█████████▊| 11932/12188 [2:05:58<32:20, 7.58s/it] 98%|█████████▊| 11933/12188 [2:06:07<33:32, 7.89s/it] {'loss': 0.3109, 'grad_norm': 0.6971602777615542, 'learning_rate': 1.1565656073290188e-08, 'epoch': 0.98} + 98%|█████████▊| 11933/12188 [2:06:07<33:32, 7.89s/it] 98%|█████████▊| 11934/12188 [2:06:15<33:16, 7.86s/it] {'loss': 0.2941, 'grad_norm': 0.7639642774163216, 'learning_rate': 1.147551037449568e-08, 'epoch': 0.98} + 98%|█████████▊| 11934/12188 [2:06:15<33:16, 7.86s/it] 98%|█████████▊| 11935/12188 [2:06:22<32:34, 7.72s/it] {'loss': 0.2882, 'grad_norm': 0.6885622107146153, 'learning_rate': 1.138571695728563e-08, 'epoch': 0.98} + 98%|█████████▊| 11935/12188 [2:06:22<32:34, 7.72s/it] 98%|█████████▊| 11936/12188 [2:06:29<31:59, 7.62s/it] {'loss': 0.3063, 'grad_norm': 0.7194506006520661, 'learning_rate': 1.1296275828001635e-08, 'epoch': 0.98} + 98%|█████████▊| 11936/12188 [2:06:29<31:59, 7.62s/it] 98%|█████████▊| 11937/12188 [2:06:37<32:26, 7.76s/it] {'loss': 0.3195, 'grad_norm': 0.7237481981748485, 'learning_rate': 1.1207186992959197e-08, 'epoch': 0.98} + 98%|█████████▊| 11937/12188 [2:06:37<32:26, 7.76s/it] 98%|█████████▊| 11938/12188 [2:06:46<32:49, 7.88s/it] {'loss': 0.2589, 'grad_norm': 0.672828819250741, 'learning_rate': 1.1118450458450503e-08, 'epoch': 0.98} + 98%|█████████▊| 11938/12188 [2:06:46<32:49, 7.88s/it] 98%|█████████▊| 11939/12188 [2:06:53<31:50, 7.67s/it] {'loss': 0.2757, 'grad_norm': 0.7242980807925332, 'learning_rate': 1.1030066230741099e-08, 'epoch': 0.98} + 98%|█████████▊| 11939/12188 [2:06:53<31:50, 7.67s/it] 98%|█████████▊| 11940/12188 [2:07:01<31:45, 7.68s/it] {'loss': 0.2929, 'grad_norm': 0.7017461368699519, 'learning_rate': 1.094203431607377e-08, 'epoch': 0.98} + 98%|█████████▊| 11940/12188 [2:07:01<31:45, 7.68s/it] 98%|█████████▊| 11941/12188 [2:07:08<31:29, 7.65s/it] {'loss': 0.2953, 'grad_norm': 0.6646447107136422, 'learning_rate': 1.0854354720664095e-08, 'epoch': 0.98} + 98%|█████████▊| 11941/12188 [2:07:08<31:29, 7.65s/it] 98%|█████████▊| 11942/12188 [2:07:15<31:04, 7.58s/it] {'loss': 0.2753, 'grad_norm': 0.7031329425762648, 'learning_rate': 1.0767027450703793e-08, 'epoch': 0.98} + 98%|█████████▊| 11942/12188 [2:07:16<31:04, 7.58s/it] 98%|█████████▊| 11943/12188 [2:07:24<31:55, 7.82s/it] {'loss': 0.2774, 'grad_norm': 0.7620013092535042, 'learning_rate': 1.0680052512360706e-08, 'epoch': 0.98} + 98%|█████████▊| 11943/12188 [2:07:24<31:55, 7.82s/it] 98%|█████████▊| 11944/12188 [2:07:32<32:27, 7.98s/it] {'loss': 0.2972, 'grad_norm': 0.6592802830515809, 'learning_rate': 1.0593429911776588e-08, 'epoch': 0.98} + 98%|█████████▊| 11944/12188 [2:07:32<32:27, 7.98s/it] 98%|█████████▊| 11945/12188 [2:07:40<32:20, 7.99s/it] {'loss': 0.2933, 'grad_norm': 0.7607778865242127, 'learning_rate': 1.0507159655067656e-08, 'epoch': 0.98} + 98%|█████████▊| 11945/12188 [2:07:40<32:20, 7.99s/it] 98%|█████████▊| 11946/12188 [2:07:51<35:34, 8.82s/it] {'loss': 0.2998, 'grad_norm': 0.6456257486821463, 'learning_rate': 1.0421241748327371e-08, 'epoch': 0.98} + 98%|█████████▊| 11946/12188 [2:07:51<35:34, 8.82s/it] 98%|█████████▊| 11947/12188 [2:07:59<34:00, 8.47s/it] {'loss': 0.3422, 'grad_norm': 0.8039681822834317, 'learning_rate': 1.0335676197622546e-08, 'epoch': 0.98} + 98%|█████████▊| 11947/12188 [2:07:59<34:00, 8.47s/it] 98%|█████████▊| 11948/12188 [2:08:06<32:53, 8.22s/it] {'loss': 0.292, 'grad_norm': 0.7150968206029614, 'learning_rate': 1.025046300899557e-08, 'epoch': 0.98} + 98%|█████████▊| 11948/12188 [2:08:06<32:53, 8.22s/it] 98%|█████████▊| 11949/12188 [2:08:14<31:40, 7.95s/it] {'loss': 0.2822, 'grad_norm': 0.6942269447264758, 'learning_rate': 1.0165602188464408e-08, 'epoch': 0.98} + 98%|█████████▊| 11949/12188 [2:08:14<31:40, 7.95s/it] 98%|█████████▊| 11950/12188 [2:08:23<33:34, 8.46s/it] {'loss': 0.3014, 'grad_norm': 0.7019116579158207, 'learning_rate': 1.0081093742021486e-08, 'epoch': 0.98} + 98%|█████████▊| 11950/12188 [2:08:23<33:34, 8.46s/it] 98%|█████████▊| 11951/12188 [2:08:31<32:39, 8.27s/it] {'loss': 0.2905, 'grad_norm': 0.7108022306725496, 'learning_rate': 9.996937675635365e-09, 'epoch': 0.98} + 98%|█████████▊| 11951/12188 [2:08:31<32:39, 8.27s/it] 98%|█████████▊| 11952/12188 [2:08:41<34:25, 8.75s/it] {'loss': 0.281, 'grad_norm': 0.6839908054594752, 'learning_rate': 9.913133995247959e-09, 'epoch': 0.98} + 98%|█████████▊| 11952/12188 [2:08:41<34:25, 8.75s/it] 98%|█████████▊| 11953/12188 [2:08:49<32:51, 8.39s/it] {'loss': 0.3505, 'grad_norm': 0.7359122627917103, 'learning_rate': 9.829682706777866e-09, 'epoch': 0.98} + 98%|█████████▊| 11953/12188 [2:08:49<32:51, 8.39s/it] 98%|█████████▊| 11954/12188 [2:08:56<31:21, 8.04s/it] {'loss': 0.3262, 'grad_norm': 0.7109664223038876, 'learning_rate': 9.74658381611815e-09, 'epoch': 0.98} + 98%|█████████▊| 11954/12188 [2:08:56<31:21, 8.04s/it] 98%|█████████▊| 11955/12188 [2:09:03<30:31, 7.86s/it] {'loss': 0.3187, 'grad_norm': 0.7206691906665511, 'learning_rate': 9.66383732913745e-09, 'epoch': 0.98} + 98%|█████████▊| 11955/12188 [2:09:03<30:31, 7.86s/it] 98%|█████████▊| 11956/12188 [2:09:11<30:01, 7.76s/it] {'loss': 0.307, 'grad_norm': 0.7883568229475316, 'learning_rate': 9.581443251678869e-09, 'epoch': 0.98} + 98%|█████████▊| 11956/12188 [2:09:11<30:01, 7.76s/it] 98%|█████████▊| 11957/12188 [2:09:18<29:23, 7.63s/it] {'loss': 0.282, 'grad_norm': 0.6727231035555945, 'learning_rate': 9.499401589561085e-09, 'epoch': 0.98} + 98%|█████████▊| 11957/12188 [2:09:18<29:23, 7.63s/it] 98%|█████████▊| 11958/12188 [2:09:25<28:58, 7.56s/it] {'loss': 0.2794, 'grad_norm': 0.6683989602523375, 'learning_rate': 9.4177123485778e-09, 'epoch': 0.98} + 98%|█████████▊| 11958/12188 [2:09:25<28:58, 7.56s/it] 98%|█████████▊| 11959/12188 [2:09:33<28:42, 7.52s/it] {'loss': 0.2798, 'grad_norm': 0.7199804268409076, 'learning_rate': 9.336375534497732e-09, 'epoch': 0.98} + 98%|█████████▊| 11959/12188 [2:09:33<28:42, 7.52s/it] 98%|█████████▊| 11960/12188 [2:09:40<28:39, 7.54s/it] {'loss': 0.2903, 'grad_norm': 0.709818663578977, 'learning_rate': 9.255391153064065e-09, 'epoch': 0.98} + 98%|█████████▊| 11960/12188 [2:09:40<28:39, 7.54s/it] 98%|█████████▊| 11961/12188 [2:09:48<28:48, 7.62s/it] {'loss': 0.2794, 'grad_norm': 0.6882524483479812, 'learning_rate': 9.17475920999722e-09, 'epoch': 0.98} + 98%|█████████▊| 11961/12188 [2:09:48<28:48, 7.62s/it] 98%|█████████▊| 11962/12188 [2:09:57<29:26, 7.82s/it] {'loss': 0.2733, 'grad_norm': 0.7533908860506451, 'learning_rate': 9.094479710990422e-09, 'epoch': 0.98} + 98%|█████████▊| 11962/12188 [2:09:57<29:26, 7.82s/it] 98%|█████████▊| 11963/12188 [2:10:05<29:37, 7.90s/it] {'loss': 0.2885, 'grad_norm': 0.685076286795829, 'learning_rate': 9.014552661712473e-09, 'epoch': 0.98} + 98%|█████████▊| 11963/12188 [2:10:05<29:37, 7.90s/it] 98%|█████████▊| 11964/12188 [2:10:12<28:58, 7.76s/it] {'loss': 0.2695, 'grad_norm': 0.6583766793320132, 'learning_rate': 8.934978067808852e-09, 'epoch': 0.98} + 98%|█████████▊| 11964/12188 [2:10:12<28:58, 7.76s/it] 98%|█████████▊| 11965/12188 [2:10:20<28:49, 7.75s/it] {'loss': 0.281, 'grad_norm': 0.7061661698360902, 'learning_rate': 8.855755934897847e-09, 'epoch': 0.98} + 98%|█████████▊| 11965/12188 [2:10:20<28:49, 7.75s/it] 98%|█████████▊| 11966/12188 [2:10:27<28:21, 7.66s/it] {'loss': 0.3184, 'grad_norm': 0.7284648064067792, 'learning_rate': 8.776886268574424e-09, 'epoch': 0.98} + 98%|█████████▊| 11966/12188 [2:10:27<28:21, 7.66s/it] 98%|█████████▊| 11967/12188 [2:10:35<27:47, 7.55s/it] {'loss': 0.2848, 'grad_norm': 0.6782327568288081, 'learning_rate': 8.698369074408575e-09, 'epoch': 0.98} + 98%|█████████▊| 11967/12188 [2:10:35<27:47, 7.55s/it] 98%|█████████▊| 11968/12188 [2:10:42<27:54, 7.61s/it] {'loss': 0.3112, 'grad_norm': 0.7069161449791997, 'learning_rate': 8.620204357944195e-09, 'epoch': 0.98} + 98%|█████████▊| 11968/12188 [2:10:42<27:54, 7.61s/it] 98%|█████████▊| 11969/12188 [2:10:50<27:37, 7.57s/it] {'loss': 0.2931, 'grad_norm': 0.7367157240263079, 'learning_rate': 8.542392124702426e-09, 'epoch': 0.98} + 98%|█████████▊| 11969/12188 [2:10:50<27:37, 7.57s/it] 98%|█████████▊| 11970/12188 [2:10:57<27:18, 7.51s/it] {'loss': 0.2877, 'grad_norm': 1.6756490422597825, 'learning_rate': 8.464932380177204e-09, 'epoch': 0.98} + 98%|█████████▊| 11970/12188 [2:10:57<27:18, 7.51s/it] 98%|█████████▊| 11971/12188 [2:11:05<27:16, 7.54s/it] {'loss': 0.2961, 'grad_norm': 0.7001808207141774, 'learning_rate': 8.387825129839155e-09, 'epoch': 0.98} + 98%|█████████▊| 11971/12188 [2:11:05<27:16, 7.54s/it] 98%|█████████▊| 11972/12188 [2:11:13<27:46, 7.72s/it] {'loss': 0.2538, 'grad_norm': 0.6612142447106969, 'learning_rate': 8.311070379132812e-09, 'epoch': 0.98} + 98%|█████████▊| 11972/12188 [2:11:13<27:46, 7.72s/it] 98%|█████████▊| 11973/12188 [2:11:20<27:19, 7.63s/it] {'loss': 0.2616, 'grad_norm': 0.6903208120527958, 'learning_rate': 8.234668133479395e-09, 'epoch': 0.98} + 98%|█████████▊| 11973/12188 [2:11:20<27:19, 7.63s/it] 98%|█████████▊| 11974/12188 [2:11:28<27:00, 7.57s/it] {'loss': 0.31, 'grad_norm': 0.6968877377107683, 'learning_rate': 8.158618398273477e-09, 'epoch': 0.98} + 98%|█████████▊| 11974/12188 [2:11:28<27:00, 7.57s/it] 98%|█████████▊| 11975/12188 [2:11:38<29:31, 8.32s/it] {'loss': 0.2771, 'grad_norm': 0.6426872937035143, 'learning_rate': 8.082921178886316e-09, 'epoch': 0.98} + 98%|█████████▊| 11975/12188 [2:11:38<29:31, 8.32s/it] 98%|█████████▊| 11976/12188 [2:11:46<28:47, 8.15s/it] {'loss': 0.2678, 'grad_norm': 0.7671479234049666, 'learning_rate': 8.007576480663082e-09, 'epoch': 0.98} + 98%|█████████▊| 11976/12188 [2:11:46<28:47, 8.15s/it] 98%|█████████▊| 11977/12188 [2:11:53<28:23, 8.07s/it] {'loss': 0.298, 'grad_norm': 0.6527261330528418, 'learning_rate': 7.932584308923962e-09, 'epoch': 0.98} + 98%|█████████▊| 11977/12188 [2:11:53<28:23, 8.07s/it] 98%|█████████▊| 11978/12188 [2:12:01<27:29, 7.85s/it] {'loss': 0.3018, 'grad_norm': 0.8489005547606931, 'learning_rate': 7.857944668965833e-09, 'epoch': 0.98} + 98%|█████████▊| 11978/12188 [2:12:01<27:29, 7.85s/it] 98%|█████████▊| 11979/12188 [2:12:08<26:59, 7.75s/it] {'loss': 0.3196, 'grad_norm': 0.7409085178766682, 'learning_rate': 7.783657566058922e-09, 'epoch': 0.98} + 98%|█████████▊| 11979/12188 [2:12:08<26:59, 7.75s/it] 98%|█████████▊| 11980/12188 [2:12:16<26:48, 7.73s/it] {'loss': 0.294, 'grad_norm': 0.7353703539712663, 'learning_rate': 7.709723005450143e-09, 'epoch': 0.98} + 98%|█████████▊| 11980/12188 [2:12:16<26:48, 7.73s/it] 98%|█████████▊| 11981/12188 [2:12:24<26:34, 7.70s/it] {'loss': 0.2989, 'grad_norm': 0.7040919664915308, 'learning_rate': 7.636140992359209e-09, 'epoch': 0.98} + 98%|█████████▊| 11981/12188 [2:12:24<26:34, 7.70s/it] 98%|█████████▊| 11982/12188 [2:12:32<26:48, 7.81s/it] {'loss': 0.2807, 'grad_norm': 0.7680694340337009, 'learning_rate': 7.56291153198363e-09, 'epoch': 0.98} + 98%|█████████▊| 11982/12188 [2:12:32<26:48, 7.81s/it] 98%|█████████▊| 11983/12188 [2:12:39<26:11, 7.66s/it] {'loss': 0.2743, 'grad_norm': 0.7252608993747147, 'learning_rate': 7.490034629494269e-09, 'epoch': 0.98} + 98%|█████████▊| 11983/12188 [2:12:39<26:11, 7.66s/it] 98%|█████████▊| 11984/12188 [2:12:46<25:42, 7.56s/it] {'loss': 0.3252, 'grad_norm': 0.729418249350231, 'learning_rate': 7.417510290037011e-09, 'epoch': 0.98} + 98%|█████████▊| 11984/12188 [2:12:46<25:42, 7.56s/it] 98%|█████████▊| 11985/12188 [2:12:54<25:24, 7.51s/it] {'loss': 0.2875, 'grad_norm': 0.7734436958455039, 'learning_rate': 7.345338518734979e-09, 'epoch': 0.98} + 98%|█████████▊| 11985/12188 [2:12:54<25:24, 7.51s/it] 98%|█████████▊| 11986/12188 [2:13:01<25:31, 7.58s/it] {'loss': 0.3242, 'grad_norm': 0.6889613779948398, 'learning_rate': 7.273519320682987e-09, 'epoch': 0.98} + 98%|█████████▊| 11986/12188 [2:13:01<25:31, 7.58s/it] 98%|█████████▊| 11987/12188 [2:13:09<25:32, 7.62s/it] {'loss': 0.2895, 'grad_norm': 0.6677423937012755, 'learning_rate': 7.202052700954198e-09, 'epoch': 0.98} + 98%|█████████▊| 11987/12188 [2:13:09<25:32, 7.62s/it] 98%|█████████▊| 11988/12188 [2:13:18<26:32, 7.96s/it] {'loss': 0.2835, 'grad_norm': 0.6561968681979079, 'learning_rate': 7.130938664594578e-09, 'epoch': 0.98} + 98%|█████████▊| 11988/12188 [2:13:18<26:32, 7.96s/it] 98%|█████████▊| 11989/12188 [2:13:25<25:57, 7.83s/it] {'loss': 0.2781, 'grad_norm': 0.7069317137126904, 'learning_rate': 7.060177216626218e-09, 'epoch': 0.98} + 98%|█████████▊| 11989/12188 [2:13:25<25:57, 7.83s/it] 98%|█████████▊| 11990/12188 [2:13:33<25:32, 7.74s/it] {'loss': 0.3068, 'grad_norm': 0.7195869348752281, 'learning_rate': 6.9897683620467894e-09, 'epoch': 0.98} + 98%|█████████▊| 11990/12188 [2:13:33<25:32, 7.74s/it] 98%|█████████▊| 11991/12188 [2:13:42<26:12, 7.98s/it] {'loss': 0.2804, 'grad_norm': 0.7430286496472137, 'learning_rate': 6.91971210582787e-09, 'epoch': 0.98} + 98%|█████████▊| 11991/12188 [2:13:42<26:12, 7.98s/it] 98%|█████████▊| 11992/12188 [2:13:50<26:35, 8.14s/it] {'loss': 0.3033, 'grad_norm': 0.7079335504836638, 'learning_rate': 6.850008452916612e-09, 'epoch': 0.98} + 98%|█████████▊| 11992/12188 [2:13:50<26:35, 8.14s/it] 98%|█████████▊| 11993/12188 [2:13:58<26:17, 8.09s/it] {'loss': 0.3334, 'grad_norm': 0.7553456280106791, 'learning_rate': 6.780657408236302e-09, 'epoch': 0.98} + 98%|█████████▊| 11993/12188 [2:13:58<26:17, 8.09s/it] 98%|█████████▊| 11994/12188 [2:14:07<26:53, 8.31s/it] {'loss': 0.2927, 'grad_norm': 0.6713499276859001, 'learning_rate': 6.711658976683022e-09, 'epoch': 0.98} + 98%|█████████▊| 11994/12188 [2:14:07<26:53, 8.31s/it] 98%|█████████▊| 11995/12188 [2:14:14<25:43, 8.00s/it] {'loss': 0.2894, 'grad_norm': 0.7015279011415045, 'learning_rate': 6.643013163130651e-09, 'epoch': 0.98} + 98%|█████████▊| 11995/12188 [2:14:14<25:43, 8.00s/it] 98%|█████████▊| 11996/12188 [2:14:22<25:51, 8.08s/it] {'loss': 0.2852, 'grad_norm': 0.6781556960230659, 'learning_rate': 6.574719972425869e-09, 'epoch': 0.98} + 98%|█████████▊| 11996/12188 [2:14:22<25:51, 8.08s/it] 98%|█████████▊| 11997/12188 [2:14:30<24:58, 7.84s/it] {'loss': 0.3053, 'grad_norm': 0.7498155541617796, 'learning_rate': 6.506779409392039e-09, 'epoch': 0.98} + 98%|█████████▊| 11997/12188 [2:14:30<24:58, 7.84s/it] 98%|█████████▊| 11998/12188 [2:14:37<24:32, 7.75s/it] {'loss': 0.2795, 'grad_norm': 0.7190099338177652, 'learning_rate': 6.439191478826434e-09, 'epoch': 0.98} + 98%|█████████▊| 11998/12188 [2:14:37<24:32, 7.75s/it] 98%|█████████▊| 11999/12188 [2:14:45<24:03, 7.64s/it] {'loss': 0.2767, 'grad_norm': 0.6293185521186591, 'learning_rate': 6.3719561855024595e-09, 'epoch': 0.98} + 98%|█████████▊| 11999/12188 [2:14:45<24:03, 7.64s/it] 98%|█████████▊| 12000/12188 [2:14:52<23:46, 7.59s/it] {'loss': 0.2692, 'grad_norm': 0.7543702907098014, 'learning_rate': 6.305073534168538e-09, 'epoch': 0.98} + 98%|█████████▊| 12000/12188 [2:14:52<23:46, 7.59s/it]/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/torch/utils/checkpoint.py:87: UserWarning: None of the inputs have requires_grad=True. Gradients will be None + warnings.warn( + 98%|█████████▊| 12001/12188 [2:15:20<42:47, 13.73s/it] {'loss': 0.345, 'grad_norm': 0.6992228380559786, 'learning_rate': 6.238543529547003e-09, 'epoch': 0.98} + 98%|█████████▊| 12001/12188 [2:15:20<42:47, 13.73s/it] 98%|█████████▊| 12002/12188 [2:15:28<37:03, 11.95s/it] {'loss': 0.3011, 'grad_norm': 0.7118215211067631, 'learning_rate': 6.17236617633632e-09, 'epoch': 0.98} + 98%|█████████▊| 12002/12188 [2:15:28<37:03, 11.95s/it] 98%|█████████▊| 12003/12188 [2:15:36<33:07, 10.74s/it] {'loss': 0.3074, 'grad_norm': 0.7276285837542504, 'learning_rate': 6.106541479209971e-09, 'epoch': 0.98} + 98%|█████████▊| 12003/12188 [2:15:36<33:07, 10.74s/it] 98%|█████████▊| 12004/12188 [2:15:43<29:47, 9.72s/it] {'loss': 0.2936, 'grad_norm': 0.7598208439108629, 'learning_rate': 6.041069442816461e-09, 'epoch': 0.98} + 98%|█████████▊| 12004/12188 [2:15:43<29:47, 9.72s/it] 98%|█████████▊| 12005/12188 [2:15:51<28:07, 9.22s/it] {'loss': 0.3152, 'grad_norm': 0.7337695879847373, 'learning_rate': 5.975950071779313e-09, 'epoch': 0.98} + 98%|█████████▊| 12005/12188 [2:15:51<28:07, 9.22s/it] 99%|█████████▊| 12006/12188 [2:16:01<28:54, 9.53s/it] {'loss': 0.2858, 'grad_norm': 0.6880338602086229, 'learning_rate': 5.911183370697071e-09, 'epoch': 0.99} + 99%|█████████▊| 12006/12188 [2:16:01<28:54, 9.53s/it] 99%|█████████▊| 12007/12188 [2:16:10<27:39, 9.17s/it] {'loss': 0.2967, 'grad_norm': 0.6567432551314125, 'learning_rate': 5.846769344143299e-09, 'epoch': 0.99} + 99%|█████████▊| 12007/12188 [2:16:10<27:39, 9.17s/it] 99%|█████████▊| 12008/12188 [2:16:17<26:03, 8.69s/it] {'loss': 0.2986, 'grad_norm': 0.6585994226388779, 'learning_rate': 5.782707996667136e-09, 'epoch': 0.99} + 99%|█████████▊| 12008/12188 [2:16:17<26:03, 8.69s/it] 99%|█████████▊| 12009/12188 [2:16:25<25:21, 8.50s/it] {'loss': 0.2846, 'grad_norm': 0.6983495807601585, 'learning_rate': 5.718999332792185e-09, 'epoch': 0.99} + 99%|█████████▊| 12009/12188 [2:16:25<25:21, 8.50s/it] 99%|█████████▊| 12010/12188 [2:16:35<26:06, 8.80s/it] {'loss': 0.2941, 'grad_norm': 0.6921996599131651, 'learning_rate': 5.655643357017071e-09, 'epoch': 0.99} + 99%|█████████▊| 12010/12188 [2:16:35<26:06, 8.80s/it] 99%|█████████▊| 12011/12188 [2:16:46<27:44, 9.40s/it] {'loss': 0.2975, 'grad_norm': 0.6906143781070033, 'learning_rate': 5.592640073817102e-09, 'epoch': 0.99} + 99%|█████████▊| 12011/12188 [2:16:46<27:44, 9.40s/it] 99%|█████████▊| 12012/12188 [2:16:53<25:47, 8.79s/it] {'loss': 0.2665, 'grad_norm': 0.6740027001708184, 'learning_rate': 5.529989487640386e-09, 'epoch': 0.99} + 99%|█████████▊| 12012/12188 [2:16:53<25:47, 8.79s/it] 99%|█████████▊| 12013/12188 [2:17:00<24:25, 8.37s/it] {'loss': 0.3458, 'grad_norm': 0.7287744002681045, 'learning_rate': 5.467691602912273e-09, 'epoch': 0.99} + 99%|█████████▊| 12013/12188 [2:17:01<24:25, 8.37s/it] 99%|█████████▊| 12014/12188 [2:17:08<23:25, 8.08s/it] {'loss': 0.2781, 'grad_norm': 0.7373008380270086, 'learning_rate': 5.405746424030356e-09, 'epoch': 0.99} + 99%|█████████▊| 12014/12188 [2:17:08<23:25, 8.08s/it] 99%|█████████▊| 12015/12188 [2:17:15<22:45, 7.89s/it] {'loss': 0.3089, 'grad_norm': 0.6401282439819119, 'learning_rate': 5.344153955371134e-09, 'epoch': 0.99} + 99%|█████████▊| 12015/12188 [2:17:15<22:45, 7.89s/it] 99%|█████████▊| 12016/12188 [2:17:23<22:22, 7.80s/it] {'loss': 0.3103, 'grad_norm': 0.7552793115058015, 'learning_rate': 5.282914201282796e-09, 'epoch': 0.99} + 99%|█████████▊| 12016/12188 [2:17:23<22:22, 7.80s/it] 99%|█████████▊| 12017/12188 [2:17:30<21:52, 7.67s/it] {'loss': 0.3303, 'grad_norm': 0.6774891491421445, 'learning_rate': 5.2220271660907705e-09, 'epoch': 0.99} + 99%|█████████▊| 12017/12188 [2:17:30<21:52, 7.67s/it] 99%|█████████▊| 12018/12188 [2:17:38<21:45, 7.68s/it] {'loss': 0.2804, 'grad_norm': 0.7028793595978021, 'learning_rate': 5.1614928540938416e-09, 'epoch': 0.99} + 99%|█████████▊| 12018/12188 [2:17:38<21:45, 7.68s/it] 99%|█████████▊| 12019/12188 [2:17:45<21:21, 7.59s/it] {'loss': 0.3067, 'grad_norm': 0.6837449675988525, 'learning_rate': 5.101311269568032e-09, 'epoch': 0.99} + 99%|█████████▊| 12019/12188 [2:17:45<21:21, 7.59s/it] 99%|█████████▊| 12020/12188 [2:17:53<21:13, 7.58s/it] {'loss': 0.3127, 'grad_norm': 0.7156130867120302, 'learning_rate': 5.041482416762167e-09, 'epoch': 0.99} + 99%|█████████▊| 12020/12188 [2:17:53<21:13, 7.58s/it] 99%|█████████▊| 12021/12188 [2:18:01<21:23, 7.69s/it] {'loss': 0.2974, 'grad_norm': 0.8594639654237166, 'learning_rate': 4.982006299902309e-09, 'epoch': 0.99} + 99%|█████████▊| 12021/12188 [2:18:01<21:23, 7.69s/it] 99%|█████████▊| 12022/12188 [2:18:08<20:59, 7.59s/it] {'loss': 0.3293, 'grad_norm': 0.7942213068926728, 'learning_rate': 4.922882923187877e-09, 'epoch': 0.99} + 99%|█████████▊| 12022/12188 [2:18:08<20:59, 7.59s/it] 99%|█████████▊| 12023/12188 [2:18:16<20:38, 7.50s/it] {'loss': 0.3286, 'grad_norm': 0.8053987759660884, 'learning_rate': 4.864112290793865e-09, 'epoch': 0.99} + 99%|█████████▊| 12023/12188 [2:18:16<20:38, 7.50s/it] 99%|█████████▊| 12024/12188 [2:18:23<20:48, 7.61s/it] {'loss': 0.2819, 'grad_norm': 0.7248724264506464, 'learning_rate': 4.805694406871397e-09, 'epoch': 0.99} + 99%|█████████▊| 12024/12188 [2:18:23<20:48, 7.61s/it] 99%|█████████▊| 12025/12188 [2:18:31<20:26, 7.52s/it] {'loss': 0.2862, 'grad_norm': 0.8473175290064431, 'learning_rate': 4.747629275545507e-09, 'epoch': 0.99} + 99%|█████████▊| 12025/12188 [2:18:31<20:26, 7.52s/it] 99%|█████████▊| 12026/12188 [2:18:38<20:25, 7.57s/it] {'loss': 0.273, 'grad_norm': 0.7456595169778325, 'learning_rate': 4.689916900916247e-09, 'epoch': 0.99} + 99%|█████████▊| 12026/12188 [2:18:38<20:25, 7.57s/it] 99%|█████████▊| 12027/12188 [2:18:47<21:32, 8.03s/it] {'loss': 0.3328, 'grad_norm': 1.1427773337262186, 'learning_rate': 4.632557287059803e-09, 'epoch': 0.99} + 99%|█████████▊| 12027/12188 [2:18:47<21:32, 8.03s/it] 99%|█████████▊| 12028/12188 [2:18:55<20:54, 7.84s/it] {'loss': 0.3261, 'grad_norm': 0.6375337520383748, 'learning_rate': 4.575550438026266e-09, 'epoch': 0.99} + 99%|█████████▊| 12028/12188 [2:18:55<20:54, 7.84s/it] 99%|█████████▊| 12029/12188 [2:19:02<20:27, 7.72s/it] {'loss': 0.3231, 'grad_norm': 0.6728120066841088, 'learning_rate': 4.518896357841307e-09, 'epoch': 0.99} + 99%|█████████▊| 12029/12188 [2:19:02<20:27, 7.72s/it] 99%|█████████▊| 12030/12188 [2:19:10<20:26, 7.76s/it] {'loss': 0.2764, 'grad_norm': 0.8318011187278251, 'learning_rate': 4.462595050506724e-09, 'epoch': 0.99} + 99%|█████████▊| 12030/12188 [2:19:10<20:26, 7.76s/it] 99%|█████████▊| 12031/12188 [2:19:18<19:57, 7.63s/it] {'loss': 0.2899, 'grad_norm': 0.7232094921558797, 'learning_rate': 4.406646519997115e-09, 'epoch': 0.99} + 99%|█████████▊| 12031/12188 [2:19:18<19:57, 7.63s/it] 99%|█████████▊| 12032/12188 [2:19:25<19:45, 7.60s/it] {'loss': 0.3222, 'grad_norm': 0.6923838487527949, 'learning_rate': 4.35105077026432e-09, 'epoch': 0.99} + 99%|█████████▊| 12032/12188 [2:19:25<19:45, 7.60s/it] 99%|█████████▊| 12033/12188 [2:19:32<19:25, 7.52s/it] {'loss': 0.3039, 'grad_norm': 0.7349402932591843, 'learning_rate': 4.295807805234087e-09, 'epoch': 0.99} + 99%|█████████▊| 12033/12188 [2:19:32<19:25, 7.52s/it] 99%|█████████▊| 12034/12188 [2:19:40<19:06, 7.44s/it] {'loss': 0.2741, 'grad_norm': 0.7544504098259628, 'learning_rate': 4.240917628808294e-09, 'epoch': 0.99} + 99%|█████████▊| 12034/12188 [2:19:40<19:06, 7.44s/it] 99%|█████████▊| 12035/12188 [2:19:47<19:03, 7.48s/it] {'loss': 0.2482, 'grad_norm': 0.6678888435465737, 'learning_rate': 4.186380244862176e-09, 'epoch': 0.99} + 99%|█████████▊| 12035/12188 [2:19:47<19:03, 7.48s/it] 99%|█████████▉| 12036/12188 [2:19:55<19:07, 7.55s/it] {'loss': 0.2808, 'grad_norm': 0.6985729390892562, 'learning_rate': 4.132195657247095e-09, 'epoch': 0.99} + 99%|█████████▉| 12036/12188 [2:19:55<19:07, 7.55s/it] 99%|█████████▉| 12037/12188 [2:20:03<19:32, 7.77s/it] {'loss': 0.3165, 'grad_norm': 0.705351190784906, 'learning_rate': 4.078363869790547e-09, 'epoch': 0.99} + 99%|█████████▉| 12037/12188 [2:20:03<19:32, 7.77s/it] 99%|█████████▉| 12038/12188 [2:20:11<19:20, 7.74s/it] {'loss': 0.3137, 'grad_norm': 0.7253546330145214, 'learning_rate': 4.024884886292823e-09, 'epoch': 0.99} + 99%|█████████▉| 12038/12188 [2:20:11<19:20, 7.74s/it] 99%|█████████▉| 12039/12188 [2:20:18<18:52, 7.60s/it] {'loss': 0.333, 'grad_norm': 0.6897963417624674, 'learning_rate': 3.971758710531459e-09, 'epoch': 0.99} + 99%|█████████▉| 12039/12188 [2:20:18<18:52, 7.60s/it] 99%|█���███████▉| 12040/12188 [2:20:27<19:39, 7.97s/it] {'loss': 0.2962, 'grad_norm': 0.6692845286912422, 'learning_rate': 3.9189853462573424e-09, 'epoch': 0.99} + 99%|█████████▉| 12040/12188 [2:20:27<19:39, 7.97s/it] 99%|█████████▉| 12041/12188 [2:20:34<18:59, 7.75s/it] {'loss': 0.2942, 'grad_norm': 0.8251751968423249, 'learning_rate': 3.866564797198047e-09, 'epoch': 0.99} + 99%|█████████▉| 12041/12188 [2:20:34<18:59, 7.75s/it] 99%|█████████▉| 12042/12188 [2:20:42<18:52, 7.76s/it] {'loss': 0.3263, 'grad_norm': 0.7123383575000505, 'learning_rate': 3.814497067055056e-09, 'epoch': 0.99} + 99%|█████████▉| 12042/12188 [2:20:42<18:52, 7.76s/it] 99%|█████████▉| 12043/12188 [2:20:49<18:25, 7.63s/it] {'loss': 0.3231, 'grad_norm': 0.7174445090833846, 'learning_rate': 3.762782159505429e-09, 'epoch': 0.99} + 99%|█████████▉| 12043/12188 [2:20:49<18:25, 7.63s/it] 99%|█████████▉| 12044/12188 [2:20:57<18:09, 7.56s/it] {'loss': 0.3116, 'grad_norm': 0.6632632721833733, 'learning_rate': 3.7114200782006894e-09, 'epoch': 0.99} + 99%|█████████▉| 12044/12188 [2:20:57<18:09, 7.56s/it] 99%|█████████▉| 12045/12188 [2:21:04<17:58, 7.54s/it] {'loss': 0.3023, 'grad_norm': 0.7923863440655751, 'learning_rate': 3.6604108267684903e-09, 'epoch': 0.99} + 99%|█████████▉| 12045/12188 [2:21:04<17:58, 7.54s/it] 99%|█████████▉| 12046/12188 [2:21:12<18:16, 7.72s/it] {'loss': 0.2814, 'grad_norm': 0.6229596018928063, 'learning_rate': 3.6097544088103954e-09, 'epoch': 0.99} + 99%|█████████▉| 12046/12188 [2:21:12<18:16, 7.72s/it] 99%|█████████▉| 12047/12188 [2:21:20<17:58, 7.65s/it] {'loss': 0.2983, 'grad_norm': 0.6859287291961409, 'learning_rate': 3.5594508279046534e-09, 'epoch': 0.99} + 99%|█████████▉| 12047/12188 [2:21:20<17:58, 7.65s/it] 99%|█████████▉| 12048/12188 [2:21:29<18:48, 8.06s/it] {'loss': 0.2452, 'grad_norm': 0.7371465586414937, 'learning_rate': 3.5095000876028683e-09, 'epoch': 0.99} + 99%|█████████▉| 12048/12188 [2:21:29<18:48, 8.06s/it] 99%|█████████▉| 12049/12188 [2:21:36<18:12, 7.86s/it] {'loss': 0.3394, 'grad_norm': 0.8800825505106061, 'learning_rate': 3.4599021914327737e-09, 'epoch': 0.99} + 99%|█████████▉| 12049/12188 [2:21:36<18:12, 7.86s/it] 99%|█████████▉| 12050/12188 [2:21:44<17:47, 7.73s/it] {'loss': 0.3093, 'grad_norm': 0.7524285786067918, 'learning_rate': 3.4106571428971223e-09, 'epoch': 0.99} + 99%|█████████▉| 12050/12188 [2:21:44<17:47, 7.73s/it] 99%|█████████▉| 12051/12188 [2:21:51<17:38, 7.73s/it] {'loss': 0.2953, 'grad_norm': 0.6982090757300221, 'learning_rate': 3.361764945473134e-09, 'epoch': 0.99} + 99%|█████████▉| 12051/12188 [2:21:51<17:38, 7.73s/it] 99%|█████████▉| 12052/12188 [2:22:00<17:59, 7.94s/it] {'loss': 0.3091, 'grad_norm': 0.7205202202714382, 'learning_rate': 3.313225602613046e-09, 'epoch': 0.99} + 99%|█████████▉| 12052/12188 [2:22:00<17:59, 7.94s/it] 99%|█████████▉| 12053/12188 [2:22:07<17:28, 7.77s/it] {'loss': 0.31, 'grad_norm': 0.688901246974096, 'learning_rate': 3.265039117745783e-09, 'epoch': 0.99} + 99%|█████████▉| 12053/12188 [2:22:07<17:28, 7.77s/it] 99%|█████████▉| 12054/12188 [2:22:15<17:27, 7.82s/it] {'loss': 0.3482, 'grad_norm': 0.6611143822378295, 'learning_rate': 3.217205494273623e-09, 'epoch': 0.99} + 99%|█████████▉| 12054/12188 [2:22:15<17:27, 7.82s/it] 99%|█████████▉| 12055/12188 [2:22:22<17:00, 7.67s/it] {'loss': 0.2945, 'grad_norm': 0.7112119219029972, 'learning_rate': 3.1697247355738646e-09, 'epoch': 0.99} + 99%|█████████▉| 12055/12188 [2:22:22<17:00, 7.67s/it] 99%|█████████▉| 12056/12188 [2:22:30<16:53, 7.68s/it] {'loss': 0.2424, 'grad_norm': 0.7204285057128671, 'learning_rate': 3.1225968450004916e-09, 'epoch': 0.99} + 99%|█████████▉| 12056/12188 [2:22:30<16:53, 7.68s/it] 99%|█████████▉| 12057/12188 [2:22:38<16:38, 7.63s/it] {'loss': 0.2909, 'grad_norm': 0.717939218646702, 'learning_rate': 3.0758218258813977e-09, 'epoch': 0.99} + 99%|█████████▉| 12057/12188 [2:22:38<16:38, 7.63s/it] 99%|█████████▉| 12058/12188 [2:22:45<16:31, 7.63s/it] {'loss': 0.2972, 'grad_norm': 0.7425115451097501, 'learning_rate': 3.0293996815194958e-09, 'epoch': 0.99} + 99%|█████████▉| 12058/12188 [2:22:45<16:31, 7.63s/it] 99%|█████████▉| 12059/12188 [2:22:53<16:16, 7.57s/it] {'loss': 0.2639, 'grad_norm': 0.6955731571090201, 'learning_rate': 2.98333041519272e-09, 'epoch': 0.99} + 99%|█████████▉| 12059/12188 [2:22:53<16:16, 7.57s/it] 99%|█████████▉| 12060/12188 [2:23:01<16:35, 7.77s/it] {'loss': 0.288, 'grad_norm': 0.8105589684058404, 'learning_rate': 2.9376140301556887e-09, 'epoch': 0.99} + 99%|█████████▉| 12060/12188 [2:23:01<16:35, 7.77s/it] 99%|█████████▉| 12061/12188 [2:23:09<16:46, 7.93s/it] {'loss': 0.282, 'grad_norm': 0.659765959788263, 'learning_rate': 2.8922505296352654e-09, 'epoch': 0.99} + 99%|█████████▉| 12061/12188 [2:23:09<16:46, 7.93s/it] 99%|█████████▉| 12062/12188 [2:23:17<16:17, 7.76s/it] {'loss': 0.3102, 'grad_norm': 1.1717919775445684, 'learning_rate': 2.847239916836109e-09, 'epoch': 0.99} + 99%|█████████▉| 12062/12188 [2:23:17<16:17, 7.76s/it] 99%|█████████▉| 12063/12188 [2:23:25<16:24, 7.88s/it] {'loss': 0.2917, 'grad_norm': 0.7173302947912172, 'learning_rate': 2.8025821949356768e-09, 'epoch': 0.99} + 99%|█████████▉| 12063/12188 [2:23:25<16:24, 7.88s/it] 99%|█████████▉| 12064/12188 [2:23:32<16:06, 7.79s/it] {'loss': 0.2656, 'grad_norm': 0.7177206529740717, 'learning_rate': 2.7582773670886685e-09, 'epoch': 0.99} + 99%|█████████▉| 12064/12188 [2:23:32<16:06, 7.79s/it] 99%|█████████▉| 12065/12188 [2:23:40<16:07, 7.86s/it] {'loss': 0.2992, 'grad_norm': 0.7606218725820786, 'learning_rate': 2.7143254364236925e-09, 'epoch': 0.99} + 99%|█████████▉| 12065/12188 [2:23:40<16:07, 7.86s/it] 99%|█████████▉| 12066/12188 [2:23:48<15:49, 7.78s/it] {'loss': 0.2852, 'grad_norm': 0.740253589915343, 'learning_rate': 2.670726406043822e-09, 'epoch': 0.99} + 99%|█████████▉| 12066/12188 [2:23:48<15:49, 7.78s/it] 99%|█████████▉| 12067/12188 [2:23:56<15:56, 7.90s/it] {'loss': 0.2978, 'grad_norm': 0.9811550190074441, 'learning_rate': 2.6274802790288156e-09, 'epoch': 0.99} + 99%|█████████▉| 12067/12188 [2:23:56<15:56, 7.90s/it] 99%|█████████▉| 12068/12188 [2:24:04<15:55, 7.96s/it] {'loss': 0.3133, 'grad_norm': 0.6954606214539052, 'learning_rate': 2.5845870584317865e-09, 'epoch': 0.99} + 99%|█████████▉| 12068/12188 [2:24:04<15:55, 7.96s/it] 99%|█████████▉| 12069/12188 [2:24:12<15:42, 7.92s/it] {'loss': 0.3143, 'grad_norm': 0.679555082822226, 'learning_rate': 2.542046747282534e-09, 'epoch': 0.99} + 99%|█████████▉| 12069/12188 [2:24:12<15:42, 7.92s/it] 99%|█████████▉| 12070/12188 [2:24:20<15:40, 7.97s/it] {'loss': 0.3224, 'grad_norm': 0.7651483017920145, 'learning_rate': 2.4998593485853204e-09, 'epoch': 0.99} + 99%|█████████▉| 12070/12188 [2:24:20<15:40, 7.97s/it] 99%|█████████▉| 12071/12188 [2:24:28<15:29, 7.94s/it] {'loss': 0.2761, 'grad_norm': 0.7078481093095091, 'learning_rate': 2.4580248653183204e-09, 'epoch': 0.99} + 99%|█████████▉| 12071/12188 [2:24:28<15:29, 7.94s/it] 99%|█████████▉| 12072/12188 [2:24:36<15:10, 7.85s/it] {'loss': 0.2863, 'grad_norm': 0.7464613919920773, 'learning_rate': 2.4165433004363914e-09, 'epoch': 0.99} + 99%|█████████▉| 12072/12188 [2:24:36<15:10, 7.85s/it] 99%|█████████▉| 12073/12188 [2:24:46<16:11, 8.45s/it] {'loss': 0.3269, 'grad_norm': 0.7267098821003252, 'learning_rate': 2.3754146568694124e-09, 'epoch': 0.99} + 99%|█████████▉| 12073/12188 [2:24:46<16:11, 8.45s/it] 99%|█████████▉| 12074/12188 [2:24:53<15:43, 8.27s/it] {'loss': 0.3132, 'grad_norm': 0.8344591811903043, 'learning_rate': 2.334638937521172e-09, 'epoch': 0.99} + 99%|█████████▉| 12074/12188 [2:24:53<15:43, 8.27s/it] 99%|█████████▉| 12075/12188 [2:25:01<15:08, 8.04s/it] {'loss': 0.286, 'grad_norm': 0.7306083269006632, 'learning_rate': 2.2942161452715883e-09, 'epoch': 0.99} + 99%|█████████▉| 12075/12188 [2:25:01<15:08, 8.04s/it] 99%|█████████▉| 12076/12188 [2:25:09<14:51, 7.96s/it] {'loss': 0.3116, 'grad_norm': 0.7903321938869761, 'learning_rate': 2.25414628297449e-09, 'epoch': 0.99} + 99%|█████████▉| 12076/12188 [2:25:09<14:51, 7.96s/it] 99%|█████████▉| 12077/12188 [2:25:17<15:11, 8.21s/it] {'loss': 0.2966, 'grad_norm': 0.7234864551561773, 'learning_rate': 2.21442935346039e-09, 'epoch': 0.99} + 99%|█████████▉| 12077/12188 [2:25:18<15:11, 8.21s/it] 99%|█████████▉| 12078/12188 [2:25:25<14:55, 8.14s/it] {'loss': 0.3035, 'grad_norm': 0.7118018459937393, 'learning_rate': 2.1750653595337125e-09, 'epoch': 0.99} + 99%|█████████▉| 12078/12188 [2:25:25<14:55, 8.14s/it] 99%|█████████▉| 12079/12188 [2:25:33<14:35, 8.03s/it] {'loss': 0.2954, 'grad_norm': 0.789888007347094, 'learning_rate': 2.136054303974455e-09, 'epoch': 0.99} + 99%|█████████▉| 12079/12188 [2:25:33<14:35, 8.03s/it] 99%|█████████▉| 12080/12188 [2:25:41<14:10, 7.88s/it] {'loss': 0.3254, 'grad_norm': 1.0986616055415148, 'learning_rate': 2.097396189537637e-09, 'epoch': 0.99} + 99%|█████████▉| 12080/12188 [2:25:41<14:10, 7.88s/it] 99%|█████████▉| 12081/12188 [2:25:48<13:46, 7.73s/it] {'loss': 0.3207, 'grad_norm': 0.6773704384309996, 'learning_rate': 2.059091018952186e-09, 'epoch': 0.99} + 99%|█████████▉| 12081/12188 [2:25:48<13:46, 7.73s/it] 99%|█████████▉| 12082/12188 [2:25:56<13:32, 7.66s/it] {'loss': 0.3174, 'grad_norm': 0.6383976740267153, 'learning_rate': 2.0211387949242712e-09, 'epoch': 0.99} + 99%|█████████▉| 12082/12188 [2:25:56<13:32, 7.66s/it] 99%|█████████▉| 12083/12188 [2:26:04<13:58, 7.99s/it] {'loss': 0.3213, 'grad_norm': 0.7096954231602314, 'learning_rate': 1.98353952013397e-09, 'epoch': 0.99} + 99%|█████████▉| 12083/12188 [2:26:04<13:58, 7.99s/it] 99%|█████████▉| 12084/12188 [2:26:12<13:29, 7.79s/it] {'loss': 0.2922, 'grad_norm': 0.7140533195408271, 'learning_rate': 1.9462931972358268e-09, 'epoch': 0.99} + 99%|█████████▉| 12084/12188 [2:26:12<13:29, 7.79s/it] 99%|█████████▉| 12085/12188 [2:26:20<13:52, 8.09s/it] {'loss': 0.297, 'grad_norm': 0.7201562744898937, 'learning_rate': 1.9093998288605144e-09, 'epoch': 0.99} + 99%|█████████▉| 12085/12188 [2:26:20<13:52, 8.09s/it] 99%|█████████▉| 12086/12188 [2:26:28<13:35, 8.00s/it] {'loss': 0.2931, 'grad_norm': 0.7756326626687771, 'learning_rate': 1.8728594176131707e-09, 'epoch': 0.99} + 99%|█████████▉| 12086/12188 [2:26:28<13:35, 8.00s/it] 99%|█████████▉| 12087/12188 [2:26:36<13:09, 7.82s/it] {'loss': 0.3243, 'grad_norm': 1.2652358533238566, 'learning_rate': 1.8366719660750653e-09, 'epoch': 0.99} + 99%|█████████▉| 12087/12188 [2:26:36<13:09, 7.82s/it] 99%|█████████▉| 12088/12188 [2:26:43<12:46, 7.67s/it] {'loss': 0.3127, 'grad_norm': 0.7715954884269426, 'learning_rate': 1.8008374768002657e-09, 'epoch': 0.99} + 99%|█████████▉| 12088/12188 [2:26:43<12:46, 7.67s/it] 99%|█████████▉| 12089/12188 [2:26:51<12:41, 7.69s/it] {'loss': 0.2975, 'grad_norm': 0.6839474039602714, 'learning_rate': 1.7653559523206353e-09, 'epoch': 0.99} + 99%|█████████▉| 12089/12188 [2:26:51<12:41, 7.69s/it] 99%|█████████▉| 12090/12188 [2:26:58<12:33, 7.69s/it] {'loss': 0.2967, 'grad_norm': 0.649725165773083, 'learning_rate': 1.7302273951408376e-09, 'epoch': 0.99} + 99%|█████████▉| 12090/12188 [2:26:58<12:33, 7.69s/it] 99%|█████████▉| 12091/12188 [2:27:06<12:21, 7.64s/it] {'loss': 0.3123, 'grad_norm': 0.6807266999467324, 'learning_rate': 1.695451807742221e-09, 'epoch': 0.99} + 99%|█████████▉| 12091/12188 [2:27:06<12:21, 7.64s/it] 99%|█████████▉| 12092/12188 [2:27:15<12:48, 8.00s/it] {'loss': 0.2549, 'grad_norm': 0.7490932968864299, 'learning_rate': 1.661029192580599e-09, 'epoch': 0.99} + 99%|█████████▉| 12092/12188 [2:27:15<12:48, 8.00s/it] 99%|█████████▉| 12093/12188 [2:27:23<12:34, 7.95s/it] {'loss': 0.2846, 'grad_norm': 0.77747696663745, 'learning_rate': 1.6269595520862492e-09, 'epoch': 0.99} + 99%|█████████▉| 12093/12188 [2:27:23<12:34, 7.95s/it] 99%|█████████▉| 12094/12188 [2:27:30<12:18, 7.86s/it] {'loss': 0.2826, 'grad_norm': 0.7280692182470332, 'learning_rate': 1.5932428886661355e-09, 'epoch': 0.99} + 99%|█████████▉| 12094/12188 [2:27:30<12:18, 7.86s/it] 99%|█████████▉| 12095/12188 [2:27:38<12:07, 7.82s/it] {'loss': 0.269, 'grad_norm': 0.8378323751021993, 'learning_rate': 1.5598792047000212e-09, 'epoch': 0.99} + 99%|█████████▉| 12095/12188 [2:27:38<12:07, 7.82s/it] 99%|█████████▉| 12096/12188 [2:27:45<11:49, 7.71s/it] {'loss': 0.2851, 'grad_norm': 0.7147663434836368, 'learning_rate': 1.5268685025449093e-09, 'epoch': 0.99} + 99%|█████████▉| 12096/12188 [2:27:45<11:49, 7.71s/it] 99%|█████████▉| 12097/12188 [2:27:53<11:33, 7.62s/it] {'loss': 0.3436, 'grad_norm': 0.7127686052171058, 'learning_rate': 1.4942107845317132e-09, 'epoch': 0.99} + 99%|█████████▉| 12097/12188 [2:27:53<11:33, 7.62s/it] 99%|█████████▉| 12098/12188 [2:28:00<11:16, 7.51s/it] {'loss': 0.3168, 'grad_norm': 0.7049640916240326, 'learning_rate': 1.4619060529663664e-09, 'epoch': 0.99} + 99%|█████████▉| 12098/12188 [2:28:00<11:16, 7.51s/it] 99%|█████████▉| 12099/12188 [2:28:08<11:10, 7.53s/it] {'loss': 0.2859, 'grad_norm': 0.9930834411311188, 'learning_rate': 1.4299543101309321e-09, 'epoch': 0.99} + 99%|█████████▉| 12099/12188 [2:28:08<11:10, 7.53s/it] 99%|█████████▉| 12100/12188 [2:28:15<11:07, 7.59s/it] {'loss': 0.3169, 'grad_norm': 0.7763617665508145, 'learning_rate': 1.3983555582808283e-09, 'epoch': 0.99} + 99%|█████████▉| 12100/12188 [2:28:15<11:07, 7.59s/it] 99%|█████████▉| 12101/12188 [2:28:23<10:57, 7.56s/it] {'loss': 0.3018, 'grad_norm': 0.6932619135201484, 'learning_rate': 1.3671097996487137e-09, 'epoch': 0.99} + 99%|█████████▉| 12101/12188 [2:28:23<10:57, 7.56s/it] 99%|█████████▉| 12102/12188 [2:28:30<10:43, 7.49s/it] {'loss': 0.2779, 'grad_norm': 0.6887769570773651, 'learning_rate': 1.336217036439491e-09, 'epoch': 0.99} + 99%|█████████▉| 12102/12188 [2:28:30<10:43, 7.49s/it] 99%|█████████▉| 12103/12188 [2:28:38<10:39, 7.52s/it] {'loss': 0.3101, 'grad_norm': 0.7589839286393315, 'learning_rate': 1.305677270836414e-09, 'epoch': 0.99} + 99%|█████████▉| 12103/12188 [2:28:38<10:39, 7.52s/it] 99%|█████████▉| 12104/12188 [2:28:45<10:31, 7.51s/it] {'loss': 0.2337, 'grad_norm': 0.6364487346079452, 'learning_rate': 1.2754905049949807e-09, 'epoch': 0.99} + 99%|█████████▉| 12104/12188 [2:28:45<10:31, 7.51s/it] 99%|█████████▉| 12105/12188 [2:28:53<10:21, 7.49s/it] {'loss': 0.3294, 'grad_norm': 0.6893315274832005, 'learning_rate': 1.2456567410473742e-09, 'epoch': 0.99} + 99%|█████████▉| 12105/12188 [2:28:53<10:21, 7.49s/it] 99%|█████████▉| 12106/12188 [2:29:00<10:19, 7.55s/it] {'loss': 0.2799, 'grad_norm': 0.8215547828397164, 'learning_rate': 1.2161759811002428e-09, 'epoch': 0.99} + 99%|█████████▉| 12106/12188 [2:29:00<10:19, 7.55s/it] 99%|█████████▉| 12107/12188 [2:29:08<10:09, 7.53s/it] {'loss': 0.2985, 'grad_norm': 0.9293954220756048, 'learning_rate': 1.1870482272358096e-09, 'epoch': 0.99} + 99%|█████████▉| 12107/12188 [2:29:08<10:09, 7.53s/it] 99%|█████████▉| 12108/12188 [2:29:16<10:20, 7.76s/it] {'loss': 0.3037, 'grad_norm': 0.6881899743896706, 'learning_rate': 1.1582734815107633e-09, 'epoch': 0.99} + 99%|█████████▉| 12108/12188 [2:29:16<10:20, 7.76s/it] 99%|█████████▉| 12109/12188 [2:29:26<11:03, 8.40s/it] {'loss': 0.3005, 'grad_norm': 0.7734113944130752, 'learning_rate': 1.1298517459573666e-09, 'epoch': 0.99} + 99%|█████████▉| 12109/12188 [2:29:26<11:03, 8.40s/it] 99%|█████████▉| 12110/12188 [2:29:34<10:38, 8.18s/it] {'loss': 0.3145, 'grad_norm': 0.7079005660307379, 'learning_rate': 1.1017830225823478e-09, 'epoch': 0.99} + 99%|█████████▉| 12110/12188 [2:29:34<10:38, 8.18s/it] 99%|█████████▉| 12111/12188 [2:29:41<10:15, 7.99s/it] {'loss': 0.2905, 'grad_norm': 0.6497412952769558, 'learning_rate': 1.0740673133680102e-09, 'epoch': 0.99} + 99%|█████████▉| 12111/12188 [2:29:41<10:15, 7.99s/it] 99%|█████████▉| 12112/12188 [2:29:49<10:03, 7.94s/it] {'loss': 0.2636, 'grad_norm': 0.7442311138812682, 'learning_rate': 1.046704620271677e-09, 'epoch': 0.99} + 99%|█████████▉| 12112/12188 [2:29:49<10:03, 7.94s/it] 99%|█████████▉| 12113/12188 [2:29:56<09:41, 7.75s/it] {'loss': 0.3034, 'grad_norm': 0.7497516587316622, 'learning_rate': 1.019694945225691e-09, 'epoch': 0.99} + 99%|█████████▉| 12113/12188 [2:29:56<09:41, 7.75s/it] 99%|█████████▉| 12114/12188 [2:30:05<09:49, 7.97s/it] {'loss': 0.3236, 'grad_norm': 0.686756718975904, 'learning_rate': 9.930382901374157e-10, 'epoch': 0.99} + 99%|█████████▉| 12114/12188 [2:30:05<09:49, 7.97s/it] 99%|█████████▉| 12115/12188 [2:30:13<09:33, 7.85s/it] {'loss': 0.2918, 'grad_norm': 0.6955790036871125, 'learning_rate': 9.667346568892344e-10, 'epoch': 0.99} + 99%|█████████▉| 12115/12188 [2:30:13<09:33, 7.85s/it] 99%|█████████▉| 12116/12188 [2:30:20<09:17, 7.74s/it] {'loss': 0.2902, 'grad_norm': 0.7442942191475674, 'learning_rate': 9.407840473385499e-10, 'epoch': 0.99} + 99%|█████████▉| 12116/12188 [2:30:20<09:17, 7.74s/it] 99%|█████████▉| 12117/12188 [2:30:28<09:05, 7.68s/it] {'loss': 0.3222, 'grad_norm': 0.8365599911759198, 'learning_rate': 9.151864633183405e-10, 'epoch': 0.99} + 99%|█████████▉| 12117/12188 [2:30:28<09:05, 7.68s/it] 99%|█████████▉| 12118/12188 [2:30:35<08:51, 7.59s/it] {'loss': 0.3172, 'grad_norm': 0.7276170706019655, 'learning_rate': 8.899419066360492e-10, 'epoch': 0.99} + 99%|█████████▉| 12118/12188 [2:30:35<08:51, 7.59s/it] 99%|█████████▉| 12119/12188 [2:30:42<08:41, 7.56s/it] {'loss': 0.3018, 'grad_norm': 0.7372365016067817, 'learning_rate': 8.650503790741394e-10, 'epoch': 0.99} + 99%|█████████▉| 12119/12188 [2:30:42<08:41, 7.56s/it] 99%|█████████▉| 12120/12188 [2:30:51<08:47, 7.76s/it] {'loss': 0.2741, 'grad_norm': 0.7323286009602681, 'learning_rate': 8.405118823906489e-10, 'epoch': 0.99} + 99%|█████████▉| 12120/12188 [2:30:51<08:47, 7.76s/it] 99%|█████████▉| 12121/12188 [2:30:58<08:32, 7.66s/it] {'loss': 0.2788, 'grad_norm': 0.6801148325772978, 'learning_rate': 8.163264183186359e-10, 'epoch': 0.99} + 99%|█████████▉| 12121/12188 [2:30:58<08:32, 7.66s/it] 99%|█████████▉| 12122/12188 [2:31:05<08:18, 7.55s/it] {'loss': 0.3247, 'grad_norm': 0.7320228669586563, 'learning_rate': 7.924939885656235e-10, 'epoch': 0.99} + 99%|█████████▉| 12122/12188 [2:31:05<08:18, 7.55s/it] 99%|█████████▉| 12123/12188 [2:31:13<08:13, 7.59s/it] {'loss': 0.2984, 'grad_norm': 0.6998227784924308, 'learning_rate': 7.690145948152649e-10, 'epoch': 0.99} + 99%|█████████▉| 12123/12188 [2:31:13<08:13, 7.59s/it] 99%|█████████▉| 12124/12188 [2:31:21<08:05, 7.59s/it] {'loss': 0.3119, 'grad_norm': 0.6822689190777981, 'learning_rate': 7.458882387245681e-10, 'epoch': 0.99} + 99%|█████████▉| 12124/12188 [2:31:21<08:05, 7.59s/it] 99%|█████████▉| 12125/12188 [2:31:28<07:58, 7.59s/it] {'loss': 0.3086, 'grad_norm': 1.1432553741525204, 'learning_rate': 7.231149219277811e-10, 'epoch': 0.99} + 99%|█████████▉| 12125/12188 [2:31:28<07:58, 7.59s/it] 99%|█████████▉| 12126/12188 [2:31:36<07:52, 7.62s/it] {'loss': 0.2956, 'grad_norm': 0.8174961107312826, 'learning_rate': 7.00694646032507e-10, 'epoch': 0.99} + 99%|█████████▉| 12126/12188 [2:31:36<07:52, 7.62s/it] 99%|█████████▉| 12127/12188 [2:31:43<07:43, 7.60s/it] {'loss': 0.3507, 'grad_norm': 0.8813890706848693, 'learning_rate': 6.78627412622479e-10, 'epoch': 0.99} + 99%|█████████▉| 12127/12188 [2:31:43<07:43, 7.60s/it] 100%|█████████▉| 12128/12188 [2:31:51<07:31, 7.53s/it] {'loss': 0.3085, 'grad_norm': 0.7459278609136889, 'learning_rate': 6.569132232553399e-10, 'epoch': 1.0} + 100%|█████████▉| 12128/12188 [2:31:51<07:31, 7.53s/it] 100%|█████████▉| 12129/12188 [2:31:59<07:27, 7.58s/it] {'loss': 0.2807, 'grad_norm': 0.6936859796155047, 'learning_rate': 6.35552079465418e-10, 'epoch': 1.0} + 100%|█████████▉| 12129/12188 [2:31:59<07:27, 7.58s/it] 100%|█████████▉| 12130/12188 [2:32:06<07:15, 7.51s/it] {'loss': 0.3011, 'grad_norm': 0.7066791681711961, 'learning_rate': 6.145439827603961e-10, 'epoch': 1.0} + 100%|█████████▉| 12130/12188 [2:32:06<07:15, 7.51s/it] 100%|█████████▉| 12131/12188 [2:32:15<07:39, 8.07s/it] {'loss': 0.3282, 'grad_norm': 0.6654944860608583, 'learning_rate': 5.938889346240873e-10, 'epoch': 1.0} + 100%|█████████▉| 12131/12188 [2:32:15<07:39, 8.07s/it] 100%|█████████▉| 12132/12188 [2:32:23<07:20, 7.86s/it] {'loss': 0.2819, 'grad_norm': 0.7424028604245797, 'learning_rate': 5.735869365153247e-10, 'epoch': 1.0} + 100%|█████████▉| 12132/12188 [2:32:23<07:20, 7.86s/it] 100%|█████████▉| 12133/12188 [2:32:31<07:18, 7.98s/it] {'loss': 0.2749, 'grad_norm': 0.6457238467336138, 'learning_rate': 5.536379898674061e-10, 'epoch': 1.0} + 100%|█████████▉| 12133/12188 [2:32:31<07:18, 7.98s/it] 100%|█████████▉| 12134/12188 [2:32:38<06:59, 7.77s/it] {'loss': 0.3021, 'grad_norm': 0.697404311749881, 'learning_rate': 5.340420960897597e-10, 'epoch': 1.0} + 100%|█████████▉| 12134/12188 [2:32:38<06:59, 7.77s/it] 100%|█████████▉| 12135/12188 [2:32:46<06:47, 7.69s/it] {'loss': 0.3546, 'grad_norm': 0.735622371137498, 'learning_rate': 5.147992565657234e-10, 'epoch': 1.0} + 100%|█████████▉| 12135/12188 [2:32:46<06:47, 7.69s/it] 100%|█████████▉| 12136/12188 [2:32:53<06:37, 7.65s/it] {'loss': 0.3078, 'grad_norm': 0.8620438060987387, 'learning_rate': 4.959094726542102e-10, 'epoch': 1.0} + 100%|█████████▉| 12136/12188 [2:32:53<06:37, 7.65s/it] 100%|█████████▉| 12137/12188 [2:33:01<06:33, 7.72s/it] {'loss': 0.2913, 'grad_norm': 0.7490479748944667, 'learning_rate': 4.77372745689153e-10, 'epoch': 1.0} + 100%|█████████▉| 12137/12188 [2:33:01<06:33, 7.72s/it] 100%|█████████▉| 12138/12188 [2:33:09<06:23, 7.67s/it] {'loss': 0.2541, 'grad_norm': 0.7997867436209518, 'learning_rate': 4.591890769795049e-10, 'epoch': 1.0} + 100%|█████████▉| 12138/12188 [2:33:09<06:23, 7.67s/it] 100%|█████████▉| 12139/12188 [2:33:16<06:08, 7.53s/it] {'loss': 0.3164, 'grad_norm': 0.6768657521444497, 'learning_rate': 4.4135846780979373e-10, 'epoch': 1.0} + 100%|█████████▉| 12139/12188 [2:33:16<06:08, 7.53s/it] 100%|█████████▉| 12140/12188 [2:33:25<06:19, 7.91s/it] {'loss': 0.2878, 'grad_norm': 0.6971419056316803, 'learning_rate': 4.2388091943901257e-10, 'epoch': 1.0} + 100%|█████████▉| 12140/12188 [2:33:25<06:19, 7.91s/it] 100%|█████████▉| 12141/12188 [2:33:32<06:07, 7.81s/it] {'loss': 0.292, 'grad_norm': 0.7185564820010338, 'learning_rate': 4.0675643310117417e-10, 'epoch': 1.0} + 100%|█████████▉| 12141/12188 [2:33:32<06:07, 7.81s/it] 100%|█████████▉| 12142/12188 [2:33:40<05:52, 7.67s/it] {'loss': 0.2532, 'grad_norm': 0.7245075954323954, 'learning_rate': 3.8998501000586664e-10, 'epoch': 1.0} + 100%|█████████▉| 12142/12188 [2:33:40<05:52, 7.67s/it] 100%|█████████▉| 12143/12188 [2:33:48<05:49, 7.76s/it] {'loss': 0.3324, 'grad_norm': 0.7062481657985381, 'learning_rate': 3.735666513371428e-10, 'epoch': 1.0} + 100%|█████████▉| 12143/12188 [2:33:48<05:49, 7.76s/it] 100%|█████████▉| 12144/12188 [2:33:55<05:42, 7.78s/it] {'loss': 0.3247, 'grad_norm': 0.7019017600811804, 'learning_rate': 3.575013582546305e-10, 'epoch': 1.0} + 100%|█████████▉| 12144/12188 [2:33:55<05:42, 7.78s/it] 100%|█████████▉| 12145/12188 [2:34:04<05:41, 7.94s/it] {'loss': 0.2827, 'grad_norm': 0.7453614855340213, 'learning_rate': 3.417891318929778e-10, 'epoch': 1.0} + 100%|█████████▉| 12145/12188 [2:34:04<05:41, 7.94s/it] 100%|█████████▉| 12146/12188 [2:34:11<05:30, 7.87s/it] {'loss': 0.337, 'grad_norm': 0.7269582015069586, 'learning_rate': 3.2642997336185257e-10, 'epoch': 1.0} + 100%|█████████▉| 12146/12188 [2:34:11<05:30, 7.87s/it] 100%|█████████▉| 12147/12188 [2:34:19<05:19, 7.80s/it] {'loss': 0.2907, 'grad_norm': 0.6626479973421284, 'learning_rate': 3.114238837453876e-10, 'epoch': 1.0} + 100%|█████████▉| 12147/12188 [2:34:19<05:19, 7.80s/it] 100%|█████████▉| 12148/12188 [2:34:27<05:19, 7.99s/it] {'loss': 0.3096, 'grad_norm': 0.7682351476336463, 'learning_rate': 2.967708641032907e-10, 'epoch': 1.0} + 100%|█████████▉| 12148/12188 [2:34:27<05:19, 7.99s/it] 100%|█████████▉| 12149/12188 [2:34:35<05:05, 7.82s/it] {'loss': 0.2565, 'grad_norm': 0.7327147814162173, 'learning_rate': 2.8247091547084496e-10, 'epoch': 1.0} + 100%|█████████▉| 12149/12188 [2:34:35<05:05, 7.82s/it] 100%|█████████▉| 12150/12188 [2:34:43<04:56, 7.80s/it] {'loss': 0.3118, 'grad_norm': 0.7149256545468203, 'learning_rate': 2.685240388577981e-10, 'epoch': 1.0} + 100%|█████████▉| 12150/12188 [2:34:43<04:56, 7.80s/it] 100%|█████████▉| 12151/12188 [2:34:50<04:44, 7.68s/it] {'loss': 0.3033, 'grad_norm': 0.9139052722347522, 'learning_rate': 2.54930235248918e-10, 'epoch': 1.0} + 100%|█████████▉| 12151/12188 [2:34:50<04:44, 7.68s/it] 100%|█████████▉| 12152/12188 [2:34:58<04:36, 7.69s/it] {'loss': 0.2875, 'grad_norm': 0.7119349281319355, 'learning_rate': 2.4168950560399254e-10, 'epoch': 1.0} + 100%|█████████▉| 12152/12188 [2:34:58<04:36, 7.69s/it] 100%|█████████▉| 12153/12188 [2:35:05<04:25, 7.60s/it] {'loss': 0.2964, 'grad_norm': 0.6788835895940576, 'learning_rate': 2.2880185085838447e-10, 'epoch': 1.0} + 100%|██��██████▉| 12153/12188 [2:35:05<04:25, 7.60s/it] 100%|█████████▉| 12154/12188 [2:35:13<04:18, 7.59s/it] {'loss': 0.2829, 'grad_norm': 0.6719440568678516, 'learning_rate': 2.1626727192192164e-10, 'epoch': 1.0} + 100%|█████████▉| 12154/12188 [2:35:13<04:18, 7.59s/it] 100%|█████████▉| 12155/12188 [2:35:21<04:16, 7.77s/it] {'loss': 0.291, 'grad_norm': 0.7205698199116506, 'learning_rate': 2.0408576968000693e-10, 'epoch': 1.0} + 100%|█████████▉| 12155/12188 [2:35:21<04:16, 7.77s/it] 100%|█████████▉| 12156/12188 [2:35:29<04:07, 7.73s/it] {'loss': 0.318, 'grad_norm': 0.7520700516753814, 'learning_rate': 1.9225734499250802e-10, 'epoch': 1.0} + 100%|█████████▉| 12156/12188 [2:35:29<04:07, 7.73s/it] 100%|█████████▉| 12157/12188 [2:35:36<03:57, 7.65s/it] {'loss': 0.3024, 'grad_norm': 0.7307349560232842, 'learning_rate': 1.807819986948678e-10, 'epoch': 1.0} + 100%|█████████▉| 12157/12188 [2:35:36<03:57, 7.65s/it] 100%|█████████▉| 12158/12188 [2:35:44<03:50, 7.67s/it] {'loss': 0.3087, 'grad_norm': 0.6858342788771874, 'learning_rate': 1.6965973159810413e-10, 'epoch': 1.0} + 100%|█████████▉| 12158/12188 [2:35:44<03:50, 7.67s/it] 100%|█████████▉| 12159/12188 [2:35:51<03:42, 7.68s/it] {'loss': 0.3009, 'grad_norm': 0.6726860341918907, 'learning_rate': 1.5889054448658957e-10, 'epoch': 1.0} + 100%|█████████▉| 12159/12188 [2:35:51<03:42, 7.68s/it] 100%|█████████▉| 12160/12188 [2:35:59<03:36, 7.74s/it] {'loss': 0.298, 'grad_norm': 0.6838097611739047, 'learning_rate': 1.4847443812193717e-10, 'epoch': 1.0} + 100%|█████████▉| 12160/12188 [2:35:59<03:36, 7.74s/it] 100%|█████████▉| 12161/12188 [2:36:07<03:28, 7.74s/it] {'loss': 0.2895, 'grad_norm': 0.680738351778282, 'learning_rate': 1.3841141323855946e-10, 'epoch': 1.0} + 100%|█████████▉| 12161/12188 [2:36:07<03:28, 7.74s/it] 100%|█████████▉| 12162/12188 [2:36:15<03:23, 7.82s/it] {'loss': 0.3093, 'grad_norm': 0.6847825400400924, 'learning_rate': 1.2870147054810933e-10, 'epoch': 1.0} + 100%|█████████▉| 12162/12188 [2:36:15<03:23, 7.82s/it] 100%|█████████▉| 12163/12188 [2:36:23<03:13, 7.74s/it] {'loss': 0.2859, 'grad_norm': 0.6637383014754411, 'learning_rate': 1.1934461073559444e-10, 'epoch': 1.0} + 100%|█████████▉| 12163/12188 [2:36:23<03:13, 7.74s/it] 100%|█████████▉| 12164/12188 [2:36:30<03:05, 7.71s/it] {'loss': 0.2753, 'grad_norm': 0.6680863702533094, 'learning_rate': 1.103408344621526e-10, 'epoch': 1.0} + 100%|█████████▉| 12164/12188 [2:36:30<03:05, 7.71s/it] 100%|█████████▉| 12165/12188 [2:36:38<02:56, 7.67s/it] {'loss': 0.3463, 'grad_norm': 0.6740179372925819, 'learning_rate': 1.0169014236394159e-10, 'epoch': 1.0} + 100%|█████████▉| 12165/12188 [2:36:38<02:56, 7.67s/it] 100%|█████████▉| 12166/12188 [2:36:45<02:47, 7.61s/it] {'loss': 0.2506, 'grad_norm': 0.7270722781893997, 'learning_rate': 9.339253505102896e-11, 'epoch': 1.0} + 100%|█████████▉| 12166/12188 [2:36:45<02:47, 7.61s/it] 100%|█████████▉| 12167/12188 [2:36:53<02:37, 7.52s/it] {'loss': 0.3332, 'grad_norm': 0.8367408345914051, 'learning_rate': 8.544801310961248e-11, 'epoch': 1.0} + 100%|█████████▉| 12167/12188 [2:36:53<02:37, 7.52s/it] 100%|█████████▉| 12168/12188 [2:37:00<02:31, 7.57s/it] {'loss': 0.2769, 'grad_norm': 0.7450982306246592, 'learning_rate': 7.785657710090988e-11, 'epoch': 1.0} + 100%|█████████▉| 12168/12188 [2:37:00<02:31, 7.57s/it] 100%|█████████▉| 12169/12188 [2:37:08<02:24, 7.58s/it] {'loss': 0.3022, 'grad_norm': 0.6848980772909937, 'learning_rate': 7.061822756115888e-11, 'epoch': 1.0} + 100%|█████████▉| 12169/12188 [2:37:08<02:24, 7.58s/it] 100%|███████���█▉| 12170/12188 [2:37:16<02:21, 7.88s/it] {'loss': 0.2884, 'grad_norm': 0.7260283720764462, 'learning_rate': 6.373296500106207e-11, 'epoch': 1.0} + 100%|█████████▉| 12170/12188 [2:37:17<02:21, 7.88s/it] 100%|█████████▉| 12171/12188 [2:37:24<02:11, 7.73s/it] {'loss': 0.2959, 'grad_norm': 0.7171866644126272, 'learning_rate': 5.720078990745226e-11, 'epoch': 1.0} + 100%|█████████▉| 12171/12188 [2:37:24<02:11, 7.73s/it] 100%|█████████▉| 12172/12188 [2:37:32<02:03, 7.72s/it] {'loss': 0.2751, 'grad_norm': 0.7008026972218931, 'learning_rate': 5.10217027416271e-11, 'epoch': 1.0} + 100%|█████████▉| 12172/12188 [2:37:32<02:03, 7.72s/it] 100%|█████████▉| 12173/12188 [2:37:39<01:53, 7.59s/it] {'loss': 0.2885, 'grad_norm': 0.7313592627555455, 'learning_rate': 4.519570393879402e-11, 'epoch': 1.0} + 100%|█████████▉| 12173/12188 [2:37:39<01:53, 7.59s/it] 100%|█████████▉| 12174/12188 [2:37:48<01:53, 8.12s/it] {'loss': 0.3299, 'grad_norm': 0.6808936275331859, 'learning_rate': 3.972279391195599e-11, 'epoch': 1.0} + 100%|█████████▉| 12174/12188 [2:37:48<01:53, 8.12s/it] 100%|█████████▉| 12175/12188 [2:37:56<01:43, 7.94s/it] {'loss': 0.2828, 'grad_norm': 0.7229704094762887, 'learning_rate': 3.4602973046360396e-11, 'epoch': 1.0} + 100%|█████████▉| 12175/12188 [2:37:56<01:43, 7.94s/it] 100%|█████████▉| 12176/12188 [2:38:03<01:33, 7.79s/it] {'loss': 0.2741, 'grad_norm': 0.6721469598558081, 'learning_rate': 2.983624170393995e-11, 'epoch': 1.0} + 100%|█████████▉| 12176/12188 [2:38:03<01:33, 7.79s/it] 100%|█████████▉| 12177/12188 [2:38:11<01:24, 7.67s/it] {'loss': 0.3132, 'grad_norm': 0.73856770978565, 'learning_rate': 2.5422600221647332e-11, 'epoch': 1.0} + 100%|█████████▉| 12177/12188 [2:38:11<01:24, 7.67s/it]Traceback (most recent call last): + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1222, in __getitem__ + sample = self._get_item(i) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in _get_item + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 1352, in + results = [self.process_image_unified(file) for file in image_file] + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 897, in process_image_unified + image = load_image(image_file, tcs_loader=self.tcs_loader) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 156, in load_image + return tcs_loader(image_path) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 105, in __call__ + img = self.pil_loader(img_value_str) + File "/mnt/hwfile/liuzhaoyang/workspace/GUIAgent/qwen-vl-finetune/qwenvl/data/data_qwen_2.py", line 89, in pil_loader + img = Image.open(buff) + File "/mnt/petrelfs/liuzhaoyang/workspace/programs/miniconda3/envs/qwen2_5vl/lib/python3.10/site-packages/PIL/Image.py", line 3536, in open + raise UnidentifiedImageError(msg) +PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f44ccd72f70> +[Try #0] Failed to fetch sample 4787019 in VC:s3://gui/OS-Atlas/desktop_domain/windows_images. Exception: cannot identify image file <_io.BytesIO object at 0x7f44ccd72f70> +Problematic sample: {'image': '20240822_131046_before_screenshot_sub0.png', 'conversations': [{'from': 'human', 'value': "\nClick on 'Slice'"}, {'from': 'gpt', 'value': '\nclick(x=0.6985, y=0.139)\n'}]} + 100%|█████████▉| 12178/12188 [2:38:21<01:23, 8.39s/it] {'loss': 0.2754, 'grad_norm': 0.7158585408689695, 'learning_rate': 2.1362048910900103e-11, 'epoch': 1.0} + 100%|█████████▉| 12178/12188 [2:38:21<01:23, 8.39s/it] 100%|█████████▉| 12179/12188 [2:38:28<01:12, 8.11s/it] {'loss': 0.2786, 'grad_norm': 0.6213247263896023, 'learning_rate': 1.7654588058690914e-11, 'epoch': 1.0} + 100%|█████████▉| 12179/12188 [2:38:28<01:12, 8.11s/it] 100%|█████████▉| 12180/12188 [2:38:35<01:03, 7.90s/it] {'loss': 0.3241, 'grad_norm': 0.7056029790445787, 'learning_rate': 1.4300217926477289e-11, 'epoch': 1.0} + 100%|█████████▉| 12180/12188 [2:38:36<01:03, 7.90s/it] 100%|█████████▉| 12181/12188 [2:38:44<00:56, 8.08s/it] {'loss': 0.3309, 'grad_norm': 0.7338911248734207, 'learning_rate': 1.1298938751291843e-11, 'epoch': 1.0} + 100%|█████████▉| 12181/12188 [2:38:44<00:56, 8.08s/it] 100%|█████████▉| 12182/12188 [2:38:51<00:47, 7.90s/it] {'loss': 0.3106, 'grad_norm': 0.730576473900203, 'learning_rate': 8.650750745187175e-12, 'epoch': 1.0} + 100%|█████████▉| 12182/12188 [2:38:51<00:47, 7.90s/it] 100%|█████████▉| 12183/12188 [2:38:59<00:38, 7.71s/it] {'loss': 0.301, 'grad_norm': 0.744057119528094, 'learning_rate': 6.355654094680752e-12, 'epoch': 1.0} + 100%|█████████▉| 12183/12188 [2:38:59<00:38, 7.71s/it] 100%|█████████▉| 12184/12188 [2:39:07<00:30, 7.73s/it] {'loss': 0.2749, 'grad_norm': 0.697079369277063, 'learning_rate': 4.413648962975358e-12, 'epoch': 1.0} + 100%|█████████▉| 12184/12188 [2:39:07<00:30, 7.73s/it] 100%|█████████▉| 12185/12188 [2:39:16<00:24, 8.23s/it] {'loss': 0.2758, 'grad_norm': 0.6239705306645849, 'learning_rate': 2.8247354855182043e-12, 'epoch': 1.0} + 100%|█████████▉| 12185/12188 [2:39:16<00:24, 8.23s/it] 100%|█████████▉| 12186/12188 [2:39:24<00:16, 8.21s/it] {'loss': 0.3068, 'grad_norm': 0.8171981682247088, 'learning_rate': 1.5889137761071482e-12, 'epoch': 1.0} + 100%|█████████▉| 12186/12188 [2:39:24<00:16, 8.21s/it] 100%|█████████▉| 12187/12188 [2:39:32<00:08, 8.15s/it] {'loss': 0.3017, 'grad_norm': 0.6937437990715053, 'learning_rate': 7.061839213395871e-13, 'epoch': 1.0} + 100%|█████████▉| 12187/12188 [2:39:32<00:08, 8.15s/it] 100%|██████████| 12188/12188 [2:39:40<00:00, 7.97s/it] {'loss': 0.2951, 'grad_norm': 0.793574577878814, 'learning_rate': 1.765459833880101e-13, 'epoch': 1.0} + 100%|██████████| 12188/12188 [2:39:40<00:00, 7.97s/it] {'train_runtime': 9597.8769, 'train_samples_per_second': 650.242, 'train_steps_per_second': 1.27, 'train_loss': 0.02913342710690769, 'epoch': 1.0} + 100%|██████████| 12188/12188 [2:39:57<00:00, 7.97s/it] 100%|██████████| 12188/12188 [2:39:57<00:00, 1.27it/s]